blob: e7672ddd86d4eed678c48338d7cd0421046a0b88 [file] [log] [blame]
" Tests for regexp in utf8 encoding
Bram Moolenaar22e42152016-04-03 14:02:02 +02002
Bram Moolenaarf5094052022-07-29 16:22:25 +01003source shared.vim
4
" Check the [[=x=]] equivalence classes against a table of groups.
" "str" holds space-separated groups; each group is a base letter followed by
" the variants expected to be in the same equivalence class.  Failures are
" reported through assert_match() and assert_equal().  The regexp engine in
" use is whatever 'regexpengine' is set to when this function is called.
func s:equivalence_test()
  " The "n" group must contain the single character U+0149 (n preceded by
  " apostrophe).  The two-character sequence U+02BC + "n" would add a
  " character to the group that has no equivalence class of its own.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let groups = split(str)
  for group1 in groups
    for c in split(group1, '\zs')
      let class = '[[=' .. c .. '=]]'
      " The equivalence class of "c" must match every character of its own
      " group ...
      call assert_match('^' .. class .. '*$', group1)
      for group2 in groups
        if group2 != group1
          " ... and must not match any character of any other group.
          call assert_equal(-1, match(group2, class), c)
        endif
      endfor
    endfor
  endfor
endfunc
23
" Equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
29
" Equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020035
" Return a string with the characters for code points a:first up to and
" including a:last, in order.
func s:CharRange(first, last)
  return join(map(range(a:first, a:last), 'nr2char(v:val)'), '')
endfunc

" Check which of the characters 1 - 255 are matched by each [[:class:]] item
" and by \i, \k and \f.  The matching characters are collected per class and
" compared with the expected strings.  Failures are reported through
" assert_equal().  The regexp engine in use is whatever 'regexpengine' is set
" to when this function is called.
func s:classes_test()
  " This test changes 'iskeyword' (MS-Windows only) and 'isprint'; put them
  " back at the end so that other tests are not affected.
  let save_iskeyword = &iskeyword
  let save_isprint = &isprint
  if has('win32')
    set iskeyword=@,48-57,_,192-255
  endif
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  " Pattern to try for every collected class.
  let patterns = {
        \ 'alnum': '[[:alnum:]]',
        \ 'alpha': '[[:alpha:]]',
        \ 'backspace': '[[:backspace:]]',
        \ 'blank': '[[:blank:]]',
        \ 'cntrl': '[[:cntrl:]]',
        \ 'digit': '[[:digit:]]',
        \ 'escape': '[[:escape:]]',
        \ 'graph': '[[:graph:]]',
        \ 'lower': '[[:lower:]]',
        \ 'print': '[[:print:]]',
        \ 'punct': '[[:punct:]]',
        \ 'return': '[[:return:]]',
        \ 'space': '[[:space:]]',
        \ 'tab': '[[:tab:]]',
        \ 'upper': '[[:upper:]]',
        \ 'xdigit': '[[:xdigit:]]',
        \ 'ident': '[[:ident:]]',
        \ 'ident1': '\i',
        \ 'kword': '[[:keyword:]]',
        \ 'kword1': '\k',
        \ 'fname': '[[:fname:]]',
        \ 'fname1': '\f',
        \ }

  " Start every class with an empty string, then append each matching
  " character in code point order.
  let found = map(copy(patterns), "''")
  for nr in range(1, 255)
    let c = nr2char(nr)
    for [name, pat] in items(patterns)
      if c =~ pat
        let found[name] = found[name] .. c
      endif
    endfor
  endfor

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alpha)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alnum)
  call assert_equal("\b", found.backspace)
  call assert_equal("\t ", found.blank)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", found.cntrl)
  call assert_equal("0123456789", found.digit)
  call assert_equal("\<Esc>", found.escape)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', found.graph)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', found.lower)

  " The expected strings below include code points that are invisible or
  " easily damaged when stored literally (no-break space 0xa0, soft hyphen
  " 0xad, the controls 0x80 - 0x9f).  Build those parts from code points.
  let high = s:CharRange(160, 255)

  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' .. high, found.print)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', found.punct)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', found.upper)
  call assert_equal("\r", found.return)
  call assert_equal("\t\n\x0b\f\r ", found.space)
  call assert_equal("\t", found.tab)
  call assert_equal('0123456789ABCDEFabcdef', found.xdigit)

  let word = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
  " ASCII word characters, the micro sign and all of 192 - 255, which
  " includes the multiplication sign (215) and the division sign (247).
  let word_high = word .. 'µ' .. s:CharRange(192, 255)

  if has('win32')
    " Here the identifier characters include 128 - 167 and only the letters
    " of 192 - 255.
    let identchars_ok = word .. s:CharRange(128, 167) .. 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let kwordchars_ok = word_high
  else
    let identchars_ok = word_high
    let kwordchars_ok = word_high
  endif

  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' .. high
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. high
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' .. high
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. high
  endif

  call assert_equal(identchars_ok, found.ident)
  call assert_equal(kwordchars_ok, found.kword)
  call assert_equal(fnamechars_ok, found.fname)

  " The [[:class:]] form and the backslash form must agree.
  call assert_equal(found.ident1, found.ident)
  call assert_equal(found.kword1, found.kword)
  call assert_equal(found.fname1, found.fname)

  let &iskeyword = save_iskeyword
  let &isprint = save_isprint
endfunc
180
" Character class checks with 'regexpengine' set to 1.
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
186
" Character class checks with 'regexpengine' set to 2.
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200192
" A reversed range in a collection must fail with E944 for every value of
" 'regexpengine'.
func Test_reversed_range()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc
200
" A very large range in a collection: engine 1 must fail with E945, engine 2
" must handle it.
func Test_large_class()
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  let &regexpengine = 2
  let big_range = '[\u3000-\u4000]'
  call assert_equal(0, 'abc def' =~# big_range)
  call assert_equal(1, "\u3042" =~# big_range)

  let &regexpengine = 0
endfunc
Bram Moolenaar2a5b5272019-07-20 18:56:06 +0200209
" A \%[] item that is too long must fail with E339 when using engine 1.
func Test_optmatch_toolong()
  let &regexpengine = 1
  " Can only handle about 8000 characters.
  let optional = repeat('x', 9000)
  let cmd = 'call match("abc def", "\\%[' .. optional .. ']")'
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc
217
" Test for regexp patterns with multi-byte support, using utf-8.
" Every entry of the table is tried with the engines selected by its first
" item.  The whole match and the nine submatches are compared with the
" expected values; differences are reported with assert_report().
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*Ÿfname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  " NOTE(review): the "×§" in the next four entries looks like a mis-decoded
  " Hebrew letter qof (U+05E7) - confirm against the original file.
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine that this entry is not meant for.
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " "l" was not assigned for this engine.  Skip the checks below, they
        " would use an undefined variable or the result of a previous
        " iteration.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches; a submatch that is not listed in the
        " table entry is expected to be empty
        for i in range(1, 9)
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc
344
" check that 'ambiwidth' does not change the meaning of \p
func Test_ambiwidth()
  " Try both explicit engines with both values of 'ambiwidth'.
  for engine in [1, 2]
    for width in ['single', 'double']
      let &regexpengine = engine
      let &ambiwidth = width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc
357
" Check ignoring case for "i", "I" and "İ" (U+0130) with the regexp engine
" that is currently selected.
func Run_regexp_ignore_case()
  let subject = 'iIİ'
  call assert_equal('iIİ', substitute(subject, '\([iIİ]\)', '\1', 'g'))

  " Each item: [pattern, expected result after replacing all matches by "x"]
  let cases = [
        \ ['\c\([İ]\)', 'iIx'],
        \ ['\(i\c\)', 'xxİ'],
        \ ['\(İ\c\)', 'iIx'],
        \ ['\c\(\%u0130\)', 'iIx'],
        \ ['\c\([\u0130]\)', 'iIx'],
        \ ['\c\([\u012f-\u0131]\)', 'iIx'],
        \ ]
  for [pat, expected] in cases
    call assert_equal(expected, substitute(subject, pat, 'x', 'g'))
  endfor
endfunc
368
" Run the ignore-case checks with engine 1 and with engine 2.
func Test_regexp_ignore_case()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc
376
" Tests for regexp with multi-byte encoding and various magic settings
" Uses the regexp engine that is currently selected.  Each step searches in a
" scratch buffer, deletes the character under the cursor with "x" and then
" checks the resulting line.
func Run_regexp_multibyte_magic()
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
    e (replaced below)
    f (replaced below)
    g a啷bb
    j 0123❤x
    k combinations
    l äö üᾱ̆́
  END
  " Lines "e" and "f" each need a single character with a very large code
  " point between the letter and the final character.  Such a character
  " cannot be stored reliably as literal text, thus create it with nr2char().
  call assert_equal('e', text[13][0])
  call assert_equal('f', text[14][0])
  let text[13] = 'e ' .. nr2char(0x12345678) .. 'y'
  let text[14] = 'f ' .. nr2char(0x1234abcd) .. 'z'

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc
461
" Run the multi-byte magic checks with engine 1 and with engine 2.
func Test_regexp_multibyte_magic()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc
469
" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  let input = 'l äö üᾱ̆́'
  call setline(1, input)
  " An optional space is replaced by a space, which puts one space before
  " every character.
  s/ \?/ /g
  let result = getline(1)
  call assert_equal(' l ä ö ü ᾱ̆́', result)
  close!
endfunc
479
" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  let word = "אבגד"
  " Each item: [expected match, pattern, start, count]
  let cases = [
        \ ['ב', ".", 0, 2],
        \ ['בג', "..", 0, 2],
        \ ['א', ".", 0, 0],
        \ ['ג', ".", 4, -1],
        \ ]
  for [expected, pat, start, nth] in cases
    call assert_equal(expected, matchstr(word, pat, start, nth))
  endfor
  close!
endfunc
489
" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  let lines = ['', 'dog(a', 'cat(']
  call setline(1, lines)
  " Search with an end offset, repeat it, then yank up to the next match into
  " register a.
  exe "normal /(/e+\<CR>"
  exe 'normal n"ayn'
  call assert_equal("a\ncat(", @a)
  close!
endfunc
500
" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  let combining = "\u05ae"
  call setline(1, ['', combining, ''])
  " Searching from line 1 must stop in line 2.
  exe "normal gg/^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc
509
" Check that [[:upper:]] matches for automatic engine
" The same searches are also done with engine 1 and engine 2, and with
" [[:lower:]].
func Test_match_char_class_upper()
  new
  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)

  " Each item: [pattern, message, match totals for 'regexpengine' 0, 1 and 2]
  let cases = [
        \ ['\<[[:upper:]]\{2,\}\>', 'TEST 1', [4, 2, 4]],
        \ ['\<[[:upper:]].\+\>', 'TEST 2', [2, 1, 2]],
        \ ['\<[[:lower:]]\+\>', 'TEST 3 lower', [4, 2, 4]],
        \ ]
  for [pat, msg, totals] in cases
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in range(3)
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, msg)
    endfor
  endfor

  " clean up
  set regexpengine=0
  bwipe!
endfunc
Bram Moolenaaref2dff52020-12-21 14:54:32 +0100556
" Source a script written from a blob that contains the byte 0xaa.  There are
" no assertions; the test passes when sourcing the script returns.
func Test_match_invalid_byte()
  let fname = 'Xinvalid'
  call writefile(0z630a.765d30aa0a.2e0a.790a.4030, fname)
  new
  exe 'source ' .. fname
  bwipe!
  call delete(fname)
endfunc
564
" Run a separate Vim on a script that uses the byte 0xff character in buffer
" and file names.  There are no assertions; the test passes when the command
" returns.
func Test_match_illegal_byte()
  let script = 'Xregexp'
  let lines =<< trim END
    silent! buffer ÿ\c
    next ÿ
    0scriptnames
    source
  END
  call writefile(lines, script)
  let cmd = GetVimCommand() .. ' -X -Z -e -s -S ' .. script .. ' -c qa!'
  call system(cmd)

  call delete(script)
endfunc
577
" With engine 1, use a pattern as a buffer name argument while the buffer has
" a name with non-ASCII bytes.  Errors from ":buf" are ignored with
" "silent!"; there are no assertions.
func Test_match_too_complicated()
  let &regexpengine = 1
  exe "noswapfile vsplit \xeb\xdb\x99"
  silent! buf \&\zs*\zs*0
  bwipe!
  let &regexpengine = 0
endfunc
585
586
Bram Moolenaarafc13bd2019-12-16 22:43:31 +0100587" vim: shiftwidth=2 sts=2 expandtab