" Tests for regexp in utf8 encoding
Bram Moolenaar22e42152016-04-03 14:02:02 +02002
" Check the [[=x=]] equivalence classes.  The string below holds groups of
" equivalent characters, separated by a space.  For every character the
" equivalence class must match all characters of its own group and none of
" the characters in any other group.
func s:equivalence_test()
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňʼnǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let classes = split(str)
  for own in classes
    " all the groups that "own" must stay separate from
    let others = filter(copy(classes), 'v:val != own')
    for ch in split(own, '\zs')
      let class_pat = '[[=' .. ch .. '=]]'
      " confirm that the equivalence class matches every character in its
      " own group
      call assert_match('^' .. class_pat .. '*$', own)
      " confirm that the equivalence class doesn't match a character in any
      " other group
      for other in others
        call assert_equal(-1, match(other, class_pat), ch)
      endfor
    endfor
  endfor
endfunc
21
" Run the equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
27
" Run the equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020033
" Collect the characters 1..255 that are matched by each [[:class:]] item and
" by \i, \k and \f, then compare the result with the expected character sets.
" The regexp engine used is the one currently selected with 'regexpengine'.
func s:classes_test()
  " 'iskeyword' and 'isprint' are changed below.  Remember the old values and
  " restore them when done, so that they do not leak into later tests.
  let save_isk_global = &g:iskeyword
  let save_isk_local = &l:iskeyword
  let save_isprint = &isprint
  try
    if has('win32')
      set iskeyword=@,48-57,_,192-255
    endif
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

    " Build the table of patterns to try: key -> pattern.
    let patterns = {'\i': '\i', '\k': '\k', '\f': '\f'}
    for name in ['alpha', 'alnum', 'backspace', 'blank', 'cntrl', 'digit',
          \ 'escape', 'graph', 'lower', 'print', 'punct', 'return', 'space',
          \ 'tab', 'upper', 'xdigit', 'ident', 'keyword', 'fname']
      let patterns[name] = '[[:' .. name .. ':]]'
    endfor

    " For every pattern collect the matching characters, in ascending order.
    let found = map(copy(patterns), "''")
    for nr in range(1, 255)
      let c = nr2char(nr)
      for [key, pat] in items(patterns)
        if c =~ pat
          let found[key] .= c
        endif
      endfor
    endfor

    " The characters 0xa0..0xff.  They are generated instead of typed,
    " because the range contains the invisible no-break space (0xa0) and
    " soft hyphen (0xad), which are easily lost when the file is edited.
    let high = join(map(range(0xa0, 0xff), 'nr2char(v:val)'), '')

    let expected = {}
    let expected['alpha'] = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    let expected['alnum'] = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
    let expected['backspace'] = "\b"
    let expected['blank'] = "\t "
    let expected['cntrl'] = "\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f"
    let expected['digit'] = "0123456789"
    let expected['escape'] = "\<Esc>"
    let expected['graph'] = '!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
    let expected['lower'] = 'abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let expected['print'] = ' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' .. high
    let expected['punct'] = '!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~'
    let expected['upper'] = 'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ'
    let expected['return'] = "\r"
    let expected['space'] = "\t\n\x0b\f\r "
    let expected['tab'] = "\t"
    let expected['xdigit'] = '0123456789ABCDEFabcdef'
    for [name, chars] in items(expected)
      call assert_equal(chars, found[name], '[[:' .. name .. ':]]')
    endfor

    let word_ascii = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
    " 'µ' followed by the characters 0xc0..0xff; this includes the
    " multiplication sign (0xd7) and the division sign (0xf7).
    let word_high = 'µ' .. join(map(range(0xc0, 0xff), 'nr2char(v:val)'), '')
    if has('win32')
      " NOTE(review): assumes the default 'isident' on MS-Windows contains
      " the range 128-167; those characters are generated because most of
      " them are control characters that cannot be typed reliably.
      let identchars_ok = word_ascii
            \ .. join(map(range(0x80, 0xa7), 'nr2char(v:val)'), '')
            \ .. 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
      let kwordchars_ok = word_ascii .. word_high
    else
      let identchars_ok = word_ascii .. word_high
      let kwordchars_ok = word_ascii .. word_high
    endif

    if has('win32')
      let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' .. high
    elseif has('amiga')
      let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. high
    elseif has('vms')
      let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' .. high
    else
      let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. high
    endif

    call assert_equal(identchars_ok, found['ident'])
    call assert_equal(kwordchars_ok, found['keyword'])
    call assert_equal(fnamechars_ok, found['fname'])

    " The [[:class:]] form and the backslash item must match the same set.
    call assert_equal(found['\i'], found['ident'])
    call assert_equal(found['\k'], found['keyword'])
    call assert_equal(found['\f'], found['fname'])
  finally
    let &g:iskeyword = save_isk_global
    let &l:iskeyword = save_isk_local
    let &isprint = save_isprint
  endtry
endfunc
178
" Run the character class checks with 'regexpengine' set to 1.
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
184
" Run the character class checks with 'regexpengine' set to 2.
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200190
" A reversed range in a collection must give error E944, whatever the value
" of 'regexpengine' is.
func Test_reversed_range()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
  endfor
  let &regexpengine = 0
endfunc
198
" A collection with a very large range: with 'regexpengine' 1 the error E945
" is expected, with 'regexpengine' 2 the range is expected to work.
func Test_large_class()
  let range_pat = '[\u3000-\u4000]'

  set regexpengine=1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  set regexpengine=2
  call assert_equal(0, 'abc def' =~# range_pat)
  call assert_equal(1, "\u3042" =~# range_pat)

  set regexpengine=0
endfunc
Bram Moolenaar2a5b5272019-07-20 18:56:06 +0200207
" A \%[] item with 9000 atoms must fail with E339 when 'regexpengine' is 1.
func Test_optmatch_toolong()
  set regexpengine=1
  " Can only handle about 8000 characters.
  let atoms = repeat('x', 9000)
  let cmd = 'call match("abc def", "' .. '\\%[' .. atoms .. ']' .. '")'
  call assert_fails(cmd, 'E339:')
  set regexpengine=0
endfunc
215
" Test for regexp patterns with multi-byte support, using utf-8.
" Every entry of the table is tried with the regexp engines selected by its
" first item; failures are reported with assert_report().
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])								" equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  " The character before "fname" is 0x9f.  It is written as an escape,
  " because a look-alike such as U+0178 would itself match \f and break the
  " expected result.
  call add(tl, [2, '\f\+', '&*' .. "\u009f" .. 'fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      " common start of all the failure messages for this entry and engine
      let prefix = 'Error ' . engine . ': pat: \"' . pat . '\", text: \"' . text
      try
        let l = matchlist(text, pat)
      catch
        call assert_report(prefix . '\", caused an exception: \"' . v:exception . '\"')
        " "l" is not set (or still has the result of the previous run), the
        " checks below would be meaningless.
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report(prefix . '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report(prefix . '\", match: \"' . l[0] . '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report(prefix . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      endif
      if len(l) > 0
        " check all the nine submatches; a submatch that is not listed in the
        " table entry is expected to be empty
        for idx in range(1, 9)
          let e = get(t, matchidx + idx, '')
          if l[idx] != e
            call assert_report(prefix . '\", submatch ' . idx .
                  \ ': \"' . l[idx] . '\", expected: \"' . e . '\"')
          endif
        endfor
      endif
    endfor
  endfor
  set regexpengine&
endfunc
342
" Check that 'ambiwidth' does not change the meaning of \p, for both values
" of 'ambiwidth' and with 'regexpengine' set to 1 and 2.
func Test_ambiwidth()
  for engine in [1, 2]
    for width in ['single', 'double']
      exe 'set regexpengine=' .. engine .. ' ambiwidth=' .. width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc
355
" Check substitute() on the text "iIİ" with patterns that contain \c, using
" the regexp engine currently selected with 'regexpengine'.
func Run_regexp_ignore_case()
  let text = 'iIİ'
  call assert_equal('iIİ', substitute(text, '\([iIİ]\)', '\1', 'g'))

  " Each entry: [expected result, pattern]; every match is replaced by "x".
  for [expected, pat] in [
        \ ['iIx', '\c\([İ]\)'],
        \ ['xxİ', '\(i\c\)'],
        \ ['iIx', '\(İ\c\)'],
        \ ['iIx', '\c\(\%u0130\)'],
        \ ['iIx', '\c\([\u0130]\)'],
        \ ['iIx', '\c\([\u012f-\u0131]\)'],
        \ ]
    call assert_equal(expected, substitute(text, pat, 'x', 'g'))
  endfor
endfunc
366
" Run the ignore-case checks with 'regexpengine' set to 1 and then to 2.
func Test_regexp_ignore_case()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc
374
" Tests for regexp with multi-byte encoding and various magic settings.
" Every step searches in a scratch buffer, deletes one character with "x" and
" checks the resulting line.  The steps depend on the cursor position left by
" the previous step, thus their order matters.
func Run_regexp_multibyte_magic()
  " Lines "e" and "f" contain the characters 0x12345678 and 0x1234abcd, which
  " are searched for below.  These are outside of the Unicode range and
  " cannot be stored literally in a valid UTF-8 file, therefore they are
  " produced with nr2char().
  let text = [
        \ '1 a aa abb abbccc',
        \ '2 d dd dee deefff',
        \ '3 g gg ghh ghhiii',
        \ '4 j jj jkk jkklll',
        \ '5 m mm mnn mnnooo',
        \ '6 x ^aa$ x',
        \ '7 (a)(b) abbaa',
        \ '8 axx [ab]xx',
        \ '9 หม่x อมx',
        \ 'a อมx หม่x',
        \ 'b ちカヨは',
        \ 'c x ¬€x',
        \ 'd 天使x',
        \ 'e ' .. nr2char(0x12345678, 1) .. 'y',
        \ 'f ' .. nr2char(0x1234abcd, 1) .. 'z',
        \ 'g a啷bb',
        \ 'j 0123❤x',
        \ 'k combinations',
        \ 'l äö üᾱ̆́',
        \ ]

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k  the substitute command is executed from register w
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc
459
" Run the multi-byte magic checks with 'regexpengine' set to 1 and then to 2.
func Test_regexp_multibyte_magic()
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc
467
" Test for 7.3.192
" The command ":s/ \?/ /g" used to split multi-byte characters into bytes.
" Check that a space is inserted before each character and that the
" composing characters stay attached.
func Test_split_multibyte_to_bytes()
  new
  let line = 'l äö üᾱ̆́'
  call setline(1, line)
  substitute/ \?/ /g
  call assert_equal(' l ä ö ü ᾱ̆́', getline(1))
  close!
endfunc
477
" Test for matchstr() with multibyte characters, using the {start} and
" {count} arguments.
func Test_matchstr_multibyte()
  new
  let word = "אבגד"
  call assert_equal('ב', matchstr(word, ".", 0, 2))
  call assert_equal('בג', matchstr(word, "..", 0, 2))
  call assert_equal('א', matchstr(word, ".", 0, 0))
  call assert_equal('ג', matchstr(word, ".", 4, -1))
  close!
endfunc
487
" Test for 7.4.636
" A search with an end offset used to get stuck at the end of the file.
" Search for "(" with offset "e+", repeat it and yank up to the next match.
func Test_search_with_end_offset()
  new
  let lines = ['', 'dog(a', 'cat(']
  call setline(1, lines)
  exe "normal /(/e+\<CR>"
  normal n"ayn
  call assert_equal("a\ncat(", getreg('a'))
  close!
endfunc
498
" Check that "^" matches even when the line starts with a combining char:
" searching for "^" from line 1 must stop in line 2, which only holds U+05AE.
func Test_match_start_of_line_combining()
  new
  let lines = ['', "\u05ae", '']
  call setline(1, lines)
  exe "normal gg/^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc
507
" Check that [[:upper:]] and [[:lower:]] match for the automatic engine.
" Every pattern is searched for with 'regexpengine' set to 0, 1 and 2 and the
" number of matches reported by searchcount() is compared with the expected
" number for that engine.
func Test_match_char_class_upper()
  new
  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1,1)

  " Each entry: [pattern, expected totals for engine 0, 1 and 2, message]
  let cases = [
        \ ['\<[[:upper:]]\{2,\}\>', [4, 2, 4], 'TEST 1'],
        \ ['\<[[:upper:]].\+\>', [2, 1, 2], 'TEST 2'],
        \ ['\<[[:lower:]]\+\>', [4, 2, 4], 'TEST 3 lower'],
        \ ]
  for [pat, totals, label] in cases
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in range(3)
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, label)
    endfor
  endfor

  " clean up
  set regexpengine=0
  bwipe!
endfunc
Bram Moolenaaref2dff52020-12-21 14:54:32 +0100554
" Source a script whose second line contains the byte 0xaa, which is not
" valid UTF-8 there.  There is no assertion; presumably the point is that
" sourcing it does not crash Vim.
func Test_match_invalid_byte()
  let fname = 'Xinvalid'
  let script = 0z630a.765d30aa0a.2e0a.790a.4030
  call writefile(script, fname)
  new
  exe 'source ' .. fname
  bwipe!
  call delete(fname)
endfunc
562
" With 'regexpengine' set to 1: split a window on a file name that consists
" of the bytes 0xeb 0xdb 0x99 and use ":buf" with a pattern argument.  Errors
" are suppressed and nothing is asserted; presumably the point is that this
" does not crash Vim.
func Test_match_too_complicated()
  let &regexpengine = 1
  let fname = "\xeb\xdb\x99"
  exe 'noswapfile vsplit ' .. fname
  silent! buf \&\zs*\zs*0
  bwipe!
  let &regexpengine = 0
endfunc
570
571
" vim: shiftwidth=2 sts=2 expandtab