blob: 9f0ffb9aafc60e8c68c6f54fba1b75cc35299d86 [file] [log] [blame]
" Tests for regexp in utf8 encoding
Bram Moolenaar22e42152016-04-03 14:02:02 +02002
func s:equivalence_test()
  " Each space-separated group below lists the members of one equivalence
  " class.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠǺȂȦȺḀẠẢẤẦẨẪẬẮẰẲẴẶ BƁɃḂḄḆ CÇĆĈĊČƇȻḈꞒ DĎĐƊḊḌḎḐḒ EÈÉÊËĒĔĖĘĚȄȆȨɆḔḖḘḚḜẸẺẼẾỀỂỄỆ FƑḞꞘ GĜĞĠĢƓǤǦǴḠꞠ HĤĦȞḢḤḦḨḪⱧ IÌÍÎÏĨĪĬĮİƗǏȈȊḬḮỈỊ JĴɈ KĶƘǨḰḲḴⱩꝀ LĹĻĽĿŁȽḶḸḺḼⱠ MḾṀṂ NÑŃŅŇǸṄṆṈṊꞤ OÒÓÔÕÖØŌŎŐƟƠǑǪǬǾȌȎȪȬȮȰṌṎṐṒỌỎỐỒỔỖỘỚỜỞỠỢ PƤṔṖⱣ QɊ RŔŖŘȐȒɌṘṚṜṞⱤꞦ SŚŜŞŠȘṠṢṤṦṨⱾꞨ TŢŤŦƬƮȚȾṪṬṮṰ UÙÚÛÜŨŪŬŮŰƯǕǙǛǓǗȔȖɄṲṴṶṸṺỤỦỨỪỬỮỰ VƲṼṾ WŴẀẂẄẆẈ XẊẌ YÝŶŸƳȲɎẎỲỴỶỸ ZŹŻŽƵẐẒẔⱫ aàáâãäåāăąǎǟǡǻȃȧᶏḁẚạảấầẩẫậắằẳẵặⱥ bƀɓᵬᶀḃḅḇ cçćĉċčƈȼḉꞓꞔ dďđɗᵭᶁᶑḋḍḏḑḓ eèéêëēĕėęěȅȇȩɇᶒḕḗḙḛḝẹẻẽếềểễệ fƒᵮᶂḟꞙ gĝğġģǥǧǵɠᶃḡꞡ hĥħȟḣḥḧḩḫẖⱨꞕ iìíîïĩīĭįǐȉȋɨᶖḭḯỉị jĵǰɉ kķƙǩᶄḱḳḵⱪꝁ lĺļľŀłƚḷḹḻḽⱡ mᵯḿṁṃ nñńņňŉǹᵰᶇṅṇṉṋꞥ oòóôõöøōŏőơǒǫǭǿȍȏȫȭȯȱɵṍṏṑṓọỏốồổỗộớờởỡợ pƥᵱᵽᶈṕṗ qɋʠ rŕŗřȑȓɍɽᵲᵳᶉṛṝṟꞧ sśŝşšșȿᵴᶊṡṣṥṧṩꞩ tţťŧƫƭțʈᵵṫṭṯṱẗⱦ uùúûüũūŭůűųǚǖưǔǘǜȕȗʉᵾᶙṳṵṷṹṻụủứừửữự vʋᶌṽṿ wŵẁẃẅẇẉẘ xẋẍ yýÿŷƴȳɏẏẙỳỵỷỹ zźżžƶᵶᶎẑẓẕⱬ"
  let classes = split(str)
  for members in classes
    for ch in split(members, '\zs')
      let class_pat = '[[=' .. ch .. '=]]'

      " The class built from any member must match every character of the
      " group that member comes from ...
      call assert_match('^' .. class_pat .. '*$', members)

      " ... and must not match a character of any other group.
      for other in classes
        if other == members
          continue
        endif
        call assert_equal(-1, match(other, class_pat), ch)
      endfor
    endfor
  endfor
endfunc
21
func Test_equivalence_re1()
  " Run the equivalence class checks with 'regexpengine' set to 1, then
  " switch back to automatic engine selection (0).
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
27
func Test_equivalence_re2()
  " Run the equivalence class checks with 'regexpengine' set to 2, then
  " switch back to automatic engine selection (0).
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020033
func s:classes_test()
  " For every character class, collect the characters in the range 1-255
  " that match it and compare the result with the expected members.
  "
  " 'isprint' and, on MS-Windows, 'iskeyword' are changed for the checks.
  " The previous values are put back at the end so that tests running later
  " do not inherit them.
  let save_isprint = &isprint
  let save_isk_global = &g:iskeyword
  let save_isk_local = &l:iskeyword
  try
    if has('win32')
      set iskeyword=@,48-57,_,192-255
    endif
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

    " [name, pattern] for everything that is collected.  The "_atom" entries
    " are the backslash atoms that must agree with the bracket class of the
    " same name.
    let classes = [
          \ ['alpha', '[[:alpha:]]'],
          \ ['alnum', '[[:alnum:]]'],
          \ ['backspace', '[[:backspace:]]'],
          \ ['blank', '[[:blank:]]'],
          \ ['cntrl', '[[:cntrl:]]'],
          \ ['digit', '[[:digit:]]'],
          \ ['escape', '[[:escape:]]'],
          \ ['graph', '[[:graph:]]'],
          \ ['lower', '[[:lower:]]'],
          \ ['print', '[[:print:]]'],
          \ ['punct', '[[:punct:]]'],
          \ ['return', '[[:return:]]'],
          \ ['space', '[[:space:]]'],
          \ ['tab', '[[:tab:]]'],
          \ ['upper', '[[:upper:]]'],
          \ ['xdigit', '[[:xdigit:]]'],
          \ ['ident', '[[:ident:]]'],
          \ ['ident_atom', '\i'],
          \ ['keyword', '[[:keyword:]]'],
          \ ['keyword_atom', '\k'],
          \ ['fname', '[[:fname:]]'],
          \ ['fname_atom', '\f'],
          \ ]

    let found = {}
    for [name, pat] in classes
      let found[name] = ''
    endfor
    for nr in range(1, 255)
      let c = nr2char(nr)
      for [name, pat] in classes
        if c =~ pat
          let found[name] = found[name] .. c
        endif
      endfor
    endfor

    " Characters 160-255.  They are generated instead of typed because the
    " range contains the no-break space and the soft hyphen, which cannot be
    " told apart from a space / nothing when reading the source.
    let latin1_high = join(map(range(160, 255), 'nr2char(v:val)'), '')

    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found['alpha'])
    call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found['alnum'])
    call assert_equal("\b", found['backspace'])
    call assert_equal("\t ", found['blank'])
    call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", found['cntrl'])
    call assert_equal("0123456789", found['digit'])
    call assert_equal("\<Esc>", found['escape'])
    call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', found['graph'])
    call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', found['lower'])
    call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' .. latin1_high, found['print'])
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', found['punct'])
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', found['upper'])
    call assert_equal("\r", found['return'])
    call assert_equal("\t\n\x0b\f\r ", found['space'])
    call assert_equal("\t", found['tab'])
    call assert_equal('0123456789ABCDEFabcdef', found['xdigit'])

    " Digits, ASCII letters, underscore, the micro sign and all of 192-255.
    let word_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'

    if has('win32')
      " NOTE(review): the characters after "z" are 128-167, presumably the
      " MS-Windows default of 'isident' - confirm.  They are generated
      " because most of them are control characters without a glyph.
      let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
            \ .. join(map(range(128, 167), 'nr2char(v:val)'), '')
            \ .. 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
      let kwordchars_ok = word_ok
    elseif has('ebcdic')
      " NOTE(review): this literal contains characters above 255 (e.g. the
      " euro sign), which the loop above can never collect; it looks
      " mis-encoded.  Left untouched because the intended value is not known.
      let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz€ŒŽœž¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
      let kwordchars_ok = identchars_ok
    else
      let identchars_ok = word_ok
      let kwordchars_ok = word_ok
    endif

    " Only the ASCII part of the file name characters differs per system.
    if has('win32')
      let fname_ascii = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~'
    elseif has('amiga')
      let fname_ascii = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
    elseif has('vms')
      let fname_ascii = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~'
    elseif has('ebcdic')
      let fname_ascii = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
    else
      let fname_ascii = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~'
    endif
    let fnamechars_ok = fname_ascii .. latin1_high

    call assert_equal(identchars_ok, found['ident'])
    call assert_equal(kwordchars_ok, found['keyword'])
    call assert_equal(fnamechars_ok, found['fname'])

    " The bracket classes must agree with the \i, \k and \f atoms.
    call assert_equal(found['ident_atom'], found['ident'])
    call assert_equal(found['keyword_atom'], found['keyword'])
    call assert_equal(found['fname_atom'], found['fname'])
  finally
    let &isprint = save_isprint
    let &g:iskeyword = save_isk_global
    let &l:iskeyword = save_isk_local
  endtry
endfunc
183
func Test_classes_re1()
  " Run the character class checks with 'regexpengine' set to 1, then
  " switch back to automatic engine selection (0).
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
189
func Test_classes_re2()
  " Run the character class checks with 'regexpengine' set to 2, then
  " switch back to automatic engine selection (0).
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200195
func Test_reversed_range()
  " A collection range written backwards ("c-a") has to fail with E944 for
  " every value of 'regexpengine'.  The engine number is passed as the
  " message so a failure shows which engine was active.
  let engine = 0
  while engine <= 2
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:', engine)
    let engine += 1
  endwhile
  set regexpengine=0
endfunc
203
func Test_large_class()
  " With 'regexpengine' 1 the large range is rejected with E945.
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  " With 'regexpengine' 2 the same range is accepted: it does not match
  " plain ASCII text and does match a character inside the range.
  let &regexpengine = 2
  let range_pat = '[\u3000-\u4000]'
  call assert_equal(0, 'abc def' =~# range_pat)
  call assert_equal(1, "\u3042" =~# range_pat)

  let &regexpengine = 0
endfunc
Bram Moolenaar2a5b5272019-07-20 18:56:06 +0200212
func Test_optmatch_toolong()
  " An optional sequence \%[...] of 9000 characters has to be rejected with
  " E339 when 'regexpengine' is 1, which can only handle about 8000
  " characters.
  let &regexpengine = 1
  " The backslash is doubled because the pattern ends up inside a
  " double-quoted string in the command that is executed.
  let long_optional = '\\%[' .. repeat('x', 9000) .. ']'
  let cmd = printf('call match("abc def", "%s")', long_optional)
  call assert_fails(cmd, 'E339:')
  let &regexpengine = 0
endfunc
220
" Test for regexp patterns with multi-byte support, using utf-8.
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])	" equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  " The character in front of "fname" is U+009F.  It is written as an escape
  " because it has no glyph; the expected match shows that \f must skip it.
  call add(tl, [2, '\f\+', '&*' .. "\u009f" .. 'fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200d\u05b9x\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200d\u05b9xy", "×§\u200d\u05b9x"])
  call add(tl, [2, "×§\u200dx\\Z", "x×§\u200dxy", "×§\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    let matchidx = 3
    for engine in [0, 1, 2]
      " Skip the engine this entry is not meant for (see the table above).
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine

      " Common start of every failure message for this pattern and engine.
      let where = 'Error ' . engine . ': pat: \"' . pat . '\", text: \"' . text

      try
        let l = matchlist(text, pat)
      catch
        call assert_report(where . '\", caused an exception: \"' . v:exception . '\"')
        " There is no result to inspect; "l" would be undefined or left
        " over from the previous round.
        continue
      endtry

      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report(where . '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report(where . '\", match: \"' . l[0] . '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report(where . '\", match: \"' . l[0] . '\", expected: \"' . t[matchidx] . '\"')
      endif

      if len(l) > 0
        " check all the nine submatches; a submatch that is not listed in
        " the table is expected to be empty
        for i in range(1, 9)
          let e = get(t, matchidx + i, '')
          if l[i] != e
            call assert_report(where . '\", submatch ' . i . ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc
347
" check that 'ambiwidth' does not change the meaning of \p
func Test_ambiwidth()
  " U+00EC must match \p for every combination of 'regexpengine' 1 and 2
  " with 'ambiwidth' "single" and "double".
  for engine in [1, 2]
    for width in ['single', 'double']
      exe 'set regexpengine=' .. engine .. ' ambiwidth=' .. width
      call assert_equal(0, match("\u00EC", '\p'))
    endfor
  endfor
  set regexpengine& ambiwidth&
endfunc
360
func Run_regexp_ignore_case()
  " Every entry is [expected result, pattern, replacement] for a global
  " substitute() on the text 'iIİ'.  The first entry is case sensitive, the
  " others use \c in different positions of the pattern.
  let cases = [
        \ ['iIİ', '\([iIİ]\)', '\1'],
        \ ['iIx', '\c\([İ]\)', 'x'],
        \ ['xxİ', '\(i\c\)', 'x'],
        \ ['iIx', '\(İ\c\)', 'x'],
        \ ['iIx', '\c\(\%u0130\)', 'x'],
        \ ['iIx', '\c\([\u0130]\)', 'x'],
        \ ['iIx', '\c\([\u012f-\u0131]\)', 'x'],
        \ ]
  for [expected, pat, sub] in cases
    call assert_equal(expected, substitute('iIİ', pat, sub, 'g'))
  endfor
endfunc
371
func Test_regexp_ignore_case()
  " Run the ignore-case checks with 'regexpengine' 1 and 2, then go back to
  " the default engine setting.
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_ignore_case()
  endfor
  set regexpengine&
endfunc
379
" Tests for regexp with multi-byte encoding and various magic settings
func Run_regexp_multibyte_magic()
  " Every step below searches in a scratch buffer, deletes the character
  " under the cursor with "x" and checks the resulting line.  The steps
  " depend on the cursor position left by the previous one, so their order
  " matters.
  let text =<< trim END
    1 a aa abb abbccc
    2 d dd dee deefff
    3 g gg ghh ghhiii
    4 j jj jkk jkklll
    5 m mm mnn mnnooo
    6 x ^aa$ x
    7 (a)(b) abbaa
    8 axx [ab]xx
    9 หม่x อมx
    a อมx หม่x
    b ちカヨは
    c x ¬€x
    d 天使x
  END
  " Lines "e" and "f" hold the characters 0x12345678 and 0x1234abcd, which
  " the \%U and [\U] searches below look for.  They are generated here
  " because they are outside the Unicode range and cannot be typed reliably.
  call add(text, 'e ' .. nr2char(0x12345678) .. 'y')
  call add(text, 'f ' .. nr2char(0x1234abcd) .. 'z')
  let text += ['g a啷bb', 'j 0123❤x', 'k combinations', 'l äö üᾱ̆́']

  new
  call setline(1, text)
  exe 'normal /a*b\{2}c\+/e' .. "\<CR>x"
  call assert_equal('1 a aa abb abbcc', getline('.'))
  exe 'normal /\Md\*e\{2}f\+/e' .. "\<CR>x"
  call assert_equal('2 d dd dee deeff', getline('.'))
  set nomagic
  exe 'normal /g\*h\{2}i\+/e' .. "\<CR>x"
  call assert_equal('3 g gg ghh ghhii', getline('.'))
  exe 'normal /\mj*k\{2}l\+/e' .. "\<CR>x"
  call assert_equal('4 j jj jkk jkkll', getline('.'))
  exe 'normal /\vm*n{2}o+/e' .. "\<CR>x"
  call assert_equal('5 m mm mnn mnnoo', getline('.'))
  exe 'normal /\V^aa$/' .. "\<CR>x"
  call assert_equal('6 x aa$ x', getline('.'))
  set magic
  exe 'normal /\v(a)(b)\2\1\1/e' .. "\<CR>x"
  call assert_equal('7 (a)(b) abba', getline('.'))
  exe 'normal /\V[ab]\(\[xy]\)\1' .. "\<CR>x"
  call assert_equal('8 axx ab]xx', getline('.'))

  " search for multi-byte without composing char
  exe 'normal /ม' .. "\<CR>x"
  call assert_equal('9 หม่x อx', getline('.'))

  " search for multi-byte with composing char
  exe 'normal /ม่' .. "\<CR>x"
  call assert_equal('a อมx หx', getline('.'))

  " find word by change of word class
  exe 'normal /ち\<カヨ\>は' .. "\<CR>x"
  call assert_equal('b カヨは', getline('.'))

  " Test \%u, [\u] and friends
  " c
  exe 'normal /\%u20ac' .. "\<CR>x"
  call assert_equal('c x ¬x', getline('.'))
  " d
  exe 'normal /[\u4f7f\u5929]\+' .. "\<CR>x"
  call assert_equal('d 使x', getline('.'))
  " e
  exe 'normal /\%U12345678' .. "\<CR>x"
  call assert_equal('e y', getline('.'))
  " f
  exe 'normal /[\U1234abcd\u1234\uabcd]' .. "\<CR>x"
  call assert_equal('f z', getline('.'))
  " g
  exe 'normal /\%d21879b' .. "\<CR>x"
  call assert_equal('g abb', getline('.'))

  " j Test backwards search from a multi-byte char
  exe "normal /x\<CR>x?.\<CR>x"
  call assert_equal('j 012❤', getline('.'))
  " k  run a :substitute with composing characters from register w
  let @w=':%s#comb[i]nations#œ̄ṣ́m̥̄ᾱ̆́#g'
  @w
  call assert_equal('k œ̄ṣ́m̥̄ᾱ̆́', getline(18))

  close!
endfunc
464
func Test_regexp_multibyte_magic()
  " Run the multi-byte magic checks with 'regexpengine' 1 and 2, then go
  " back to the default engine setting.
  for engine in [1, 2]
    let &regexpengine = engine
    call Run_regexp_multibyte_magic()
  endfor
  set regexpengine&
endfunc
472
" Test for 7.3.192
" command ":s/ \?/ /g" splits multi-byte characters into bytes
func Test_split_multibyte_to_bytes()
  new
  let before = 'l äö üᾱ̆́'
  let after = ' l ä ö ü ᾱ̆́'
  call setline(1, before)
  " Every character must be preceded by a space as a whole, including its
  " composing characters.
  substitute/ \?/ /g
  call assert_equal(after, getline(1))
  close!
endfunc
482
" Test for matchstr() with multibyte characters
func Test_matchstr_multibyte()
  new
  " [expected, pattern, start, count] as passed to matchstr()
  let cases = [
        \ ['ב', ".", 0, 2],
        \ ['בג', "..", 0, 2],
        \ ['א', ".", 0, 0],
        \ ['ג', ".", 4, -1],
        \ ]
  for [expected, pat, start, nth] in cases
    call assert_equal(expected, matchstr("אבגד", pat, start, nth))
  endfor
  close!
endfunc
492
" Test for 7.4.636
" A search with end offset gets stuck at end of file.
func Test_search_with_end_offset()
  new
  call setline(1, ['', 'dog(a', 'cat('])
  " Search for "(" with the cursor placed one character after the match.
  execute "normal /(/e+\<CR>"
  " Repeat the search, then yank up to the next match into register a.
  execute 'normal n"ayn'
  call assert_equal("a\ncat(", getreg('a'))
  close!
endfunc
503
" Check that "^" matches even when the line starts with a combining char
func Test_match_start_of_line_combining()
  new
  " Line 2 consists of the combining character U+05AE only.
  let lines = ['', "\u05ae", '']
  call setline(1, lines)
  " Searching for "^" from line 1 must stop at line 2.
  execute "normal gg/^\<CR>"
  call assert_equal(2, line('.'))
  bwipe!
endfunc
512
" Check that [[:upper:]] matches for automatic engine
func Test_match_char_class_upper()
  new
  let saved_engine = &regexpengine

  call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
  call cursor(1, 1)

  " Each entry: [message, pattern, expected number of matches with
  " 'regexpengine' 0, 1 and 2].
  let searches = [
        \ ['TEST 1', '\<[[:upper:]]\{2,\}\>', [4, 2, 4]],
        \ ['TEST 2', '\<[[:upper:]].\+\>', [2, 1, 2]],
        \ ['TEST 3 lower', '\<[[:lower:]]\+\>', [4, 2, 4]],
        \ ]
  for [label, pat, totals] in searches
    let search_cmd = 'norm /' .. pat .. "\<CR>"
    for engine in range(3)
      let &regexpengine = engine
      exe search_cmd
      call assert_equal(totals[engine], searchcount().total, label)
    endfor
  endfor

  " clean up
  let &regexpengine = saved_engine
  bwipe!
endfunc
Bram Moolenaaref2dff52020-12-21 14:54:32 +0100560
Bram Moolenaarafc13bd2019-12-16 22:43:31 +0100561" vim: shiftwidth=2 sts=2 expandtab