blob: 98b9e73de603259a82e8af8b9b5493caac642bf5 [file] [log] [blame]
" Tests for regexp in utf8 encoding
Bram Moolenaar22e42152016-04-03 14:02:02 +02002
" Checks the [[=x=]] equivalence classes with whichever regexp engine is
" currently selected.  Every whitespace-separated word in "str" is one group
" of characters that are expected to be equivalent to each other.
func s:equivalence_test()
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠẢ BḂḆ CÇĆĈĊČ DĎĐḊḎḐ EÈÉÊËĒĔĖĘĚẺẼ F GĜĞĠĢǤǦǴḠ HĤĦḢḦḨ IÌÍÎÏĨĪĬĮİǏỈ JĴ KĶǨḰḴ LĹĻĽĿŁḺ MḾṀ NÑŃŅŇṄṈ OÒÓÔÕÖØŌŎŐƠǑǪǬỎ PṔṖ Q RŔŖŘṘṞ SŚŜŞŠṠ TŢŤŦṪṮ UÙÚÛÜŨŪŬŮŰŲƯǓỦ V WŴẀẂẄẆ XẊẌ YÝŶŸẎỲỶỸ ZŹŻŽƵẐẔ aàáâãäåāăąǎǟǡả bḃḇ cçćĉċč dďđḋḏḑ eèéêëēĕėęěẻẽ f gĝğġģǥǧǵḡ hĥħḣḧḩẖ iìíîïĩīĭįǐỉ jĵǰ kķǩḱḵ lĺļľŀłḻ mḿṁ nñńņňʼnṅṉ oòóôõöøōŏőơǒǫǭỏ pṕṗ q rŕŗřṙṟ sśŝşšṡ tţťŧṫṯẗ uùúûüũūŭůűųưǔủ v wŵẁẃẅẇẘ xẋẍ yýÿŷẏẙỳỷỹ zźżžƶẑẕ"
  let classes = split(str)
  for members in classes
    for ch in split(members, '\zs')
      let equiv = '[[=' . ch . '=]]'

      " the equivalence class of "ch" has to match every character of the
      " group that "ch" belongs to
      call assert_match('^' . equiv . '*$', members)

      for other in classes
        if other == members
          continue
        endif
        " the equivalence class of "ch" must not match any character of a
        " different group
        call assert_equal(-1, match(other, equiv))
      endfor
    endfor
  endfor
endfunc
21
" Runs the equivalence class checks with 'regexpengine' set to 1, then puts
" the option back to its default (0).
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  set regexpengine&
endfunc
27
" Runs the equivalence class checks with 'regexpengine' set to 2, then puts
" the option back to its default (0).
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  set regexpengine&
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020033
" Checks the [:name:] character classes and the \i, \k and \f atoms with
" whichever regexp engine is currently selected.
"
" For every class the characters with code points 1 to 255 that match are
" collected into a string, which is then compared with the expected set.
" The expected sets for identifier, keyword and file name characters depend
" on the platform.
func s:classes_test()
  " 'isprint' is changed for the [:print:] checks.  Remember the old value so
  " that it can be put back at the end and does not leak into other tests.
  let save_isprint = &isprint
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  let alnumchars = ''
  let alphachars = ''
  let backspacechar = ''
  let blankchars = ''
  let cntrlchars = ''
  let digitchars = ''
  let escapechar = ''
  let graphchars = ''
  let lowerchars = ''
  let printchars = ''
  let punctchars = ''
  let returnchar = ''
  let spacechars = ''
  let tabchar = ''
  let upperchars = ''
  let xdigitchars = ''
  let identchars = ''
  let identchars1 = ''
  let kwordchars = ''
  let kwordchars1 = ''
  let fnamechars = ''
  let fnamechars1 = ''

  " Sort every character 1..255 into the classes it matches.  The results
  " end up ordered by code point because the loop runs in ascending order.
  for i in range(1, 255)
    let c = nr2char(i)
    if c =~ '[[:alpha:]]'
      let alphachars .= c
    endif
    if c =~ '[[:alnum:]]'
      let alnumchars .= c
    endif
    if c =~ '[[:backspace:]]'
      let backspacechar .= c
    endif
    if c =~ '[[:blank:]]'
      let blankchars .= c
    endif
    if c =~ '[[:cntrl:]]'
      let cntrlchars .= c
    endif
    if c =~ '[[:digit:]]'
      let digitchars .= c
    endif
    if c =~ '[[:escape:]]'
      let escapechar .= c
    endif
    if c =~ '[[:graph:]]'
      let graphchars .= c
    endif
    if c =~ '[[:lower:]]'
      let lowerchars .= c
    endif
    if c =~ '[[:print:]]'
      let printchars .= c
    endif
    if c =~ '[[:punct:]]'
      let punctchars .= c
    endif
    if c =~ '[[:return:]]'
      let returnchar .= c
    endif
    if c =~ '[[:space:]]'
      let spacechars .= c
    endif
    if c =~ '[[:tab:]]'
      let tabchar .= c
    endif
    if c =~ '[[:upper:]]'
      let upperchars .= c
    endif
    if c =~ '[[:xdigit:]]'
      let xdigitchars .= c
    endif
    " [:ident:], [:keyword:] and [:fname:] are each collected twice: once
    " through the bracket class and once through the \i, \k and \f atom.
    " The two results are compared with each other at the end.
    if c =~ '[[:ident:]]'
      let identchars .= c
    endif
    if c =~ '\i'
      let identchars1 .= c
    endif
    if c =~ '[[:keyword:]]'
      let kwordchars .= c
    endif
    if c =~ '\k'
      let kwordchars1 .= c
    endif
    if c =~ '[[:fname:]]'
      let fnamechars .= c
    endif
    if c =~ '\f'
      let fnamechars1 .= c
    endif
  endfor

  " Code points 160-255, no-break space up to and including "ÿ".  Built from
  " the numbers instead of written out, because the range contains a
  " no-break space and a soft hyphen which cannot be seen in a literal and
  " are easily damaged when the file is copied or converted.
  let latin1_high = join(map(range(160, 255), 'nr2char(v:val)'), '')

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
  call assert_equal("\b", backspacechar)
  call assert_equal("\t ", blankchars)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
  call assert_equal("0123456789", digitchars)
  call assert_equal("\<Esc>", escapechar)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' . latin1_high, printchars)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
  call assert_equal("\r", returnchar)
  call assert_equal("\t\n\x0b\f\r ", spacechars)
  call assert_equal("\t", tabchar)
  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)

  " Expected identifier and keyword characters.
  if has('win32')
    " Code points 128-167 are identifier characters here.  128-159 are
    " control characters without a glyph, so the run is built from the
    " numbers rather than written as a literal.
    let ident_128_167 = join(map(range(128, 167), 'nr2char(v:val)'), '')
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' . ident_128_167 . 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('ebcdic')
    " The control characters are written as escapes because they have no
    " glyph.
    " NOTE(review): 0x8d is included on the assumption that the default
    " 'isident' for EBCDIC covers 140-142 - confirm on an EBCDIC build.
    let ebcdic_ctrl = "\u0080\u008c\u008d\u008e\u009c\u009e"
    let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' . ebcdic_ctrl . '¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' . ebcdic_ctrl . '¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  else
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  endif

  " Expected file name characters: a platform specific ASCII part followed
  " by all of 160-255.
  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' . latin1_high
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' . latin1_high
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' . latin1_high
  elseif has('ebcdic')
    let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' . latin1_high
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' . latin1_high
  endif

  call assert_equal(identchars_ok, identchars)
  call assert_equal(kwordchars_ok, kwordchars)
  call assert_equal(fnamechars_ok, fnamechars)

  " the bracket class and the backslash atom must agree with each other
  call assert_equal(identchars1, identchars)
  call assert_equal(kwordchars1, kwordchars)
  call assert_equal(fnamechars1, fnamechars)

  let &isprint = save_isprint
endfunc
180
" Runs the character class checks with 'regexpengine' set to 1, then puts
" the option back to its default (0).
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  set regexpengine&
endfunc
186
" Runs the character class checks with 'regexpengine' set to 2, then puts
" the option back to its default (0).
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  set regexpengine&
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200192
" The reversed range [c-a] has to fail with E944 for every value of
" 'regexpengine' (0, 1 and 2).
func Test_reversed_range()
  for engine in [0, 1, 2]
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:')
  endfor
  " back to the default engine selection
  set regexpengine&
endfunc
200
" Checks how the two regexp engines deal with the large range
" [\u3000-\u4000].
func Test_large_class()
  " with engine 1 the range is expected to be rejected with E945
  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  " with engine 2 the range is accepted: no match in plain ASCII text, a
  " match for a character inside the range
  let &regexpengine = 2
  call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
  call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')

  " back to the default engine selection
  set regexpengine&
endfunc