blob: bec5e0ed4fadbfa59a7f7a11bf3bf5082216741a [file] [log] [blame]
" Tests for regexp in utf8 encoding

" Every test below needs multi-byte support; stop sourcing the file without it.
if !has('multi_byte') | finish | endif
Bram Moolenaar22e42152016-04-03 14:02:02 +02005
" Check the [[=c=]] equivalence classes with the currently selected regexp
" engine.
" The table holds one space-separated group per base letter.  For every
" character c of every group:
"   - [[=c=]] must match the whole of c's own group, and
"   - [[=c=]] must not match any character of any other group.
" Failures are reported through assert_match() / assert_equal().
func s:equivalence_test()
  " U+0149 in the 'n' group is written as an escape so that it cannot be
  " silently decomposed into two separate characters (U+02BC + 'n'), which
  " would make that group fail its own equivalence check.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠẢ BḂḆ CÇĆĈĊČ DĎĐḊḎḐ EÈÉÊËĒĔĖĘĚẺẼ F GĜĞĠĢǤǦǴḠ HĤĦḢḦḨ IÌÍÎÏĨĪĬĮİǏỈ JĴ KĶǨḰḴ LĹĻĽĿŁḺ MḾṀ NÑŃŅŇṄṈ OÒÓÔÕÖØŌŎŐƠǑǪǬỎ PṔṖ Q RŔŖŘṘṞ SŚŜŞŠṠ TŢŤŦṪṮ UÙÚÛÜŨŪŬŮŰŲƯǓỦ V WŴẀẂẄẆ XẊẌ YÝŶŸẎỲỶỸ ZŹŻŽƵẐẔ aàáâãäåāăąǎǟǡả bḃḇ cçćĉċč dďđḋḏḑ eèéêëēĕėęěẻẽ f gĝğġģǥǧǵḡ hĥħḣḧḩẖ iìíîïĩīĭįǐỉ jĵǰ kķǩḱḵ lĺļľŀłḻ mḿṁ nñńņň\u0149ṅṉ oòóôõöøōŏőơǒǫǭỏ pṕṗ q rŕŗřṙṟ sśŝşšṡ tţťŧṫṯẗ uùúûüũūŭůűųưǔủ v wŵẁẃẅẇẘ xẋẍ yýÿŷẏẙỳỷỹ zźżžƶẑẕ"
  let groups = split(str)
  for group1 in groups
    for c in split(group1, '\zs')
      let equiv = '[[=' . c . '=]]'
      " next statement confirms that equivalence class matches every
      " character in group
      call assert_match('^' . equiv . '*$', group1)
      for group2 in groups
        " Compare case-sensitively whatever 'ignorecase' is set to, so the
        " upper and lower case groups are never mistaken for each other.
        if group2 !=# group1
          " next statement confirms that equivalence class doesn't match
          " character in any other group
          call assert_equal(-1, match(group2, equiv))
        endif
      endfor
    endfor
  endfor
endfunc
24
" Run the equivalence class checks with 'regexpengine' set to 1, then put
" the option back to 0.
func Test_equivalence_re1()
  let &regexpengine = 1
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
30
" Run the equivalence class checks with 'regexpengine' set to 2, then put
" the option back to 0.
func Test_equivalence_re2()
  let &regexpengine = 2
  call s:equivalence_test()
  let &regexpengine = 0
endfunc
Bram Moolenaaraf98a492016-04-24 14:40:12 +020036
" Check which characters with numbers 1-255 are matched by each [[:name:]]
" character class, using the currently selected regexp engine.
" 'isprint' is set to "@,161-255" while the checks run and is restored to
" its previous value afterwards, so the setting does not leak into other
" tests.  Failures are reported through assert_equal().
func s:classes_test()
  let save_isprint = &isprint
  set isprint=@,161-255
  try
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

    " One accumulator per class name.  Characters are appended in ascending
    " character number order, which is the order the expected strings use.
    let found = {}
    for name in ['alnum', 'alpha', 'backspace', 'blank', 'cntrl', 'digit',
          \ 'escape', 'graph', 'lower', 'print', 'punct', 'return', 'space',
          \ 'tab', 'upper', 'xdigit']
      let found[name] = ''
    endfor

    for nr in range(1, 255)
      let c = nr2char(nr)
      for name in keys(found)
        if c =~ '[[:' . name . ':]]'
          let found[name] .= c
        endif
      endfor
    endfor

    " Expected [[:print:]] set: 0x20-0x7e followed by 0xa0-0xff.  The
    " no-break space (U+00A0), the soft hyphen (U+00AD) and the
    " multiplication sign (U+00D7) are written as escapes because they are
    " invisible or easily mangled when the file is edited or re-encoded.
    let printable = ' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
          \ . "\u00a0" . '¡¢£¤¥¦§¨©ª«¬' . "\u00ad"
          \ . '®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ' . "\u00d7"
          \ . 'ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'

    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alpha)
    call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found.alnum)
    call assert_equal("\b", found.backspace)
    call assert_equal("\t ", found.blank)
    call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", found.cntrl)
    call assert_equal("0123456789", found.digit)
    call assert_equal("\<Esc>", found.escape)
    call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', found.graph)
    call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', found.lower)
    call assert_equal(printable, found.print)
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', found.punct)
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', found.upper)
    call assert_equal("\r", found.return)
    call assert_equal("\t\n\x0b\f\r ", found.space)
    call assert_equal("\t", found.tab)
    call assert_equal('0123456789ABCDEFabcdef', found.xdigit)
  finally
    " Restore the caller's 'isprint' even when a check above throws.
    let &isprint = save_isprint
  endtry
endfunc
128
" Run the character class checks with 'regexpengine' set to 1, then put
" the option back to 0.
func Test_classes_re1()
  let &regexpengine = 1
  call s:classes_test()
  let &regexpengine = 0
endfunc
134
" Run the character class checks with 'regexpengine' set to 2, then put
" the option back to 0.
func Test_classes_re2()
  let &regexpengine = 2
  call s:classes_test()
  let &regexpengine = 0
endfunc
Bram Moolenaar966e58e2017-06-05 16:54:08 +0200140
" A reversed range inside [] must fail with E944 for every value of
" 'regexpengine' (0, 1 and 2).  The option is left at 0 afterwards.
func Test_reversed_range()
  let engine = 0
  while engine <= 2
    let &regexpengine = engine
    call assert_fails('call match("abc def", "[c-a]")', 'E944:')
    let engine += 1
  endwhile
  let &regexpengine = 0
endfunc
148
" A very large character range: with 'regexpengine' set to 1 the pattern is
" expected to fail with E945, with 'regexpengine' set to 2 it is expected to
" work and match only characters inside the range.  The option is left at 0
" afterwards.
func Test_large_class()
  let wide_range = '[\u3000-\u4000]'

  let &regexpengine = 1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')

  let &regexpengine = 2
  call assert_equal(0, 'abc def' =~# wide_range)
  call assert_equal(1, "\u3042" =~# wide_range)

  let &regexpengine = 0
endfunc
156endfunc