" Source blob: 47bd7014abbbe55ec1c8c41bdc3e7733d4ceb482
" Tests for regexp in utf8 encoding

" Every test below matches multi-byte characters, so skip the whole file when
" Vim was built without the +multi_byte feature.
if !has('multi_byte')
  finish
endif

" Check the [[=x=]] equivalence classes with whatever regexp engine the caller
" selected through 'regexpengine'.
"
" The table below is a space-separated list of groups.  For every character of
" every group this verifies that:
"   - the character's equivalence class matches each character of its own
"     group, and
"   - the class matches no character of any other group.
" Failures are reported through assert_match() / assert_equal().
func s:equivalence_test()
  " U+0149 in the "n" group is written as \u0149: it must stay a single code
  " point, and text tools tend to expand it into the two characters "ʼ" + "n",
  " which would make the per-character loop below test a bogus class.
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠẢ BḂḆ CÇĆĈĊČ DĎĐḊḎḐ EÈÉÊËĒĔĖĘĚẺẼ F GĜĞĠĢǤǦǴḠ HĤĦḢḦḨ IÌÍÎÏĨĪĬĮİǏỈ JĴ KĶǨḰḴ LĹĻĽĿŁḺ MḾṀ NÑŃŅŇṄṈ OÒÓÔÕÖØŌŎŐƠǑǪǬỎ PṔṖ Q RŔŖŘṘṞ SŚŜŞŠṠ TŢŤŦṪṮ UÙÚÛÜŨŪŬŮŰŲƯǓỦ V WŴẀẂẄẆ XẊẌ YÝŶŸẎỲỶỸ ZŹŻŽƵẐẔ aàáâãäåāăąǎǟǡả bḃḇ cçćĉċč dďđḋḏḑ eèéêëēĕėęěẻẽ f gĝğġģǥǧǵḡ hĥħḣḧḩẖ iìíîïĩīĭįǐỉ jĵǰ kķǩḱḵ lĺļľŀłḻ mḿṁ nñńņň\u0149ṅṉ oòóôõöøōŏőơǒǫǭỏ pṕṗ q rŕŗřṙṟ sśŝşšṡ tţťŧṫṯẗ uùúûüũūŭůűųưǔủ v wŵẁẃẅẇẘ xẋẍ yýÿŷẏẙỳỷỹ zźżžƶẑẕ"
  let groups = split(str)
  for group1 in groups
    " '\zs' matches between any two characters, so this yields one list item
    " per character of the group.
    for c in split(group1, '\zs')
      " The equivalence class of c must match every character of its own
      " group.
      call assert_match('^[[=' . c . '=]]*$', group1)
      for group2 in groups
        " Compare case-sensitively so that the upper and lower case groups are
        " still checked against each other when 'ignorecase' is set.
        if group2 !=# group1
          " The equivalence class of c must not match any character of any
          " other group.
          call assert_equal(-1, match(group2, '[[=' . c . '=]]'))
        endif
      endfor
    endfor
  endfor
endfunc

" Run the equivalence class checks with 'regexpengine' set to 1.
func Test_equivalence_re1()
  set re=1
  try
    call s:equivalence_test()
  finally
    " Go back to automatic engine selection even when the checks throw, so
    " that the engine choice does not leak into later tests.
    set re=0
  endtry
endfunc

" Run the equivalence class checks with 'regexpengine' set to 2.
func Test_equivalence_re2()
  set re=2
  try
    call s:equivalence_test()
  finally
    " Go back to automatic engine selection even when the checks throw, so
    " that the engine choice does not leak into later tests.
    set re=0
  endtry
endfunc

" Check the [[:name:]] character classes with whatever regexp engine the
" caller selected through 'regexpengine'.
"
" Every character with a code point from 1 to 255 is matched against each
" class.  The characters a class accepts are collected in code point order and
" compared with the expected string.  'isprint' is set to a fixed value for
" the duration of the checks and restored afterwards.
" Failures are reported through assert_equal().
func s:classes_test()
  let save_isprint = &isprint
  try
    set isprint=@,161-255
    call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

    " "cntrl" and "graph" are collected although their assertions are disabled
    " below; matching them still exercises those classes in the engine.
    let names = split('alnum alpha backspace blank cntrl digit escape graph lower print punct return space tab upper xdigit')
    let found = {}
    for name in names
      let found[name] = ''
    endfor

    for nr in range(1, 255)
      let c = nr2char(nr)
      for name in names
        " =~# keeps the match case-sensitive regardless of 'ignorecase'.
        if c =~# '[[:' . name . ':]]'
          let found[name] .= c
        endif
      endfor
    endfor

    " Expected [[:print:]] result.  The no-break space (U+00A0) and the soft
    " hyphen (U+00AD) are invisible in most editors, so they are spelled out
    " as escapes instead of being embedded in the literal.
    let printable = ' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
    let printable .= "\u00a0" . '¡¢£¤¥¦§¨©ª«¬'
    let printable .= "\u00ad" . '®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'

    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found['alpha'])
    call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', found['alnum'])
    call assert_equal("\b", found['backspace'])
    call assert_equal("\t ", found['blank'])
    " Commented out: it succeeds on Linux and Windows, but fails on macOS in Travis.
    " call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", found['cntrl'])
    call assert_equal("0123456789", found['digit'])
    call assert_equal("\<Esc>", found['escape'])
    " Commented out: it succeeds on Linux and Windows, but fails on macOS in Travis.
    " call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', found['graph'])
    call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', found['lower'])
    call assert_equal(printable, found['print'])
    call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', found['punct'])
    call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', found['upper'])
    call assert_equal("\r", found['return'])
    call assert_equal("\t\n\x0b\f\r ", found['space'])
    call assert_equal("\t", found['tab'])
    call assert_equal('0123456789ABCDEFabcdef', found['xdigit'])
  finally
    " Put back the caller's 'isprint' so the fixed value used above does not
    " leak into later tests.
    let &isprint = save_isprint
  endtry
endfunc

" Run the character class checks with 'regexpengine' set to 1.
func Test_classes_re1()
  set re=1
  try
    call s:classes_test()
  finally
    " Go back to automatic engine selection even when the checks throw, so
    " that the engine choice does not leak into later tests.
    set re=0
  endtry
endfunc

" Run the character class checks with 'regexpengine' set to 2.
func Test_classes_re2()
  set re=2
  try
    call s:classes_test()
  finally
    " Go back to automatic engine selection even when the checks throw, so
    " that the engine choice does not leak into later tests.
    set re=0
  endtry
endfunc