Bram Moolenaar | 3e8cb58 | 2010-01-12 19:52:03 +0100 | [diff] [blame] | 1 | " Script to extract tables from Unicode .txt files, to be used in src/mbyte.c. |
| 2 | " The format of the UnicodeData.txt file is explained here: |
| 3 | " http://www.unicode.org/Public/5.1.0/ucd/UCD.html |
| 4 | " For the other files see the header. |
| 5 | " |
Bram Moolenaar | 383aa84 | 2017-06-22 15:27:37 +0200 | [diff] [blame] | 6 | " Might need to update the URL to the emoji-data.txt |
Bram Moolenaar | 3e8cb58 | 2010-01-12 19:52:03 +0100 | [diff] [blame] | 7 | " Usage: Vim -S <this-file> |
| 8 | " |
| 9 | " Author: Bram Moolenaar |
Bram Moolenaar | 207f009 | 2020-08-30 17:20:20 +0200 | [diff] [blame] | 10 | " Last Update: 2020 Aug 24 |
Bram Moolenaar | 3e8cb58 | 2010-01-12 19:52:03 +0100 | [diff] [blame] | 11 | |
| 12 | " Parse lines of UnicodeData.txt. Creates a list of lists in s:dataprops. |
" Read the current buffer as UnicodeData.txt: every line is split on ';'
" (surrounding white space dropped, empty fields kept) and stored as one item
" of s:dataprops.  Stops with an error message at the first line that does
" not have exactly 15 fields.
func! ParseDataToProps()
  let s:dataprops = []
  for lnum in range(1, line('$'))
    let fields = split(getline(lnum), '\s*;\s*', 1)
    if len(fields) != 15
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 15'
      return
    endif
    call add(s:dataprops, fields)
  endfor
endfunc
| 26 | |
| 27 | " Parse lines of CaseFolding.txt. Creates a list of lists in s:foldprops. |
" Read the current buffer as CaseFolding.txt.  Comment lines (starting with
" '#') and blank lines are skipped; every other line is split on ';' and
" stored as one item of s:foldprops.  Stops with an error message at the
" first data line that does not have exactly 4 fields.
func! ParseFoldProps()
  let s:foldprops = []
  for lnum in range(1, line('$'))
    let text = getline(lnum)
    if text =~ '^#' || text =~ '^\s*$'
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    if len(fields) != 4
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 4'
      return
    endif
    call add(s:foldprops, fields)
  endfor
endfunc
| 44 | |
| 45 | " Parse lines of EastAsianWidth.txt. Creates a list of lists in s:widthprops. |
" Read the current buffer as EastAsianWidth.txt.  Comment lines (starting
" with '#') and blank lines are skipped; every other line is split on ';' and
" stored as one item of s:widthprops.  Stops with an error message at the
" first data line that does not have exactly 2 fields.
func! ParseWidthProps()
  let s:widthprops = []
  for lnum in range(1, line('$'))
    let text = getline(lnum)
    if text =~ '^#' || text =~ '^\s*$'
      continue
    endif
    let fields = split(text, '\s*;\s*', 1)
    if len(fields) != 2
      echoerr 'Found ' . len(fields) . ' items in line ' . lnum . ', expected 2'
      return
    endif
    call add(s:widthprops, fields)
  endfor
endfunc
| 62 | |
| 63 | " Build the toLower or toUpper table in a new buffer. |
| 64 | " Uses s:dataprops. |
" Build a case conversion table from s:dataprops and put it in a new buffer
" named "to{name}".
"   name   suffix for the buffer and table name, e.g. "Lower" or "Upper"
"   index  index of the field in each s:dataprops item that holds the
"          converted character (hex); items where it is empty are skipped
" Consecutive characters with the same offset to their converted character
" and a constant distance between them are merged into one range.
func! BuildCaseTable(name, index)
  let first = -1
  let last = -1
  let stride = 0
  let offset = -1
  let rows = []
  for prop in s:dataprops
    if prop[a:index] == ''
      continue
    endif
    let code = ('0x' . prop[0]) + 0
    let mapped = ('0x' . prop[a:index]) + 0
    if first >= 0 && offset == mapped - code && (stride == 0 || code - last == stride)
      " This character extends the range being collected.
      let stride = code - last
      let last = code
      continue
    endif
    if first >= 0
      " The range being collected ends here, write it out.
      call Range(rows, first, last, stride, offset)
    endif
    " Start a new range at this character.
    let [first, last, stride, offset] = [code, code, 0, mapped - code]
  endfor
  if first >= 0
    call Range(rows, first, last, stride, offset)
  endif

  " Put the result in a new buffer, then go back to the data buffer.
  new
  exe "file to" . a:name
  call setline(1, ["static convertStruct to" . a:name . "[] =", "{"])
  call append('$', rows)
  " Drop the comma after the last range.
  call setline('$', getline('$')[:-2])
  call append('$', "};")
  wincmd p
endfunc
| 105 | |
| 106 | " Build the foldCase table in a new buffer. |
| 107 | " Uses s:foldprops. |
" Build the foldCase table from s:foldprops and put it in a new buffer named
" "foldCase".  Only items with status 'C' or 'S' are used.
" Consecutive characters with the same offset to their folded character and a
" constant distance between them are merged into one range.
func! BuildFoldTable()
  let first = -1
  let last = -1
  let stride = 0
  let offset = -1
  let rows = []
  for prop in s:foldprops
    if prop[1] != 'C' && prop[1] != 'S'
      continue
    endif
    let code = ('0x' . prop[0]) + 0
    let folded = ('0x' . prop[2]) + 0
    if first >= 0 && offset == folded - code && (stride == 0 || code - last == stride)
      " This character extends the range being collected.
      let stride = code - last
      let last = code
      continue
    endif
    if first >= 0
      " The range being collected ends here, write it out.
      call Range(rows, first, last, stride, offset)
    endif
    " Start a new range at this character.
    let [first, last, stride, offset] = [code, code, 0, folded - code]
  endfor
  if first >= 0
    call Range(rows, first, last, stride, offset)
  endif

  " Put the result in a new buffer, then go back to the data buffer.
  new
  file foldCase
  call setline(1, ["static convertStruct foldCase[] =", "{"])
  call append('$', rows)
  " Drop the comma after the last range.
  call setline('$', getline('$')[:-2])
  call append('$', "};")
  wincmd p
endfunc
| 148 | |
" Append one convertStruct initializer line to the list "ranges".
" A step of zero (a range with a single character) is written as -1.
func! Range(ranges, start, end, step, add)
  let stepfield = a:step == 0 ? -1 : a:step
  call add(a:ranges, printf("\t{0x%x,0x%x,%d,%d},", a:start, a:end, stepfield, a:add))
endfunc
| 153 | |
| 154 | " Build the combining table. |
| 155 | " Uses s:dataprops. |
" Build the table of combining characters from s:dataprops and put it in a
" new buffer named "combining".  Adjacent characters are merged into one
" interval.
func! BuildCombiningTable()
  let first = -1
  let last = -1
  let rows = []
  for prop in s:dataprops
    " Only 'Mn' and 'Me' are used; 'Mc' was removed because those characters
    " do take up space.
    if prop[2] != 'Mn' && prop[2] != 'Me'
      continue
    endif
    let code = ('0x' . prop[0]) + 0
    if first >= 0 && last + 1 == code
      " Directly follows the previous character: extend the interval.
      let last = code
      continue
    endif
    if first >= 0
      " There is a gap: write out the interval collected so far.
      call add(rows, printf("\t{0x%04x, 0x%04x},", first, last))
    endif
    let [first, last] = [code, code]
  endfor
  if first >= 0
    call add(rows, printf("\t{0x%04x, 0x%04x},", first, last))
  endif

  " Put the result in a new buffer, then go back to the data buffer.
  new
  file combining
  call setline(1, [" static struct interval combining[] =", " {"])
  call append('$', rows)
  " Drop the comma after the last interval.
  call setline('$', getline('$')[:-2])
  call append('$', " };")
  wincmd p
endfunc
| 191 | |
Bram Moolenaar | da4d7a9 | 2010-01-27 18:29:26 +0100 | [diff] [blame] | 192 | " Build the double width or ambiguous width table in a new buffer. |
Bram Moolenaar | 3e8cb58 | 2010-01-12 19:52:03 +0100 | [diff] [blame] | 193 | " Uses s:widthprops and s:dataprops. |
" Build a width table from s:widthprops and s:dataprops and put it in a new
" buffer named {tableName}.
"   pattern    regexp matched against the first character of the width
"              property; 'A' selects the ambiguous table layout, anything
"              else the double width layout
"   tableName  name for the buffer and the generated table
" Every interval is also added to s:ambitable (pattern 'A') or s:doubletable.
func! BuildWidthTable(pattern, tableName)
  let first = -1
  let last = -1
  let rows = []
  let idx = 0
  " The ambiguous and doublewidth tables are indented differently in mbyte.c.
  let indent = a:pattern == 'A' ? ' ' : "\t"
  for prop in s:widthprops
    if prop[1][0] !~ a:pattern
      continue
    endif

    if prop[0] =~ '\.\.'
      " A range of characters; no check for composing chars is done then.
      let bounds = split(prop[0], '\.\.')
      if len(bounds) != 2
        echoerr "Cannot parse range: '" . prop[0] . "' in width table"
      endif
      let lo = ('0x' . bounds[0]) + 0
      let hi = ('0x' . bounds[1]) + 0
    else
      let lo = ('0x' . prop[0]) + 0
      let hi = lo
    endif

    " Move forward in the data table to the first entry at or after "lo".
    let dn = ('0x' . s:dataprops[idx][0]) + 0
    while dn < lo
      let idx += 1
      let dn = ('0x' . s:dataprops[idx][0]) + 0
    endwhile
    if dn != lo && hi == lo
      echoerr "Cannot find character " . lo . " in data table"
    endif

    " A single character is skipped when it is a composing char; all
    " characters of a range are always used.
    let dp = s:dataprops[idx]
    if hi <= lo && (dp[2] == 'Mn' || dp[2] == 'Mc' || dp[2] == 'Me')
      continue
    endif

    if first < 0 || last + 1 != lo
      if first >= 0
        " There is a gap: write out the interval collected so far.
        call add(rows, printf("%s{0x%04x, 0x%04x},", indent, first, last))
        call add(a:pattern == 'A' ? s:ambitable : s:doubletable, [first, last])
      endif
      let first = lo
    endif
    let last = hi
  endfor
  if first >= 0
    call add(rows, printf("%s{0x%04x, 0x%04x},", indent, first, last))
    call add(a:pattern == 'A' ? s:ambitable : s:doubletable, [first, last])
  endif

  " Put the result in a new buffer, then go back to the data buffer.
  new
  exe "file " . a:tableName
  let margin = a:pattern == 'A' ? "" : " "
  call setline(1, margin . "static struct interval " . a:tableName . "[] =")
  call setline(2, margin . "{")
  call append('$', rows)
  " Drop the comma after the last interval.
  call setline('$', getline('$')[:-2])
  call append('$', margin . "};")
  wincmd p
endfunc
| 281 | |
Bram Moolenaar | 207f009 | 2020-08-30 17:20:20 +0200 | [diff] [blame] | 282 | |
| 283 | " Get characters from a list of lines in form "12ab .." or "12ab..56cd ..." |
| 284 | " and put them in dictionary "chardict" |
" Add an entry with value 1 to dictionary "chardict" for every character
" mentioned in "lines".  Each line starts with a hex character number
" ("12ab ...") or a hex range ("12ab..56cd ..."); a range adds all characters
" from the first up to and including the last.
func AddLinesToCharDict(lines, chardict)
  for entry in a:lines
    let parts = split(entry, '\.\.')
    let lo = str2nr(parts[0], 16)
    let hi = len(parts) == 1 ? lo : str2nr(parts[1], 16)
    for code in range(lo, hi)
      let a:chardict[code] = 1
    endfor
  endfor
endfunc
Bram Moolenaar | b86f10e | 2016-03-21 22:09:44 +0100 | [diff] [blame] | 299 | |
" Self-test for AddLinesToCharDict(): single characters, a range, and a
" character that is also inside the range.
" Returns 1 and gives an error message when v:errors is not empty afterwards,
" 0 otherwise.
func Test_AddLinesToCharDict()
  let input = [
	\ '1234 blah blah',
	\ '1235 blah blah',
	\ '12a0..12a2 blah blah',
	\ '12a1 blah blah',
	\ ]
  let expected = {0x1234: 1, 0x1235: 1,
	\ 0x12a0: 1, 0x12a1: 1, 0x12a2: 1,
	\ }
  let actual = {}
  call AddLinesToCharDict(input, actual)
  call assert_equal(expected, actual)
  if empty(v:errors)
    return 0
  endif
  echoerr 'AddLinesToCharDict' v:errors
  return 1
endfunc
| 317 | |
| 318 | |
" Turn dictionary "chardict", which has a character number as the key of each
" entry, into a sorted list of [low, high] pairs, where each pair covers a run
" of consecutive character numbers.
" Returns an empty list when "chardict" is empty (this used to fail with an
" index error on the first key).
func CharDictToPairList(chardict)
  let result = []
  " Dictionary keys are strings: convert to numbers and sort numerically.
  let codes = keys(a:chardict)->map('str2nr(v:val)')->sort('N')
  if empty(codes)
    return result
  endif
  let low = codes[0]
  let high = codes[0]
  for code in codes
    if code > high + 1
      " There is a gap: finish the current run and start a new one.
      call add(result, [low, high])
      let low = code
    endif
    let high = code
  endfor
  " The last run is still pending.
  call add(result, [low, high])
  return result
endfunc
| 336 | |
" Self-test for CharDictToPairList(): a run of three characters, two single
" characters and a run of two characters.
" Returns 1 and gives an error message when v:errors is not empty afterwards,
" 0 otherwise.
func Test_CharDictToPairList()
  let input = {0x1020: 1, 0x1021: 1, 0x1022: 1,
	\ 0x1024: 1,
	\ 0x2022: 1,
	\ 0x2024: 1, 0x2025: 1}
  let expected = [
	\ [0x1020, 0x1022],
	\ [0x1024, 0x1024],
	\ [0x2022, 0x2022],
	\ [0x2024, 0x2025],
	\ ]
  call assert_equal(expected, CharDictToPairList(input))
  if empty(v:errors)
    return 0
  endif
  echoerr 'CharDictToPairList' v:errors
  return 1
endfunc
| 354 | |
| 355 | |
| 356 | " Build the emoji width table in a new buffer. |
" Build two tables from the emoji data in the current buffer, each in its own
" new buffer:
"   emoji_all   all characters with the "Emoji" property
"   emoji_wide  characters with the "Emoji_Presentation" or
"               "Emoji_Modifier_Base" property, minus the characters listed
"               in s:ambitable and s:doubletable
" Only buffer lines starting with a digit 1-9 are looked at.
func BuildEmojiTable()
  " Part one: the table for all emojis.
  let pattern = '; Emoji\s\+#\s'
  let candidates = filter(getline(1, '$'), 'v:val=~"^[1-9]"')
  let matching = filter(candidates, 'v:val=~pattern')
  let lines = map(matching, 'matchstr(v:val,"^\\S\\+")')

  " One dictionary entry per character, then merge them into intervals.
  let chardict = {}
  call AddLinesToCharDict(lines, chardict)
  let allranges = []
  for pair in CharDictToPairList(chardict)
    call add(allranges, printf(" {0x%04x, 0x%04x},", pair[0], pair[1]))
  endfor

  " Put the result in a new buffer, then go back to the data buffer.
  new
  file emoji_all
  call setline(1, ["static struct interval emoji_all[] =", "{"])
  call append('$', allranges)
  " Drop the comma after the last interval.
  call setline('$', getline('$')[:-2])
  call append('$', "};")
  wincmd p

  " Part two: the table for wide emojis.
  let pattern = '; Emoji_\(Presentation\|Modifier_Base\)\s\+#\s'
  let candidates = filter(getline(1, '$'), 'v:val=~"^[1-9]"')
  let matching = filter(candidates, 'v:val=~pattern')
  let lines = map(matching, 'matchstr(v:val,"^\\S\\+")')

  let chardict = {}
  call AddLinesToCharDict(lines, chardict)

  " Leave out characters that are already in the "ambiguous" table ...
  for [lo, hi] in s:ambitable
    for nr in range(lo, hi)
      if has_key(chardict, nr)
	call remove(chardict, nr)
      endif
    endfor
  endfor

  " ... or in the "doublewidth" table.
  for [lo, hi] in s:doubletable
    for nr in range(lo, hi)
      if has_key(chardict, nr)
	call remove(chardict, nr)
      endif
    endfor
  endfor

  let wide_ranges = []
  for pair in CharDictToPairList(chardict)
    call add(wide_ranges, printf("\t{0x%04x, 0x%04x},", pair[0], pair[1]))
  endfor

  " Put the result in a new buffer, then go back to the data buffer.
  new
  file emoji_wide
  call setline(1, [" static struct interval emoji_wide[] =", " {"])
  call append('$', wide_ranges)
  " Drop the comma after the last interval.
  call setline('$', getline('$')[:-2])
  call append('$', " };")
  wincmd p
endfunc
Bram Moolenaar | 3e8cb58 | 2010-01-12 19:52:03 +0100 | [diff] [blame] | 416 | |
Bram Moolenaar | 207f009 | 2020-08-30 17:20:20 +0200 | [diff] [blame] | 417 | " First test a few things |
" Run the self-tests first; do nothing else when one of them fails.
let v:errors = []
if Test_AddLinesToCharDict() || Test_CharDictToPairList()
  finish
endif

" The Unicode files are edited by URL, which requires the netrw plugin.
if !exists("g:loaded_netrw")
  echomsg "Netrw not available, cannot download"
  finish
endif

" Several windows get opened below; try to avoid hitting E36.
set equalalways

" UnicodeData.txt: parse it into s:dataprops, then build the toLower table
" (field 13), the toUpper table (field 12) and the combining table from it.
edit http://unicode.org/Public/UNIDATA/UnicodeData.txt
call ParseDataToProps()
call BuildCaseTable("Lower", 13)
call BuildCaseTable("Upper", 12)
call BuildCombiningTable()

" CaseFolding.txt: parse it into s:foldprops, then build the foldCase table.
edit http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
call ParseFoldProps()
call BuildFoldTable()

" EastAsianWidth.txt: parse it into s:widthprops, then build the double width
" and the ambiguous width tables.  The intervals are collected in
" s:doubletable and s:ambitable as well.
edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
call ParseWidthProps()
let [s:doubletable, s:ambitable] = [[], []]
call BuildWidthTable('[WF]', 'doublewidth')
call BuildWidthTable('A', 'ambiguous')
Bram Moolenaar | 3848e00 | 2016-03-19 18:42:29 +0100 | [diff] [blame] | 468 | |
| 469 | " Edit the emoji text file. Requires the netrw plugin. |
Christian Brabandt | 9882e9d | 2022-09-25 19:25:51 +0100 | [diff] [blame] | 470 | " commented out, because it drops too many characters |
| 471 | "edit https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt |
| 472 | " |
| 473 | "" Build the emoji table. Ver. 1.0 - 6.0 |
| 474 | "" Must come after the "ambiguous" and "doublewidth" tables |
| 475 | "call BuildEmojiTable() |