blob: f0d6d96a47f02c92c1615d428cb767c31c3e9282 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spellfile.c: code for reading and writing spell files.
12 *
13 * See spell.c for information about spell checking.
14 */
15
16/*
17 * Vim spell file format: <HEADER>
18 * <SECTIONS>
19 * <LWORDTREE>
20 * <KWORDTREE>
21 * <PREFIXTREE>
22 *
23 * <HEADER>: <fileID> <versionnr>
24 *
25 * <fileID> 8 bytes "VIMspell"
26 * <versionnr> 1 byte VIMSPELLVERSION
27 *
28 *
29 * Sections make it possible to add information to the .spl file without
30 * making it incompatible with previous versions. There are two kinds of
31 * sections:
32 * 1. Not essential for correct spell checking. E.g. for making suggestions.
33 * These are skipped when not supported.
34 * 2. Optional information, but essential for spell checking when present.
35 * E.g. conditions for affixes. When this section is present but not
36 * supported an error message is given.
37 *
38 * <SECTIONS>: <section> ... <sectionend>
39 *
40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
41 *
42 * <sectionID> 1 byte number from 0 to 254 identifying the section
43 *
44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
45 * spell checking
46 *
47 * <sectionlen> 4 bytes length of section contents, MSB first
48 *
49 * <sectionend> 1 byte SN_END
50 *
51 *
52 * sectionID == SN_INFO: <infotext>
53 * <infotext> N bytes free format text with spell file info (version,
54 * website, etc)
55 *
56 * sectionID == SN_REGION: <regionname> ...
Bram Moolenaar2993ac52018-02-10 14:12:43 +010057 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower
58 * case. First <regionname> is region 1.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +020059 *
60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
61 * <folcharslen> <folchars>
62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
63 * <charflags> N bytes List of flags (first one is for character 128):
64 * 0x01 word character CF_WORD
65 * 0x02 upper-case character CF_UPPER
66 * <folcharslen> 2 bytes Number of bytes in <folchars>.
67 * <folchars> N bytes Folded characters, first one is for character 128.
68 *
69 * sectionID == SN_MIDWORD: <midword>
70 * <midword> N bytes Characters that are word characters only when used
71 * in the middle of a word.
72 *
73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
74 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
75 * <prefcond> : <condlen> <condstr>
76 * <condlen> 1 byte Length of <condstr>.
77 * <condstr> N bytes Condition for the prefix.
78 *
79 * sectionID == SN_REP: <repcount> <rep> ...
80 * <repcount> 2 bytes number of <rep> items, MSB first.
81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
82 * <repfromlen> 1 byte length of <repfrom>
83 * <repfrom> N bytes "from" part of replacement
84 * <reptolen> 1 byte length of <repto>
85 * <repto> N bytes "to" part of replacement
86 *
87 * sectionID == SN_REPSAL: <repcount> <rep> ...
88 * just like SN_REP but for soundfolded words
89 *
90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
91 * <salflags> 1 byte flags for soundsalike conversion:
92 * SAL_F0LLOWUP
93 * SAL_COLLAPSE
94 * SAL_REM_ACCENTS
95 * <salcount> 2 bytes number of <sal> items following
96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
97 * <salfromlen> 1 byte length of <salfrom>
98 * <salfrom> N bytes "from" part of soundsalike
99 * <saltolen> 1 byte length of <salto>
100 * <salto> N bytes "to" part of soundsalike
101 *
102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
103 * <sofofromlen> 2 bytes length of <sofofrom>
104 * <sofofrom> N bytes "from" part of soundfold
105 * <sofotolen> 2 bytes length of <sofoto>
106 * <sofoto> N bytes "to" part of soundfold
107 *
108 * sectionID == SN_SUGFILE: <timestamp>
109 * <timestamp> 8 bytes time in seconds that must match with .sug file
110 *
111 * sectionID == SN_NOSPLITSUGS: nothing
112 *
113 * sectionID == SN_NOCOMPOUNDSUGS: nothing
114 *
115 * sectionID == SN_WORDS: <word> ...
116 * <word> N bytes NUL terminated common word
117 *
118 * sectionID == SN_MAP: <mapstr>
119 * <mapstr> N bytes String with sequences of similar characters,
120 * separated by slashes.
121 *
122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
123 * <comppatcount> <comppattern> ... <compflags>
124 * <compmax> 1 byte Maximum nr of words in compound word.
125 * <compminlen> 1 byte Minimal word length for compounding.
126 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
127 * <compoptions> 2 bytes COMP_ flags.
128 * <comppatcount> 2 bytes number of <comppattern> following
129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
130 * slashes.
131 *
132 * <comppattern>: <comppatlen> <comppattext>
133 * <comppatlen> 1 byte length of <comppattext>
134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
135 *
136 * sectionID == SN_NOBREAK: (empty, its presence is what matters)
137 *
138 * sectionID == SN_SYLLABLE: <syllable>
139 * <syllable> N bytes String from SYLLABLE item.
140 *
141 * <LWORDTREE>: <wordtree>
142 *
143 * <KWORDTREE>: <wordtree>
144 *
145 * <PREFIXTREE>: <wordtree>
146 *
147 *
148 * <wordtree>: <nodecount> <nodedata> ...
149 *
150 * <nodecount> 4 bytes Number of nodes following. MSB first.
151 *
152 * <nodedata>: <siblingcount> <sibling> ...
153 *
154 * <siblingcount> 1 byte Number of siblings in this node. The siblings
155 * follow in sorted order.
156 *
157 * <sibling>: <byte> [ <nodeidx> <xbyte>
158 * | <flags> [<flags2>] [<region>] [<affixID>]
159 * | [<pflags>] <affixID> <prefcondnr> ]
160 *
161 * <byte> 1 byte Byte value of the sibling. Special cases:
162 * BY_NOFLAGS: End of word without flags and for all
163 * regions.
164 * For PREFIXTREE <affixID> and
165 * <prefcondnr> follow.
166 * BY_FLAGS: End of word, <flags> follow.
167 * For PREFIXTREE <pflags>, <affixID>
168 * and <prefcondnr> follow.
169 * BY_FLAGS2: End of word, <flags> and <flags2>
170 * follow. Not used in PREFIXTREE.
171 * BY_INDEX: Child of sibling is shared, <nodeidx>
172 * and <xbyte> follow.
173 *
174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
175 *
176 * <xbyte> 1 byte byte value of the sibling.
177 *
178 * <flags> 1 byte bitmask of:
179 * WF_ALLCAP word must have only capitals
180 * WF_ONECAP first char of word must be capital
181 * WF_KEEPCAP keep-case word
182 * WF_FIXCAP keep-case word, all caps not allowed
183 * WF_RARE rare word
184 * WF_BANNED bad word
185 * WF_REGION <region> follows
186 * WF_AFX <affixID> follows
187 *
188 * <flags2> 1 byte Bitmask of:
189 * WF_HAS_AFF >> 8 word includes affix
190 * WF_NEEDCOMP >> 8 word only valid in compound
191 * WF_NOSUGGEST >> 8 word not used for suggestions
192 * WF_COMPROOT >> 8 word already a compound
193 * WF_NOCOMPBEF >> 8 no compounding before this word
194 * WF_NOCOMPAFT >> 8 no compounding after this word
195 *
196 * <pflags> 1 byte bitmask of:
197 * WFP_RARE rare prefix
198 * WFP_NC non-combining prefix
199 * WFP_UP letter after prefix made upper case
200 *
201 * <region> 1 byte Bitmask for regions in which word is valid. When
202 * omitted it's valid in all regions.
203 * Lowest bit is for region 1.
204 *
205 * <affixID> 1 byte ID of affix that can be used with this word. In
206 * PREFIXTREE used for the required prefix ID.
207 *
208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
209 * from HEADER.
210 *
211 * All text characters are in 'encoding', but stored as single bytes.
212 */
213
214/*
215 * Vim .sug file format: <SUGHEADER>
216 * <SUGWORDTREE>
217 * <SUGTABLE>
218 *
219 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
220 *
221 * <fileID> 6 bytes "VIMsug"
222 * <versionnr> 1 byte VIMSUGVERSION
223 * <timestamp> 8 bytes timestamp that must match with .spl file
224 *
225 *
226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
227 *
228 *
229 * <SUGTABLE>: <sugwcount> <sugline> ...
230 *
231 * <sugwcount> 4 bytes number of <sugline> following
232 *
233 * <sugline>: <sugnr> ... NUL
234 *
235 * <sugnr>: X bytes word number that results in this soundfolded word,
236 * stored as an offset to the previous number in as
237 * few bytes as possible, see offset2bytes())
238 */
239
240#include "vim.h"
241
242#if defined(FEAT_SPELL) || defined(PROTO)
243
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100244#ifndef UNIX // it's in os_unix.h for Unix
245# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200246#endif
247
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100248#ifndef UNIX // it's in os_unix.h for Unix
249# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200250#endif
251
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100252// Special byte values for <byte>. Some are only used in the tree for
253// postponed prefixes, some only in the other trees. This is a bit messy...
254#define BY_NOFLAGS 0 // end of word without flags or region; for
255 // postponed prefix: no <pflags>
256#define BY_INDEX 1 // child is shared, index follows
257#define BY_FLAGS 2 // end of word, <flags> byte follows; for
258 // postponed prefix: <pflags> follows
259#define BY_FLAGS2 3 // end of word, <flags> and <flags2> bytes
260 // follow; never used in prefix tree
261#define BY_SPECIAL BY_FLAGS2 // highest special byte value
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200262
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +0100263#define ZERO_FLAG 65009 // used when flag is zero: "0"
264
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100265// Flags used in .spl file for soundsalike flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200266#define SAL_F0LLOWUP 1
267#define SAL_COLLAPSE 2
268#define SAL_REM_ACCENTS 4
269
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100270#define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200271#define VIMSPELLMAGICL 8
272#define VIMSPELLVERSION 50
273
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100274// Section IDs. Only renumber them when VIMSPELLVERSION changes!
275#define SN_REGION 0 // <regionname> section
276#define SN_CHARFLAGS 1 // charflags section
277#define SN_MIDWORD 2 // <midword> section
278#define SN_PREFCOND 3 // <prefcond> section
279#define SN_REP 4 // REP items section
280#define SN_SAL 5 // SAL items section
281#define SN_SOFO 6 // soundfolding section
282#define SN_MAP 7 // MAP items section
283#define SN_COMPOUND 8 // compound words section
284#define SN_SYLLABLE 9 // syllable section
285#define SN_NOBREAK 10 // NOBREAK section
286#define SN_SUGFILE 11 // timestamp for .sug file
287#define SN_REPSAL 12 // REPSAL items section
288#define SN_WORDS 13 // common words
289#define SN_NOSPLITSUGS 14 // don't split word for suggestions
290#define SN_INFO 15 // info section
291#define SN_NOCOMPOUNDSUGS 16 // don't compound for suggestions
292#define SN_END 255 // end of sections
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200293
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100294#define SNF_REQUIRED 1 // <sectionflags>: required section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200295
296#define CF_WORD 0x01
297#define CF_UPPER 0x02
298
/*
 * Loop through all the siblings of a node (including the node).
 * "node" is the first node to visit, "np" is the loop variable; it is set to
 * each node in turn by following the "wn_sibling" pointer until it is NULL.
 * "np" is evaluated several times, pass a plain variable.
 */
#define FOR_ALL_NODE_SIBLINGS(node, np) \
    for ((np) = (node); (np) != NULL; (np) = (np)->wn_sibling)
304
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200305static int set_spell_finish(spelltab_T *new_st);
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +0000306static int write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200307static int read_region_section(FILE *fd, slang_T *slang, int len);
308static int read_charflags_section(FILE *fd);
309static int read_prefcond_section(FILE *fd, slang_T *lp);
310static int read_rep_section(FILE *fd, garray_T *gap, short *first);
311static int read_sal_section(FILE *fd, slang_T *slang);
312static int read_words_section(FILE *fd, slang_T *lp, int len);
313static int read_sofo_section(FILE *fd, slang_T *slang);
314static int read_compound(FILE *fd, slang_T *slang, int len);
315static int set_sofo(slang_T *lp, char_u *from, char_u *to);
316static void set_sal_first(slang_T *lp);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200317static int *mb_str2wide(char_u *s);
Bram Moolenaar07399e72020-08-24 20:05:50 +0200318static int spell_read_tree(FILE *fd, char_u **bytsp, long *bytsp_len, idx_T **idxsp, int prefixtree, int prefixcnt);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200319static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200320static void set_spell_charflags(char_u *flags, int cnt, char_u *upp);
321static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp);
322static void set_map_str(slang_T *lp, char_u *map);
323
324
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200325static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
326static char *e_affname = N_("Affix name too long in %s line %d: %s");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200327static char *msg_compressing = N_("Compressing word tree...");
328
329/*
330 * Load one spell file and store the info into a slang_T.
331 *
332 * This is invoked in three ways:
333 * - From spell_load_cb() to load a spell file for the first time. "lang" is
334 * the language name, "old_lp" is NULL. Will allocate an slang_T.
335 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
336 * points to the existing slang_T.
337 * - Just after writing a .spl file; it's read back to produce the .sug file.
338 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
339 *
340 * Returns the slang_T the spell file was loaded into. NULL for error.
341 */
342 slang_T *
343spell_load_file(
344 char_u *fname,
345 char_u *lang,
346 slang_T *old_lp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100347 int silent) // no error if file doesn't exist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200348{
349 FILE *fd;
350 char_u buf[VIMSPELLMAGICL];
351 char_u *p;
352 int i;
353 int n;
354 int len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200355 slang_T *lp = NULL;
356 int c = 0;
357 int res;
Bram Moolenaarce6db022020-01-07 20:11:42 +0100358 int did_estack_push = FALSE;
Bram Moolenaare31ee862020-01-07 20:59:34 +0100359 ESTACK_CHECK_DECLARATION
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200360
361 fd = mch_fopen((char *)fname, "r");
362 if (fd == NULL)
363 {
364 if (!silent)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000365 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200366 else if (p_verbose > 2)
367 {
368 verbose_enter();
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000369 smsg((const char *)e_cant_open_file_str, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200370 verbose_leave();
371 }
372 goto endFAIL;
373 }
374 if (p_verbose > 2)
375 {
376 verbose_enter();
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100377 smsg(_("Reading spell file \"%s\""), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200378 verbose_leave();
379 }
380
381 if (old_lp == NULL)
382 {
383 lp = slang_alloc(lang);
384 if (lp == NULL)
385 goto endFAIL;
386
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100387 // Remember the file name, used to reload the file when it's updated.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200388 lp->sl_fname = vim_strsave(fname);
389 if (lp->sl_fname == NULL)
390 goto endFAIL;
391
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100392 // Check for .add.spl (_add.spl for VMS).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200393 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
394 }
395 else
396 lp = old_lp;
397
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100398 // Set sourcing_name, so that error messages mention the file name.
Bram Moolenaar1a47ae32019-12-29 23:04:25 +0100399 estack_push(ETYPE_SPELL, fname, 0);
Bram Moolenaare31ee862020-01-07 20:59:34 +0100400 ESTACK_CHECK_SETUP
Bram Moolenaarce6db022020-01-07 20:11:42 +0100401 did_estack_push = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200402
403 /*
404 * <HEADER>: <fileID>
405 */
406 for (i = 0; i < VIMSPELLMAGICL; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100407 buf[i] = getc(fd); // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200408 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
409 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000410 emsg(_(e_this_does_not_look_like_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200411 goto endFAIL;
412 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100413 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200414 if (c < VIMSPELLVERSION)
415 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000416 emsg(_(e_old_spell_file_needs_to_be_updated));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200417 goto endFAIL;
418 }
419 else if (c > VIMSPELLVERSION)
420 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000421 emsg(_(e_spell_file_is_for_newer_version_of_vim));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200422 goto endFAIL;
423 }
424
425
426 /*
427 * <SECTIONS>: <section> ... <sectionend>
428 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
429 */
430 for (;;)
431 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100432 n = getc(fd); // <sectionID> or <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200433 if (n == SN_END)
434 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100435 c = getc(fd); // <sectionflags>
436 len = get4c(fd); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200437 if (len < 0)
438 goto truncerr;
439
440 res = 0;
441 switch (n)
442 {
443 case SN_INFO:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100444 lp->sl_info = read_string(fd, len); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200445 if (lp->sl_info == NULL)
446 goto endFAIL;
447 break;
448
449 case SN_REGION:
450 res = read_region_section(fd, lp, len);
451 break;
452
453 case SN_CHARFLAGS:
454 res = read_charflags_section(fd);
455 break;
456
457 case SN_MIDWORD:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100458 lp->sl_midword = read_string(fd, len); // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200459 if (lp->sl_midword == NULL)
460 goto endFAIL;
461 break;
462
463 case SN_PREFCOND:
464 res = read_prefcond_section(fd, lp);
465 break;
466
467 case SN_REP:
468 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
469 break;
470
471 case SN_REPSAL:
472 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
473 break;
474
475 case SN_SAL:
476 res = read_sal_section(fd, lp);
477 break;
478
479 case SN_SOFO:
480 res = read_sofo_section(fd, lp);
481 break;
482
483 case SN_MAP:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100484 p = read_string(fd, len); // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200485 if (p == NULL)
486 goto endFAIL;
487 set_map_str(lp, p);
488 vim_free(p);
489 break;
490
491 case SN_WORDS:
492 res = read_words_section(fd, lp, len);
493 break;
494
495 case SN_SUGFILE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100496 lp->sl_sugtime = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200497 break;
498
499 case SN_NOSPLITSUGS:
500 lp->sl_nosplitsugs = TRUE;
501 break;
502
503 case SN_NOCOMPOUNDSUGS:
504 lp->sl_nocompoundsugs = TRUE;
505 break;
506
507 case SN_COMPOUND:
508 res = read_compound(fd, lp, len);
509 break;
510
511 case SN_NOBREAK:
512 lp->sl_nobreak = TRUE;
513 break;
514
515 case SN_SYLLABLE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100516 lp->sl_syllable = read_string(fd, len); // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200517 if (lp->sl_syllable == NULL)
518 goto endFAIL;
Bram Moolenaarfc2a47f2020-08-20 15:41:55 +0200519 if (init_syl_tab(lp) != OK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200520 goto endFAIL;
521 break;
522
523 default:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100524 // Unsupported section. When it's required give an error
525 // message. When it's not required skip the contents.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200526 if (c & SNF_REQUIRED)
527 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000528 emsg(_(e_unsupported_section_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200529 goto endFAIL;
530 }
531 while (--len >= 0)
532 if (getc(fd) < 0)
533 goto truncerr;
534 break;
535 }
536someerror:
537 if (res == SP_FORMERROR)
538 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000539 emsg(_(e_format_error_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200540 goto endFAIL;
541 }
542 if (res == SP_TRUNCERROR)
543 {
544truncerr:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000545 emsg(_(e_truncated_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200546 goto endFAIL;
547 }
548 if (res == SP_OTHERERROR)
549 goto endFAIL;
550 }
551
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100552 // <LWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200553 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fbyts_len,
554 &lp->sl_fidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200555 if (res != 0)
556 goto someerror;
557
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100558 // <KWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200559 res = spell_read_tree(fd, &lp->sl_kbyts, NULL, &lp->sl_kidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200560 if (res != 0)
561 goto someerror;
562
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100563 // <PREFIXTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200564 res = spell_read_tree(fd, &lp->sl_pbyts, NULL, &lp->sl_pidxs, TRUE,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200565 lp->sl_prefixcnt);
566 if (res != 0)
567 goto someerror;
568
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100569 // For a new file link it in the list of spell files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200570 if (old_lp == NULL && lang != NULL)
571 {
572 lp->sl_next = first_lang;
573 first_lang = lp;
574 }
575
576 goto endOK;
577
578endFAIL:
579 if (lang != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100580 // truncating the name signals the error to spell_load_lang()
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200581 *lang = NUL;
582 if (lp != NULL && old_lp == NULL)
583 slang_free(lp);
584 lp = NULL;
585
586endOK:
587 if (fd != NULL)
588 fclose(fd);
Bram Moolenaarce6db022020-01-07 20:11:42 +0100589 if (did_estack_push)
Bram Moolenaare31ee862020-01-07 20:59:34 +0100590 {
591 ESTACK_CHECK_NOW
Bram Moolenaarce6db022020-01-07 20:11:42 +0100592 estack_pop();
Bram Moolenaare31ee862020-01-07 20:59:34 +0100593 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200594
595 return lp;
596}
597
598/*
599 * Fill in the wordcount fields for a trie.
600 * Returns the total number of words.
601 */
602 static void
603tree_count_words(char_u *byts, idx_T *idxs)
604{
605 int depth;
606 idx_T arridx[MAXWLEN];
607 int curi[MAXWLEN];
608 int c;
609 idx_T n;
610 int wordcount[MAXWLEN];
611
612 arridx[0] = 0;
613 curi[0] = 1;
614 wordcount[0] = 0;
615 depth = 0;
616 while (depth >= 0 && !got_int)
617 {
618 if (curi[depth] > byts[arridx[depth]])
619 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100620 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200621 idxs[arridx[depth]] = wordcount[depth];
622 if (depth > 0)
623 wordcount[depth - 1] += wordcount[depth];
624
625 --depth;
626 fast_breakcheck();
627 }
628 else
629 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100630 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200631 n = arridx[depth] + curi[depth];
632 ++curi[depth];
633
634 c = byts[n];
635 if (c == 0)
636 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100637 // End of word, count it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200638 ++wordcount[depth];
639
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100640 // Skip over any other NUL bytes (same word with different
641 // flags).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200642 while (byts[n + 1] == 0)
643 {
644 ++n;
645 ++curi[depth];
646 }
647 }
648 else
649 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100650 // Normal char, go one level deeper to count the words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200651 ++depth;
652 arridx[depth] = idxs[n];
653 curi[depth] = 1;
654 wordcount[depth] = 0;
655 }
656 }
657 }
658}
659
660/*
661 * Load the .sug files for languages that have one and weren't loaded yet.
662 */
663 void
664suggest_load_files(void)
665{
666 langp_T *lp;
667 int lpi;
668 slang_T *slang;
669 char_u *dotp;
670 FILE *fd;
671 char_u buf[MAXWLEN];
672 int i;
673 time_t timestamp;
674 int wcount;
675 int wordnr;
676 garray_T ga;
677 int c;
678
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100679 // Do this for all languages that support sound folding.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200680 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
681 {
682 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
683 slang = lp->lp_slang;
684 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
685 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100686 // Change ".spl" to ".sug" and open the file. When the file isn't
687 // found silently skip it. Do set "sl_sugloaded" so that we
688 // don't try again and again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200689 slang->sl_sugloaded = TRUE;
690
691 dotp = vim_strrchr(slang->sl_fname, '.');
692 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
693 continue;
694 STRCPY(dotp, ".sug");
695 fd = mch_fopen((char *)slang->sl_fname, "r");
696 if (fd == NULL)
697 goto nextone;
698
699 /*
700 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
701 */
702 for (i = 0; i < VIMSUGMAGICL; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100703 buf[i] = getc(fd); // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200704 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
705 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000706 semsg(_(e_this_does_not_look_like_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200707 slang->sl_fname);
708 goto nextone;
709 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100710 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200711 if (c < VIMSUGVERSION)
712 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000713 semsg(_(e_old_sug_file_needs_to_be_updated_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200714 slang->sl_fname);
715 goto nextone;
716 }
717 else if (c > VIMSUGVERSION)
718 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000719 semsg(_(e_sug_file_is_for_newer_version_of_vim_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200720 slang->sl_fname);
721 goto nextone;
722 }
723
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100724 // Check the timestamp, it must be exactly the same as the one in
725 // the .spl file. Otherwise the word numbers won't match.
726 timestamp = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200727 if (timestamp != slang->sl_sugtime)
728 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000729 semsg(_(e_sug_file_doesnt_match_spl_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200730 slang->sl_fname);
731 goto nextone;
732 }
733
734 /*
735 * <SUGWORDTREE>: <wordtree>
736 * Read the trie with the soundfolded words.
737 */
Bram Moolenaar07399e72020-08-24 20:05:50 +0200738 if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200739 FALSE, 0) != 0)
740 {
741someerror:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000742 semsg(_(e_error_while_reading_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200743 slang->sl_fname);
744 slang_clear_sug(slang);
745 goto nextone;
746 }
747
748 /*
749 * <SUGTABLE>: <sugwcount> <sugline> ...
750 *
751 * Read the table with word numbers. We use a file buffer for
752 * this, because it's so much like a file with lines. Makes it
753 * possible to swap the info and save on memory use.
754 */
755 slang->sl_sugbuf = open_spellbuf();
756 if (slang->sl_sugbuf == NULL)
757 goto someerror;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100758 // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200759 wcount = get4c(fd);
760 if (wcount < 0)
761 goto someerror;
762
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100763 // Read all the wordnr lists into the buffer, one NUL terminated
764 // list per line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200765 ga_init2(&ga, 1, 100);
766 for (wordnr = 0; wordnr < wcount; ++wordnr)
767 {
768 ga.ga_len = 0;
769 for (;;)
770 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100771 c = getc(fd); // <sugline>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200772 if (c < 0 || ga_grow(&ga, 1) == FAIL)
773 goto someerror;
774 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
775 if (c == NUL)
776 break;
777 }
778 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
779 ga.ga_data, ga.ga_len, TRUE) == FAIL)
780 goto someerror;
781 }
782 ga_clear(&ga);
783
784 /*
785 * Need to put word counts in the word tries, so that we can find
786 * a word by its number.
787 */
788 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
789 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
790
791nextone:
792 if (fd != NULL)
793 fclose(fd);
794 STRCPY(dotp, ".spl");
795 }
796 }
797}
798
799
800/*
801 * Read a length field from "fd" in "cnt_bytes" bytes.
802 * Allocate memory, read the string into it and add a NUL at the end.
803 * Returns NULL when the count is zero.
804 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
805 * otherwise.
806 */
807 static char_u *
808read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
809{
810 int cnt = 0;
811 int i;
812 char_u *str;
813
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100814 // read the length bytes, MSB first
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200815 for (i = 0; i < cnt_bytes; ++i)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200816 {
Bram Moolenaar4ad739f2020-09-02 10:25:45 +0200817 int c = getc(fd);
818
819 if (c == EOF)
820 {
821 *cntp = SP_TRUNCERROR;
822 return NULL;
823 }
824 cnt = (cnt << 8) + (unsigned)c;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200825 }
826 *cntp = cnt;
827 if (cnt == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100828 return NULL; // nothing to read, return NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200829
830 str = read_string(fd, cnt);
831 if (str == NULL)
832 *cntp = SP_OTHERERROR;
833 return str;
834}
835
836/*
837 * Read SN_REGION: <regionname> ...
838 * Return SP_*ERROR flags.
839 */
840 static int
841read_region_section(FILE *fd, slang_T *lp, int len)
842{
843 int i;
844
Bram Moolenaar2993ac52018-02-10 14:12:43 +0100845 if (len > MAXREGIONS * 2)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200846 return SP_FORMERROR;
847 for (i = 0; i < len; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100848 lp->sl_regions[i] = getc(fd); // <regionname>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200849 lp->sl_regions[len] = NUL;
850 return 0;
851}
852
853/*
854 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
855 * <folcharslen> <folchars>
856 * Return SP_*ERROR flags.
857 */
858 static int
859read_charflags_section(FILE *fd)
860{
861 char_u *flags;
862 char_u *fol;
863 int flagslen, follen;
864
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100865 // <charflagslen> <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200866 flags = read_cnt_string(fd, 1, &flagslen);
867 if (flagslen < 0)
868 return flagslen;
869
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100870 // <folcharslen> <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200871 fol = read_cnt_string(fd, 2, &follen);
872 if (follen < 0)
873 {
874 vim_free(flags);
875 return follen;
876 }
877
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100878 // Set the word-char flags and fill SPELL_ISUPPER() table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200879 if (flags != NULL && fol != NULL)
880 set_spell_charflags(flags, flagslen, fol);
881
882 vim_free(flags);
883 vim_free(fol);
884
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100885 // When <charflagslen> is zero then <fcharlen> must also be zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200886 if ((flags == NULL) != (fol == NULL))
887 return SP_FORMERROR;
888 return 0;
889}
890
891/*
892 * Read SN_PREFCOND section.
893 * Return SP_*ERROR flags.
894 */
895 static int
896read_prefcond_section(FILE *fd, slang_T *lp)
897{
898 int cnt;
899 int i;
900 int n;
901 char_u *p;
902 char_u buf[MAXWLEN + 1];
903
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100904 // <prefcondcnt> <prefcond> ...
905 cnt = get2c(fd); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200906 if (cnt <= 0)
907 return SP_FORMERROR;
908
Bram Moolenaarc799fe22019-05-28 23:08:19 +0200909 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200910 if (lp->sl_prefprog == NULL)
911 return SP_OTHERERROR;
912 lp->sl_prefixcnt = cnt;
913
914 for (i = 0; i < cnt; ++i)
915 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100916 // <prefcond> : <condlen> <condstr>
917 n = getc(fd); // <condlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200918 if (n < 0 || n >= MAXWLEN)
919 return SP_FORMERROR;
920
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100921 // When <condlen> is zero we have an empty condition. Otherwise
922 // compile the regexp program used to check for the condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200923 if (n > 0)
924 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100925 buf[0] = '^'; // always match at one position only
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200926 p = buf + 1;
927 while (n-- > 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100928 *p++ = getc(fd); // <condstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200929 *p = NUL;
930 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
931 }
932 }
933 return 0;
934}
935
936/*
937 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
938 * Return SP_*ERROR flags.
939 */
940 static int
941read_rep_section(FILE *fd, garray_T *gap, short *first)
942{
943 int cnt;
944 fromto_T *ftp;
945 int i;
946
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100947 cnt = get2c(fd); // <repcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200948 if (cnt < 0)
949 return SP_TRUNCERROR;
950
951 if (ga_grow(gap, cnt) == FAIL)
952 return SP_OTHERERROR;
953
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100954 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200955 for (; gap->ga_len < cnt; ++gap->ga_len)
956 {
957 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
958 ftp->ft_from = read_cnt_string(fd, 1, &i);
959 if (i < 0)
960 return i;
961 if (i == 0)
962 return SP_FORMERROR;
963 ftp->ft_to = read_cnt_string(fd, 1, &i);
964 if (i <= 0)
965 {
966 vim_free(ftp->ft_from);
967 if (i < 0)
968 return i;
969 return SP_FORMERROR;
970 }
971 }
972
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100973 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200974 for (i = 0; i < 256; ++i)
975 first[i] = -1;
976 for (i = 0; i < gap->ga_len; ++i)
977 {
978 ftp = &((fromto_T *)gap->ga_data)[i];
979 if (first[*ftp->ft_from] == -1)
980 first[*ftp->ft_from] = i;
981 }
982 return 0;
983}
984
985/*
986 * Read SN_SAL section: <salflags> <salcount> <sal> ...
987 * Return SP_*ERROR flags.
988 */
989 static int
990read_sal_section(FILE *fd, slang_T *slang)
991{
992 int i;
993 int cnt;
994 garray_T *gap;
995 salitem_T *smp;
996 int ccnt;
997 char_u *p;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200998
999 slang->sl_sofo = FALSE;
1000
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001001 i = getc(fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001002 if (i & SAL_F0LLOWUP)
1003 slang->sl_followup = TRUE;
1004 if (i & SAL_COLLAPSE)
1005 slang->sl_collapse = TRUE;
1006 if (i & SAL_REM_ACCENTS)
1007 slang->sl_rem_accents = TRUE;
1008
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001009 cnt = get2c(fd); // <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001010 if (cnt < 0)
1011 return SP_TRUNCERROR;
1012
1013 gap = &slang->sl_sal;
1014 ga_init2(gap, sizeof(salitem_T), 10);
1015 if (ga_grow(gap, cnt + 1) == FAIL)
1016 return SP_OTHERERROR;
1017
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001018 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001019 for (; gap->ga_len < cnt; ++gap->ga_len)
1020 {
Bram Moolenaar97d2f342020-07-10 20:03:03 +02001021 int c = NUL;
1022
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001023 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001024 ccnt = getc(fd); // <salfromlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001025 if (ccnt < 0)
1026 return SP_TRUNCERROR;
1027 if ((p = alloc(ccnt + 2)) == NULL)
1028 return SP_OTHERERROR;
1029 smp->sm_lead = p;
1030
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001031 // Read up to the first special char into sm_lead.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001032 for (i = 0; i < ccnt; ++i)
1033 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001034 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001035 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
1036 break;
1037 *p++ = c;
1038 }
1039 smp->sm_leadlen = (int)(p - smp->sm_lead);
1040 *p++ = NUL;
1041
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001042 // Put (abc) chars in sm_oneof, if any.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001043 if (c == '(')
1044 {
1045 smp->sm_oneof = p;
1046 for (++i; i < ccnt; ++i)
1047 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001048 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001049 if (c == ')')
1050 break;
1051 *p++ = c;
1052 }
1053 *p++ = NUL;
1054 if (++i < ccnt)
1055 c = getc(fd);
1056 }
1057 else
1058 smp->sm_oneof = NULL;
1059
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001060 // Any following chars go in sm_rules.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001061 smp->sm_rules = p;
1062 if (i < ccnt)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001063 // store the char we got while checking for end of sm_lead
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001064 *p++ = c;
1065 for (++i; i < ccnt; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001066 *p++ = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001067 *p++ = NUL;
1068
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001069 // <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001070 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
1071 if (ccnt < 0)
1072 {
1073 vim_free(smp->sm_lead);
1074 return ccnt;
1075 }
1076
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001077 if (has_mbyte)
1078 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001079 // convert the multi-byte strings to wide char strings
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001080 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1081 smp->sm_leadlen = mb_charlen(smp->sm_lead);
1082 if (smp->sm_oneof == NULL)
1083 smp->sm_oneof_w = NULL;
1084 else
1085 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
1086 if (smp->sm_to == NULL)
1087 smp->sm_to_w = NULL;
1088 else
1089 smp->sm_to_w = mb_str2wide(smp->sm_to);
1090 if (smp->sm_lead_w == NULL
1091 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
1092 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
1093 {
1094 vim_free(smp->sm_lead);
1095 vim_free(smp->sm_to);
1096 vim_free(smp->sm_lead_w);
1097 vim_free(smp->sm_oneof_w);
1098 vim_free(smp->sm_to_w);
1099 return SP_OTHERERROR;
1100 }
1101 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001102 }
1103
1104 if (gap->ga_len > 0)
1105 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001106 // Add one extra entry to mark the end with an empty sm_lead. Avoids
1107 // that we need to check the index every time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001108 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1109 if ((p = alloc(1)) == NULL)
1110 return SP_OTHERERROR;
1111 p[0] = NUL;
1112 smp->sm_lead = p;
1113 smp->sm_leadlen = 0;
1114 smp->sm_oneof = NULL;
1115 smp->sm_rules = p;
1116 smp->sm_to = NULL;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001117 if (has_mbyte)
1118 {
1119 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1120 smp->sm_leadlen = 0;
1121 smp->sm_oneof_w = NULL;
1122 smp->sm_to_w = NULL;
1123 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001124 ++gap->ga_len;
1125 }
1126
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001127 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001128 set_sal_first(slang);
1129
1130 return 0;
1131}
1132
1133/*
1134 * Read SN_WORDS: <word> ...
1135 * Return SP_*ERROR flags.
1136 */
1137 static int
1138read_words_section(FILE *fd, slang_T *lp, int len)
1139{
1140 int done = 0;
1141 int i;
1142 int c;
1143 char_u word[MAXWLEN];
1144
1145 while (done < len)
1146 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001147 // Read one word at a time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001148 for (i = 0; ; ++i)
1149 {
1150 c = getc(fd);
1151 if (c == EOF)
1152 return SP_TRUNCERROR;
1153 word[i] = c;
1154 if (word[i] == NUL)
1155 break;
1156 if (i == MAXWLEN - 1)
1157 return SP_FORMERROR;
1158 }
1159
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001160 // Init the count to 10.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001161 count_common_word(lp, word, -1, 10);
1162 done += i + 1;
1163 }
1164 return 0;
1165}
1166
1167/*
1168 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
1169 * Return SP_*ERROR flags.
1170 */
1171 static int
1172read_sofo_section(FILE *fd, slang_T *slang)
1173{
1174 int cnt;
1175 char_u *from, *to;
1176 int res;
1177
1178 slang->sl_sofo = TRUE;
1179
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001180 // <sofofromlen> <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001181 from = read_cnt_string(fd, 2, &cnt);
1182 if (cnt < 0)
1183 return cnt;
1184
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001185 // <sofotolen> <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001186 to = read_cnt_string(fd, 2, &cnt);
1187 if (cnt < 0)
1188 {
1189 vim_free(from);
1190 return cnt;
1191 }
1192
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001193 // Store the info in slang->sl_sal and/or slang->sl_sal_first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001194 if (from != NULL && to != NULL)
1195 res = set_sofo(slang, from, to);
1196 else if (from != NULL || to != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001197 res = SP_FORMERROR; // only one of two strings is an error
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001198 else
1199 res = 0;
1200
1201 vim_free(from);
1202 vim_free(to);
1203 return res;
1204}
1205
1206/*
1207 * Read the compound section from the .spl file:
1208 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
1209 * Returns SP_*ERROR flags.
1210 */
1211 static int
1212read_compound(FILE *fd, slang_T *slang, int len)
1213{
1214 int todo = len;
1215 int c;
1216 int atstart;
1217 char_u *pat;
1218 char_u *pp;
1219 char_u *cp;
1220 char_u *ap;
1221 char_u *crp;
1222 int cnt;
1223 garray_T *gap;
1224
1225 if (todo < 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001226 return SP_FORMERROR; // need at least two bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001227
1228 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001229 c = getc(fd); // <compmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001230 if (c < 2)
1231 c = MAXWLEN;
1232 slang->sl_compmax = c;
1233
1234 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001235 c = getc(fd); // <compminlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001236 if (c < 1)
1237 c = 0;
1238 slang->sl_compminlen = c;
1239
1240 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001241 c = getc(fd); // <compsylmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001242 if (c < 1)
1243 c = MAXWLEN;
1244 slang->sl_compsylmax = c;
1245
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001246 c = getc(fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001247 if (c != 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001248 ungetc(c, fd); // be backwards compatible with Vim 7.0b
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001249 else
1250 {
1251 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001252 c = getc(fd); // only use the lower byte for now
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001253 --todo;
1254 slang->sl_compoptions = c;
1255
1256 gap = &slang->sl_comppat;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001257 c = get2c(fd); // <comppatcount>
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001258 if (c < 0)
1259 return SP_TRUNCERROR;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001260 todo -= 2;
1261 ga_init2(gap, sizeof(char_u *), c);
1262 if (ga_grow(gap, c) == OK)
1263 while (--c >= 0)
1264 {
1265 ((char_u **)(gap->ga_data))[gap->ga_len++] =
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001266 read_cnt_string(fd, 1, &cnt);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001267 // <comppatlen> <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001268 if (cnt < 0)
1269 return cnt;
1270 todo -= cnt + 1;
1271 }
1272 }
1273 if (todo < 0)
1274 return SP_FORMERROR;
1275
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001276 // Turn the COMPOUNDRULE items into a regexp pattern:
1277 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
1278 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
1279 // Conversion to utf-8 may double the size.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001280 c = todo * 2 + 7;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001281 if (enc_utf8)
1282 c += todo * 2;
Bram Moolenaar964b3742019-05-24 18:54:09 +02001283 pat = alloc(c);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001284 if (pat == NULL)
1285 return SP_OTHERERROR;
1286
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001287 // We also need a list of all flags that can appear at the start and one
1288 // for all flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001289 cp = alloc(todo + 1);
1290 if (cp == NULL)
1291 {
1292 vim_free(pat);
1293 return SP_OTHERERROR;
1294 }
1295 slang->sl_compstartflags = cp;
1296 *cp = NUL;
1297
1298 ap = alloc(todo + 1);
1299 if (ap == NULL)
1300 {
1301 vim_free(pat);
1302 return SP_OTHERERROR;
1303 }
1304 slang->sl_compallflags = ap;
1305 *ap = NUL;
1306
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001307 // And a list of all patterns in their original form, for checking whether
1308 // compounding may work in match_compoundrule(). This is freed when we
1309 // encounter a wildcard, the check doesn't work then.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001310 crp = alloc(todo + 1);
1311 slang->sl_comprules = crp;
1312
1313 pp = pat;
1314 *pp++ = '^';
1315 *pp++ = '\\';
1316 *pp++ = '(';
1317
1318 atstart = 1;
1319 while (todo-- > 0)
1320 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001321 c = getc(fd); // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001322 if (c == EOF)
1323 {
1324 vim_free(pat);
1325 return SP_TRUNCERROR;
1326 }
1327
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001328 // Add all flags to "sl_compallflags".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001329 if (vim_strchr((char_u *)"?*+[]/", c) == NULL
1330 && !byte_in_str(slang->sl_compallflags, c))
1331 {
1332 *ap++ = c;
1333 *ap = NUL;
1334 }
1335
1336 if (atstart != 0)
1337 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001338 // At start of item: copy flags to "sl_compstartflags". For a
1339 // [abc] item set "atstart" to 2 and copy up to the ']'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001340 if (c == '[')
1341 atstart = 2;
1342 else if (c == ']')
1343 atstart = 0;
1344 else
1345 {
1346 if (!byte_in_str(slang->sl_compstartflags, c))
1347 {
1348 *cp++ = c;
1349 *cp = NUL;
1350 }
1351 if (atstart == 1)
1352 atstart = 0;
1353 }
1354 }
1355
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001356 // Copy flag to "sl_comprules", unless we run into a wildcard.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001357 if (crp != NULL)
1358 {
1359 if (c == '?' || c == '+' || c == '*')
1360 {
Bram Moolenaard23a8232018-02-10 18:45:26 +01001361 VIM_CLEAR(slang->sl_comprules);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001362 crp = NULL;
1363 }
1364 else
1365 *crp++ = c;
1366 }
1367
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001368 if (c == '/') // slash separates two items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001369 {
1370 *pp++ = '\\';
1371 *pp++ = '|';
1372 atstart = 1;
1373 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001374 else // normal char, "[abc]" and '*' are copied as-is
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001375 {
1376 if (c == '?' || c == '+' || c == '~')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001377 *pp++ = '\\'; // "a?" becomes "a\?", "a+" becomes "a\+"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001378 if (enc_utf8)
1379 pp += mb_char2bytes(c, pp);
1380 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001381 *pp++ = c;
1382 }
1383 }
1384
1385 *pp++ = '\\';
1386 *pp++ = ')';
1387 *pp++ = '$';
1388 *pp = NUL;
1389
1390 if (crp != NULL)
1391 *crp = NUL;
1392
1393 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
1394 vim_free(pat);
1395 if (slang->sl_compprog == NULL)
1396 return SP_FORMERROR;
1397
1398 return 0;
1399}
1400
1401/*
1402 * Set the SOFOFROM and SOFOTO items in language "lp".
1403 * Returns SP_*ERROR flags when there is something wrong.
1404 */
1405 static int
1406set_sofo(slang_T *lp, char_u *from, char_u *to)
1407{
1408 int i;
1409
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001410 garray_T *gap;
1411 char_u *s;
1412 char_u *p;
1413 int c;
1414 int *inp;
1415
1416 if (has_mbyte)
1417 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001418 // Use "sl_sal" as an array with 256 pointers to a list of wide
1419 // characters. The index is the low byte of the character.
1420 // The list contains from-to pairs with a terminating NUL.
1421 // sl_sal_first[] is used for latin1 "from" characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001422 gap = &lp->sl_sal;
1423 ga_init2(gap, sizeof(int *), 1);
1424 if (ga_grow(gap, 256) == FAIL)
1425 return SP_OTHERERROR;
1426 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
1427 gap->ga_len = 256;
1428
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001429 // First count the number of items for each list. Temporarily use
1430 // sl_sal_first[] for this.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001431 for (p = from, s = to; *p != NUL && *s != NUL; )
1432 {
1433 c = mb_cptr2char_adv(&p);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001434 MB_CPTR_ADV(s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001435 if (c >= 256)
1436 ++lp->sl_sal_first[c & 0xff];
1437 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001438 if (*p != NUL || *s != NUL) // lengths differ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001439 return SP_FORMERROR;
1440
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001441 // Allocate the lists.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001442 for (i = 0; i < 256; ++i)
1443 if (lp->sl_sal_first[i] > 0)
1444 {
1445 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
1446 if (p == NULL)
1447 return SP_OTHERERROR;
1448 ((int **)gap->ga_data)[i] = (int *)p;
1449 *(int *)p = 0;
1450 }
1451
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001452 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
1453 // list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001454 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
1455 for (p = from, s = to; *p != NUL && *s != NUL; )
1456 {
1457 c = mb_cptr2char_adv(&p);
1458 i = mb_cptr2char_adv(&s);
1459 if (c >= 256)
1460 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001461 // Append the from-to chars at the end of the list with
1462 // the low byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001463 inp = ((int **)gap->ga_data)[c & 0xff];
1464 while (*inp != 0)
1465 ++inp;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001466 *inp++ = c; // from char
1467 *inp++ = i; // to char
1468 *inp++ = NUL; // NUL at the end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001469 }
1470 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001471 // mapping byte to char is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001472 lp->sl_sal_first[c] = i;
1473 }
1474 }
1475 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001476 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001477 // mapping bytes to bytes is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001478 if (STRLEN(from) != STRLEN(to))
1479 return SP_FORMERROR;
1480
1481 for (i = 0; to[i] != NUL; ++i)
1482 lp->sl_sal_first[from[i]] = to[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001483 lp->sl_sal.ga_len = 1; // indicates we have soundfolding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001484 }
1485
1486 return 0;
1487}
1488
1489/*
1490 * Fill the first-index table for "lp".
1491 */
1492 static void
1493set_sal_first(slang_T *lp)
1494{
1495 salfirst_T *sfirst;
1496 int i;
1497 salitem_T *smp;
1498 int c;
1499 garray_T *gap = &lp->sl_sal;
1500
1501 sfirst = lp->sl_sal_first;
1502 for (i = 0; i < 256; ++i)
1503 sfirst[i] = -1;
1504 smp = (salitem_T *)gap->ga_data;
1505 for (i = 0; i < gap->ga_len; ++i)
1506 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001507 if (has_mbyte)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001508 // Use the lowest byte of the first character. For latin1 it's
1509 // the character, for other encodings it should differ for most
1510 // characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001511 c = *smp[i].sm_lead_w & 0xff;
1512 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001513 c = *smp[i].sm_lead;
1514 if (sfirst[c] == -1)
1515 {
1516 sfirst[c] = i;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001517 if (has_mbyte)
1518 {
1519 int n;
1520
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001521 // Make sure all entries with this byte are following each
1522 // other. Move the ones that are in the wrong position. Do
1523 // keep the same ordering!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001524 while (i + 1 < gap->ga_len
1525 && (*smp[i + 1].sm_lead_w & 0xff) == c)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001526 // Skip over entry with same index byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001527 ++i;
1528
1529 for (n = 1; i + n < gap->ga_len; ++n)
1530 if ((*smp[i + n].sm_lead_w & 0xff) == c)
1531 {
1532 salitem_T tsal;
1533
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001534 // Move entry with same index byte after the entries
1535 // we already found.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001536 ++i;
1537 --n;
1538 tsal = smp[i + n];
1539 mch_memmove(smp + i + 1, smp + i,
1540 sizeof(salitem_T) * n);
1541 smp[i] = tsal;
1542 }
1543 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001544 }
1545 }
1546}
1547
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001548/*
1549 * Turn a multi-byte string into a wide character string.
1550 * Return it in allocated memory (NULL for out-of-memory)
1551 */
1552 static int *
1553mb_str2wide(char_u *s)
1554{
1555 int *res;
1556 char_u *p;
1557 int i = 0;
1558
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001559 res = ALLOC_MULT(int, mb_charlen(s) + 1);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001560 if (res != NULL)
1561 {
1562 for (p = s; *p != NUL; )
1563 res[i++] = mb_ptr2char_adv(&p);
1564 res[i] = NUL;
1565 }
1566 return res;
1567}
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001568
1569/*
1570 * Read a tree from the .spl or .sug file.
1571 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
1572 * This is skipped when the tree has zero length.
1573 * Returns zero when OK, SP_ value for an error.
1574 */
1575 static int
1576spell_read_tree(
1577 FILE *fd,
1578 char_u **bytsp,
Bram Moolenaar07399e72020-08-24 20:05:50 +02001579 long *bytsp_len,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001580 idx_T **idxsp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001581 int prefixtree, // TRUE for the prefix tree
1582 int prefixcnt) // when "prefixtree" is TRUE: prefix count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001583{
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001584 long len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001585 int idx;
1586 char_u *bp;
1587 idx_T *ip;
1588
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001589 // The tree size was computed when writing the file, so that we can
1590 // allocate it as one long block. <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001591 len = get4c(fd);
1592 if (len < 0)
1593 return SP_TRUNCERROR;
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001594 if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001595 // Invalid length, multiply with sizeof(int) would overflow.
Bram Moolenaar399c2972017-02-09 21:07:12 +01001596 return SP_FORMERROR;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001597 if (len > 0)
1598 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001599 // Allocate the byte array.
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02001600 bp = alloc(len);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001601 if (bp == NULL)
1602 return SP_OTHERERROR;
1603 *bytsp = bp;
Bram Moolenaar07399e72020-08-24 20:05:50 +02001604 if (bytsp_len != NULL)
1605 *bytsp_len = len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001606
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001607 // Allocate the index array.
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001608 ip = lalloc_clear(len * sizeof(int), TRUE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001609 if (ip == NULL)
1610 return SP_OTHERERROR;
1611 *idxsp = ip;
1612
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001613 // Recursively read the tree and store it in the array.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001614 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
1615 if (idx < 0)
1616 return idx;
1617 }
1618 return 0;
1619}
1620
1621/*
1622 * Read one row of siblings from the spell file and store it in the byte array
1623 * "byts" and index array "idxs". Recursively read the children.
1624 *
1625 * NOTE: The code here must match put_node()!
1626 *
1627 * Returns the index (>= 0) following the siblings.
1628 * Returns SP_TRUNCERROR if the file is shorter than expected.
1629 * Returns SP_FORMERROR if there is a format error.
1630 */
1631 static idx_T
1632read_tree_node(
1633 FILE *fd,
1634 char_u *byts,
1635 idx_T *idxs,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001636 int maxidx, // size of arrays
1637 idx_T startidx, // current index in "byts" and "idxs"
1638 int prefixtree, // TRUE for reading PREFIXTREE
1639 int maxprefcondnr) // maximum for <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001640{
1641 int len;
1642 int i;
1643 int n;
1644 idx_T idx = startidx;
1645 int c;
1646 int c2;
1647#define SHARED_MASK 0x8000000
1648
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001649 len = getc(fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001650 if (len <= 0)
1651 return SP_TRUNCERROR;
1652
1653 if (startidx + len >= maxidx)
1654 return SP_FORMERROR;
1655 byts[idx++] = len;
1656
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001657 // Read the byte values, flag/region bytes and shared indexes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001658 for (i = 1; i <= len; ++i)
1659 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001660 c = getc(fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001661 if (c < 0)
1662 return SP_TRUNCERROR;
1663 if (c <= BY_SPECIAL)
1664 {
1665 if (c == BY_NOFLAGS && !prefixtree)
1666 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001667 // No flags, all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001668 idxs[idx] = 0;
1669 c = 0;
1670 }
1671 else if (c != BY_INDEX)
1672 {
1673 if (prefixtree)
1674 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001675 // Read the optional pflags byte, the prefix ID and the
1676 // condition nr. In idxs[] store the prefix ID in the low
1677 // byte, the condition index shifted up 8 bits, the flags
1678 // shifted up 24 bits.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001679 if (c == BY_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001680 c = getc(fd) << 24; // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001681 else
1682 c = 0;
1683
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001684 c |= getc(fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001685
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001686 n = get2c(fd); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001687 if (n >= maxprefcondnr)
1688 return SP_FORMERROR;
1689 c |= (n << 8);
1690 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001691 else // c must be BY_FLAGS or BY_FLAGS2
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001692 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001693 // Read flags and optional region and prefix ID. In
1694 // idxs[] the flags go in the low two bytes, region above
1695 // that and prefix ID above the region.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001696 c2 = c;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001697 c = getc(fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001698 if (c2 == BY_FLAGS2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001699 c = (getc(fd) << 8) + c; // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001700 if (c & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001701 c = (getc(fd) << 16) + c; // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001702 if (c & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001703 c = (getc(fd) << 24) + c; // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001704 }
1705
1706 idxs[idx] = c;
1707 c = 0;
1708 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001709 else // c == BY_INDEX
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001710 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001711 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001712 n = get3c(fd);
1713 if (n < 0 || n >= maxidx)
1714 return SP_FORMERROR;
1715 idxs[idx] = n + SHARED_MASK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001716 c = getc(fd); // <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001717 }
1718 }
1719 byts[idx++] = c;
1720 }
1721
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001722 // Recursively read the children for non-shared siblings.
1723 // Skip the end-of-word ones (zero byte value) and the shared ones (and
1724 // remove SHARED_MASK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001725 for (i = 1; i <= len; ++i)
1726 if (byts[startidx + i] != 0)
1727 {
1728 if (idxs[startidx + i] & SHARED_MASK)
1729 idxs[startidx + i] &= ~SHARED_MASK;
1730 else
1731 {
1732 idxs[startidx + i] = idx;
1733 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
1734 prefixtree, maxprefcondnr);
1735 if (idx < 0)
1736 break;
1737 }
1738 }
1739
1740 return idx;
1741}
1742
1743/*
1744 * Reload the spell file "fname" if it's loaded.
1745 */
1746 static void
1747spell_reload_one(
1748 char_u *fname,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001749 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001750{
1751 slang_T *slang;
1752 int didit = FALSE;
1753
Bram Moolenaaraeea7212020-04-02 18:50:46 +02001754 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001755 {
Bram Moolenaar99499b12019-05-23 21:35:48 +02001756 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001757 {
1758 slang_clear(slang);
1759 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001760 // reloading failed, clear the language
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001761 slang_clear(slang);
1762 redraw_all_later(SOME_VALID);
1763 didit = TRUE;
1764 }
1765 }
1766
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001767 // When "zg" was used and the file wasn't loaded yet, should redo
1768 // 'spelllang' to load it now.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001769 if (added_word && !didit)
1770 did_set_spelllang(curwin);
1771}
1772
1773
1774/*
1775 * Functions for ":mkspell".
1776 */
1777
// Maximum length, in bytes, of a line in a .aff and .dic file.
#define MAXLINELEN 500
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001780/*
1781 * Main structure to store the contents of a ".aff" file.
1782 */
1783typedef struct afffile_S
1784{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001785 char_u *af_enc; // "SET", normalized, alloc'ed string or NULL
1786 int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
1787 unsigned af_rare; // RARE ID for rare word
1788 unsigned af_keepcase; // KEEPCASE ID for keep-case word
1789 unsigned af_bad; // BAD ID for banned word
1790 unsigned af_needaffix; // NEEDAFFIX ID
1791 unsigned af_circumfix; // CIRCUMFIX ID
1792 unsigned af_needcomp; // NEEDCOMPOUND ID
1793 unsigned af_comproot; // COMPOUNDROOT ID
1794 unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID
1795 unsigned af_comppermit; // COMPOUNDPERMITFLAG ID
1796 unsigned af_nosuggest; // NOSUGGEST ID
1797 int af_pfxpostpone; // postpone prefixes without chop string and
1798 // without flags
1799 int af_ignoreextra; // IGNOREEXTRA present
1800 hashtab_T af_pref; // hashtable for prefixes, affheader_T
1801 hashtab_T af_suff; // hashtable for suffixes, affheader_T
1802 hashtab_T af_comp; // hashtable for compound flags, compitem_T
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001803} afffile_T;
1804
// Values for "af_flagtype": how affix flags are written in the .aff file.
#define AFT_CHAR        0       // each flag is one character
#define AFT_LONG        1       // each flag is two characters
#define AFT_CAPLONG     2       // each flag is one or two characters
#define AFT_NUM         3       // flags are numbers, comma separated
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001809
1810typedef struct affentry_S affentry_T;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001811// Affix entry from ".aff" file. Used for prefixes and suffixes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001812struct affentry_S
1813{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001814 affentry_T *ae_next; // next affix with same name/number
1815 char_u *ae_chop; // text to chop off basic word (can be NULL)
1816 char_u *ae_add; // text to add to basic word (can be NULL)
1817 char_u *ae_flags; // flags on the affix (can be NULL)
1818 char_u *ae_cond; // condition (NULL for ".")
1819 regprog_T *ae_prog; // regexp program for ae_cond or NULL
1820 char ae_compforbid; // COMPOUNDFORBIDFLAG found
1821 char ae_comppermit; // COMPOUNDPERMITFLAG found
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001822};
1823
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001824#define AH_KEY_LEN 17 // 2 x 8 bytes + NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001825
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001826// Affix header from ".aff" file. Used for af_pref and af_suff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001827typedef struct affheader_S
1828{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001829 char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix
1830 unsigned ah_flag; // affix name as number, uses "af_flagtype"
1831 int ah_newID; // prefix ID after renumbering; 0 if not used
1832 int ah_combine; // suffix may combine with prefix
1833 int ah_follows; // another affix block should be following
1834 affentry_T *ah_first; // first affix entry
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001835} affheader_T;
1836
1837#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1838
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001839// Flag used in compound items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001840typedef struct compitem_S
1841{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001842 char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound
1843 unsigned ci_flag; // affix name as number, uses "af_flagtype"
1844 int ci_newID; // affix ID after renumbering.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001845} compitem_T;
1846
1847#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
1848
1849/*
1850 * Structure that is used to store the items in the word tree. This avoids
1851 * the need to keep track of each allocated thing, everything is freed all at
1852 * once after ":mkspell" is done.
1853 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
1854 * "sb_data" is correct for systems where pointers must be aligned on
1855 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
1856 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001857#define SBLOCKSIZE 16000 // size of sb_data
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001858typedef struct sblock_S sblock_T;
1859struct sblock_S
1860{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001861 int sb_used; // nr of bytes already in use
1862 sblock_T *sb_next; // next block in list
1863 char_u sb_data[1]; // data, actually longer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001864};
1865
1866/*
1867 * A node in the tree.
1868 */
1869typedef struct wordnode_S wordnode_T;
1870struct wordnode_S
1871{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001872 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001873 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001874 char_u hashkey[6]; // the hash key, only used while compressing
1875 int index; // index in written nodes (valid after first
1876 // round)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001877 } wn_u1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001878 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001879 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001880 wordnode_T *next; // next node with same hash key
1881 wordnode_T *wnode; // parent node that will write this node
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001882 } wn_u2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001883 wordnode_T *wn_child; // child (next byte in word)
1884 wordnode_T *wn_sibling; // next sibling (alternate byte in word,
1885 // always sorted)
1886 int wn_refs; // Nr. of references to this node. Only
1887 // relevant for first node in a list of
1888 // siblings, in following siblings it is
1889 // always one.
1890 char_u wn_byte; // Byte for this node. NUL for word end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001891
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001892 // Info for when "wn_byte" is NUL.
1893 // In PREFIXTREE "wn_region" is used for the prefcondnr.
1894 // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
1895 // "wn_region" the LSW of the wordnr.
1896 char_u wn_affixID; // supported/required prefix ID or 0
1897 short_u wn_flags; // WF_ flags
1898 short wn_region; // region mask
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001899
1900#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001901 int wn_nr; // sequence nr for printing
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001902#endif
1903};
1904
#define WN_MASK 0xffff          // mask relevant bits of "wn_flags"

// Cast the key pointer of hashtable item "hi" to a wordnode_T pointer.
// The whole expansion is parenthesized, like HI2AH() and HI2CI(), so that
// the result can be used directly in a larger expression such as
// "HI2WN(hi)->wn_child" without the cast binding to the wrong operand.
#define HI2WN(hi) ((wordnode_T *)((hi)->hi_key))
1908
1909/*
1910 * Info used while reading the spell files.
1911 */
1912typedef struct spellinfo_S
1913{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001914 wordnode_T *si_foldroot; // tree with case-folded words
1915 long si_foldwcount; // nr of words in si_foldroot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001916
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001917 wordnode_T *si_keeproot; // tree with keep-case words
1918 long si_keepwcount; // nr of words in si_keeproot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001919
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001920 wordnode_T *si_prefroot; // tree with postponed prefixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001921
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001922 long si_sugtree; // creating the soundfolding trie
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001923
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001924 sblock_T *si_blocks; // memory blocks used
1925 long si_blocks_cnt; // memory blocks allocated
1926 int si_did_emsg; // TRUE when ran out of memory
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001927
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001928 long si_compress_cnt; // words to add before lowering
1929 // compression limit
1930 wordnode_T *si_first_free; // List of nodes that have been freed during
1931 // compression, linked by "wn_child" field.
1932 long si_free_count; // number of nodes in si_first_free
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001933#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001934 int si_wordnode_nr; // sequence nr for nodes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001935#endif
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001936 buf_T *si_spellbuf; // buffer used to store soundfold word table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001937
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001938 int si_ascii; // handling only ASCII words
1939 int si_add; // addition file
1940 int si_clear_chartab; // when TRUE clear char tables
1941 int si_region; // region mask
1942 vimconv_T si_conv; // for conversion to 'encoding'
1943 int si_memtot; // runtime memory used
1944 int si_verbose; // verbose messages
1945 int si_msg_count; // number of words added since last message
1946 char_u *si_info; // info text chars or NULL
1947 int si_region_count; // number of regions supported (1 when there
1948 // are no regions)
Bram Moolenaar2993ac52018-02-10 14:12:43 +01001949 char_u si_region_name[MAXREGIONS * 2 + 1];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001950 // region names; used only if
1951 // si_region_count > 1)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001952
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001953 garray_T si_rep; // list of fromto_T entries from REP lines
1954 garray_T si_repsal; // list of fromto_T entries from REPSAL lines
1955 garray_T si_sal; // list of fromto_T entries from SAL lines
1956 char_u *si_sofofr; // SOFOFROM text
1957 char_u *si_sofoto; // SOFOTO text
1958 int si_nosugfile; // NOSUGFILE item found
1959 int si_nosplitsugs; // NOSPLITSUGS item found
1960 int si_nocompoundsugs; // NOCOMPOUNDSUGS item found
1961 int si_followup; // soundsalike: ?
1962 int si_collapse; // soundsalike: ?
1963 hashtab_T si_commonwords; // hashtable for common words
1964 time_t si_sugtime; // timestamp for .sug file
1965 int si_rem_accents; // soundsalike: remove accents
1966 garray_T si_map; // MAP info concatenated
1967 char_u *si_midword; // MIDWORD chars or NULL
1968 int si_compmax; // max nr of words for compounding
1969 int si_compminlen; // minimal length for compounding
1970 int si_compsylmax; // max nr of syllables for compounding
1971 int si_compoptions; // COMP_ flags
1972 garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as
1973 // a string
1974 char_u *si_compflags; // flags used for compounding
1975 char_u si_nobreak; // NOBREAK
1976 char_u *si_syllable; // syllable string
1977 garray_T si_prefcond; // table with conditions for postponed
1978 // prefixes, each stored as a string
1979 int si_newprefID; // current value for ah_newID
1980 int si_newcompID; // current value for compound ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001981} spellinfo_T;
1982
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001983static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount);
1984static void aff_process_flags(afffile_T *affile, affentry_T *entry);
1985static int spell_info_item(char_u *s);
1986static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum);
1987static unsigned get_affitem(int flagtype, char_u **pp);
1988static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
1989static void check_renumber(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001990static void aff_check_number(int spinval, int affval, char *name);
1991static void aff_check_string(char_u *spinval, char_u *affval, char *name);
1992static int str_equal(char_u *s1, char_u *s2);
1993static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to);
1994static int sal_to_bool(char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001995static int get_affix_flags(afffile_T *affile, char_u *afflist);
1996static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1997static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1998static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001999static void *getroom(spellinfo_T *spin, size_t len, int align);
2000static char_u *getroom_save(spellinfo_T *spin, char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002001static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
2002static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
2003static wordnode_T *get_wordnode(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002004static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
Bram Moolenaar408c23b2020-06-03 22:15:45 +02002005static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name);
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02002006static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002007static int node_equal(wordnode_T *n1, wordnode_T *n2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002008static void clear_node(wordnode_T *node);
2009static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002010static int sug_filltree(spellinfo_T *spin, slang_T *slang);
2011static int sug_maketable(spellinfo_T *spin);
2012static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
2013static int offset2bytes(int nr, char_u *buf);
2014static void sug_write(spellinfo_T *spin, char_u *fname);
2015static void spell_message(spellinfo_T *spin, char_u *str);
2016static void init_spellfile(void);
2017
// For the postponed prefixes tree "wn_flags" stores the WFP_ flags.  The
// value passed to tree_add_word() must be negative to indicate the prefix
// tree, thus this is a negative number with the lower 8 bits zero.
#define PFX_FLAGS (-256)

// Bit flags for the "condit" argument of store_aff_word().
#define CONDIT_COMB     0x1     // affix must combine
#define CONDIT_CFIX     0x2     // affix must have CIRCUMFIX flag
#define CONDIT_SUF      0x4     // add a suffix for matching flags
#define CONDIT_AFF      0x8     // word already has an affix
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002028
/*
 * Tunable parameters that decide when the tree is compressed.  The values
 * below are the initial ones; they are filled from the 'mkspellmem' option.
 */
static long compress_start = 30000;     // memory / SBLOCKSIZE
static long compress_inc = 100;         // memory / SBLOCKSIZE
static long compress_added = 500000;    // word count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002036
2037/*
2038 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
2039 * Sets "sps_flags".
2040 */
2041 int
2042spell_check_msm(void)
2043{
2044 char_u *p = p_msm;
2045 long start = 0;
2046 long incr = 0;
2047 long added = 0;
2048
2049 if (!VIM_ISDIGIT(*p))
2050 return FAIL;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002051 // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002052 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
2053 if (*p != ',')
2054 return FAIL;
2055 ++p;
2056 if (!VIM_ISDIGIT(*p))
2057 return FAIL;
2058 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
2059 if (*p != ',')
2060 return FAIL;
2061 ++p;
2062 if (!VIM_ISDIGIT(*p))
2063 return FAIL;
2064 added = getdigits(&p) * 1024;
2065 if (*p != NUL)
2066 return FAIL;
2067
2068 if (start == 0 || incr == 0 || added == 0 || incr > start)
2069 return FAIL;
2070
2071 compress_start = start;
2072 compress_inc = incr;
2073 compress_added = added;
2074 return OK;
2075}
2076
2077#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE   1000    // size of each of the three line buffers
#define PRINTWIDTH      6       // number of columns used per depth level

// Format "fmt" with arguments "a1" and "a2" into line buffer "l", starting
// at the column that belongs to "depth".  All arguments are parenthesized so
// that an expression such as "depth + 1" yields the intended offset and
// remaining size.
#define PRINTSOME(l, depth, fmt, a1, a2) \
    vim_snprintf((l) + (depth) * PRINTWIDTH, \
            PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2))

// The three lines of output that are built up for each row of nodes.
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
2093
2094 static void
2095spell_clear_flags(wordnode_T *node)
2096{
2097 wordnode_T *np;
2098
Bram Moolenaaraeea7212020-04-02 18:50:46 +02002099 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002100 {
2101 np->wn_u1.index = FALSE;
2102 spell_clear_flags(np->wn_child);
2103 }
2104}
2105
2106 static void
2107spell_print_node(wordnode_T *node, int depth)
2108{
2109 if (node->wn_u1.index)
2110 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002111 // Done this node before, print the reference.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002112 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
2113 PRINTSOME(line2, depth, " ", 0, 0);
2114 PRINTSOME(line3, depth, " ", 0, 0);
Bram Moolenaar32526b32019-01-19 17:43:09 +01002115 msg(line1);
2116 msg(line2);
2117 msg(line3);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002118 }
2119 else
2120 {
2121 node->wn_u1.index = TRUE;
2122
2123 if (node->wn_byte != NUL)
2124 {
2125 if (node->wn_child != NULL)
2126 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
2127 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002128 // Cannot happen?
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002129 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
2130 }
2131 else
2132 PRINTSOME(line1, depth, " $ ", 0, 0);
2133
2134 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
2135
2136 if (node->wn_sibling != NULL)
2137 PRINTSOME(line3, depth, " | ", 0, 0);
2138 else
2139 PRINTSOME(line3, depth, " ", 0, 0);
2140
2141 if (node->wn_byte == NUL)
2142 {
Bram Moolenaar32526b32019-01-19 17:43:09 +01002143 msg(line1);
2144 msg(line2);
2145 msg(line3);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002146 }
2147
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002148 // do the children
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002149 if (node->wn_byte != NUL && node->wn_child != NULL)
2150 spell_print_node(node->wn_child, depth + 1);
2151
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002152 // do the siblings
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002153 if (node->wn_sibling != NULL)
2154 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002155 // get rid of all parent details except |
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002156 STRCPY(line1, line3);
2157 STRCPY(line2, line3);
2158 spell_print_node(node->wn_sibling, depth);
2159 }
2160 }
2161}
2162
2163 static void
2164spell_print_tree(wordnode_T *root)
2165{
2166 if (root != NULL)
2167 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002168 // Clear the "wn_u1.index" fields, used to remember what has been
2169 // done.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002170 spell_clear_flags(root);
2171
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002172 // Recursively print the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002173 spell_print_node(root, 0);
2174 }
2175}
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002176#endif // SPELL_PRINTTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002177
2178/*
2179 * Read the affix file "fname".
2180 * Returns an afffile_T, NULL for complete failure.
2181 */
2182 static afffile_T *
2183spell_read_aff(spellinfo_T *spin, char_u *fname)
2184{
2185 FILE *fd;
2186 afffile_T *aff;
2187 char_u rline[MAXLINELEN];
2188 char_u *line;
2189 char_u *pc = NULL;
2190#define MAXITEMCNT 30
2191 char_u *(items[MAXITEMCNT]);
2192 int itemcnt;
2193 char_u *p;
2194 int lnum = 0;
2195 affheader_T *cur_aff = NULL;
2196 int did_postpone_prefix = FALSE;
2197 int aff_todo = 0;
2198 hashtab_T *tp;
2199 char_u *low = NULL;
2200 char_u *fol = NULL;
2201 char_u *upp = NULL;
2202 int do_rep;
2203 int do_repsal;
2204 int do_sal;
2205 int do_mapline;
2206 int found_map = FALSE;
2207 hashitem_T *hi;
2208 int l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002209 int compminlen = 0; // COMPOUNDMIN value
2210 int compsylmax = 0; // COMPOUNDSYLMAX value
2211 int compoptions = 0; // COMP_ flags
2212 int compmax = 0; // COMPOUNDWORDMAX value
2213 char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE
2214 // concatenated
2215 char_u *midword = NULL; // MIDWORD value
2216 char_u *syllable = NULL; // SYLLABLE value
2217 char_u *sofofrom = NULL; // SOFOFROM value
2218 char_u *sofoto = NULL; // SOFOTO value
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002219
2220 /*
2221 * Open the file.
2222 */
2223 fd = mch_fopen((char *)fname, "r");
2224 if (fd == NULL)
2225 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00002226 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002227 return NULL;
2228 }
2229
Bram Moolenaarc1669272018-06-19 14:23:53 +02002230 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002231 spell_message(spin, IObuff);
2232
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002233 // Only do REP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002234 do_rep = spin->si_rep.ga_len == 0;
2235
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002236 // Only do REPSAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002237 do_repsal = spin->si_repsal.ga_len == 0;
2238
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002239 // Only do SAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002240 do_sal = spin->si_sal.ga_len == 0;
2241
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002242 // Only do MAP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002243 do_mapline = spin->si_map.ga_len == 0;
2244
2245 /*
2246 * Allocate and init the afffile_T structure.
2247 */
2248 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
2249 if (aff == NULL)
2250 {
2251 fclose(fd);
2252 return NULL;
2253 }
2254 hash_init(&aff->af_pref);
2255 hash_init(&aff->af_suff);
2256 hash_init(&aff->af_comp);
2257
2258 /*
2259 * Read all the lines in the file one by one.
2260 */
2261 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2262 {
2263 line_breakcheck();
2264 ++lnum;
2265
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002266 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002267 if (*rline == '#')
2268 continue;
2269
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002270 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002271 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002272 if (spin->si_conv.vc_type != CONV_NONE)
2273 {
2274 pc = string_convert(&spin->si_conv, rline, NULL);
2275 if (pc == NULL)
2276 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002277 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002278 fname, lnum, rline);
2279 continue;
2280 }
2281 line = pc;
2282 }
2283 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002284 {
2285 pc = NULL;
2286 line = rline;
2287 }
2288
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002289 // Split the line up in white separated items. Put a NUL after each
2290 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002291 itemcnt = 0;
2292 for (p = line; ; )
2293 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002294 while (*p != NUL && *p <= ' ') // skip white space and CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002295 ++p;
2296 if (*p == NUL)
2297 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002298 if (itemcnt == MAXITEMCNT) // too many items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002299 break;
2300 items[itemcnt++] = p;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002301 // A few items have arbitrary text argument, don't split them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002302 if (itemcnt == 2 && spell_info_item(items[0]))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002303 while (*p >= ' ' || *p == TAB) // skip until CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002304 ++p;
2305 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002306 while (*p > ' ') // skip until white space or CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002307 ++p;
2308 if (*p == NUL)
2309 break;
2310 *p++ = NUL;
2311 }
2312
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002313 // Handle non-empty lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002314 if (itemcnt > 0)
2315 {
2316 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
2317 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002318 // Setup for conversion from "ENC" to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002319 aff->af_enc = enc_canonize(items[1]);
2320 if (aff->af_enc != NULL && !spin->si_ascii
2321 && convert_setup(&spin->si_conv, aff->af_enc,
2322 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002323 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002324 fname, aff->af_enc, p_enc);
2325 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002326 }
2327 else if (is_aff_rule(items, itemcnt, "FLAG", 2)
2328 && aff->af_flagtype == AFT_CHAR)
2329 {
2330 if (STRCMP(items[1], "long") == 0)
2331 aff->af_flagtype = AFT_LONG;
2332 else if (STRCMP(items[1], "num") == 0)
2333 aff->af_flagtype = AFT_NUM;
2334 else if (STRCMP(items[1], "caplong") == 0)
2335 aff->af_flagtype = AFT_CAPLONG;
2336 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002337 smsg(_("Invalid value for FLAG in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002338 fname, lnum, items[1]);
2339 if (aff->af_rare != 0
2340 || aff->af_keepcase != 0
2341 || aff->af_bad != 0
2342 || aff->af_needaffix != 0
2343 || aff->af_circumfix != 0
2344 || aff->af_needcomp != 0
2345 || aff->af_comproot != 0
2346 || aff->af_nosuggest != 0
2347 || compflags != NULL
2348 || aff->af_suff.ht_used > 0
2349 || aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002350 smsg(_("FLAG after using flags in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002351 fname, lnum, items[1]);
2352 }
2353 else if (spell_info_item(items[0]))
2354 {
2355 p = (char_u *)getroom(spin,
2356 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
2357 + STRLEN(items[0])
2358 + STRLEN(items[1]) + 3, FALSE);
2359 if (p != NULL)
2360 {
2361 if (spin->si_info != NULL)
2362 {
2363 STRCPY(p, spin->si_info);
2364 STRCAT(p, "\n");
2365 }
2366 STRCAT(p, items[0]);
2367 STRCAT(p, " ");
2368 STRCAT(p, items[1]);
2369 spin->si_info = p;
2370 }
2371 }
2372 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
2373 && midword == NULL)
2374 {
2375 midword = getroom_save(spin, items[1]);
2376 }
2377 else if (is_aff_rule(items, itemcnt, "TRY", 2))
2378 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002379 // ignored, we look in the tree for what chars may appear
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002380 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002381 // TODO: remove "RAR" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002382 else if ((is_aff_rule(items, itemcnt, "RAR", 2)
2383 || is_aff_rule(items, itemcnt, "RARE", 2))
2384 && aff->af_rare == 0)
2385 {
2386 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
2387 fname, lnum);
2388 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002389 // TODO: remove "KEP" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002390 else if ((is_aff_rule(items, itemcnt, "KEP", 2)
2391 || is_aff_rule(items, itemcnt, "KEEPCASE", 2))
2392 && aff->af_keepcase == 0)
2393 {
2394 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
2395 fname, lnum);
2396 }
2397 else if ((is_aff_rule(items, itemcnt, "BAD", 2)
2398 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
2399 && aff->af_bad == 0)
2400 {
2401 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
2402 fname, lnum);
2403 }
2404 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
2405 && aff->af_needaffix == 0)
2406 {
2407 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
2408 fname, lnum);
2409 }
2410 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
2411 && aff->af_circumfix == 0)
2412 {
2413 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
2414 fname, lnum);
2415 }
2416 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
2417 && aff->af_nosuggest == 0)
2418 {
2419 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
2420 fname, lnum);
2421 }
2422 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
2423 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
2424 && aff->af_needcomp == 0)
2425 {
2426 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
2427 fname, lnum);
2428 }
2429 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
2430 && aff->af_comproot == 0)
2431 {
2432 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
2433 fname, lnum);
2434 }
2435 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
2436 && aff->af_compforbid == 0)
2437 {
2438 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
2439 fname, lnum);
2440 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002441 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002442 fname, lnum);
2443 }
2444 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
2445 && aff->af_comppermit == 0)
2446 {
2447 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
2448 fname, lnum);
2449 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002450 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002451 fname, lnum);
2452 }
2453 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
2454 && compflags == NULL)
2455 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002456 // Turn flag "c" into COMPOUNDRULE compatible string "c+",
2457 // "Na" into "Na+", "1234" into "1234+".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002458 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
2459 if (p != NULL)
2460 {
2461 STRCPY(p, items[1]);
2462 STRCAT(p, "+");
2463 compflags = p;
2464 }
2465 }
2466 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
2467 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002468 // We don't use the count, but do check that it's a number and
2469 // not COMPOUNDRULE mistyped.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002470 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002471 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002472 fname, lnum, items[1]);
2473 }
2474 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
2475 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002476 // Don't use the first rule if it is a number.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002477 if (compflags != NULL || *skipdigits(items[1]) != NUL)
2478 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002479 // Concatenate this string to previously defined ones,
2480 // using a slash to separate them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002481 l = (int)STRLEN(items[1]) + 1;
2482 if (compflags != NULL)
2483 l += (int)STRLEN(compflags) + 1;
2484 p = getroom(spin, l, FALSE);
2485 if (p != NULL)
2486 {
2487 if (compflags != NULL)
2488 {
2489 STRCPY(p, compflags);
2490 STRCAT(p, "/");
2491 }
2492 STRCAT(p, items[1]);
2493 compflags = p;
2494 }
2495 }
2496 }
2497 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
2498 && compmax == 0)
2499 {
2500 compmax = atoi((char *)items[1]);
2501 if (compmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002502 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002503 fname, lnum, items[1]);
2504 }
2505 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
2506 && compminlen == 0)
2507 {
2508 compminlen = atoi((char *)items[1]);
2509 if (compminlen == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002510 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002511 fname, lnum, items[1]);
2512 }
2513 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
2514 && compsylmax == 0)
2515 {
2516 compsylmax = atoi((char *)items[1]);
2517 if (compsylmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002518 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002519 fname, lnum, items[1]);
2520 }
2521 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
2522 {
2523 compoptions |= COMP_CHECKDUP;
2524 }
2525 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
2526 {
2527 compoptions |= COMP_CHECKREP;
2528 }
2529 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
2530 {
2531 compoptions |= COMP_CHECKCASE;
2532 }
2533 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
2534 {
2535 compoptions |= COMP_CHECKTRIPLE;
2536 }
2537 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
2538 {
2539 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002540 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002541 fname, lnum, items[1]);
2542 }
2543 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
2544 {
2545 garray_T *gap = &spin->si_comppat;
2546 int i;
2547
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002548 // Only add the couple if it isn't already there.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002549 for (i = 0; i < gap->ga_len - 1; i += 2)
2550 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
2551 && STRCMP(((char_u **)(gap->ga_data))[i + 1],
2552 items[2]) == 0)
2553 break;
2554 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
2555 {
2556 ((char_u **)(gap->ga_data))[gap->ga_len++]
2557 = getroom_save(spin, items[1]);
2558 ((char_u **)(gap->ga_data))[gap->ga_len++]
2559 = getroom_save(spin, items[2]);
2560 }
2561 }
2562 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
2563 && syllable == NULL)
2564 {
2565 syllable = getroom_save(spin, items[1]);
2566 }
2567 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
2568 {
2569 spin->si_nobreak = TRUE;
2570 }
2571 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
2572 {
2573 spin->si_nosplitsugs = TRUE;
2574 }
2575 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
2576 {
2577 spin->si_nocompoundsugs = TRUE;
2578 }
2579 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
2580 {
2581 spin->si_nosugfile = TRUE;
2582 }
2583 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
2584 {
2585 aff->af_pfxpostpone = TRUE;
2586 }
2587 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
2588 {
2589 aff->af_ignoreextra = TRUE;
2590 }
2591 else if ((STRCMP(items[0], "PFX") == 0
2592 || STRCMP(items[0], "SFX") == 0)
2593 && aff_todo == 0
2594 && itemcnt >= 4)
2595 {
2596 int lasti = 4;
2597 char_u key[AH_KEY_LEN];
2598
2599 if (*items[0] == 'P')
2600 tp = &aff->af_pref;
2601 else
2602 tp = &aff->af_suff;
2603
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002604 // Myspell allows the same affix name to be used multiple
2605 // times. The affix files that do this have an undocumented
2606 // "S" flag on all but the last block, thus we check for that
2607 // and store it in ah_follows.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002608 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
2609 hi = hash_find(tp, key);
2610 if (!HASHITEM_EMPTY(hi))
2611 {
2612 cur_aff = HI2AH(hi);
2613 if (cur_aff->ah_combine != (*items[2] == 'Y'))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002614 smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002615 fname, lnum, items[1]);
2616 if (!cur_aff->ah_follows)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002617 smsg(_("Duplicate affix in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002618 fname, lnum, items[1]);
2619 }
2620 else
2621 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002622 // New affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002623 cur_aff = (affheader_T *)getroom(spin,
2624 sizeof(affheader_T), TRUE);
2625 if (cur_aff == NULL)
2626 break;
2627 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
2628 fname, lnum);
2629 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
2630 break;
2631 if (cur_aff->ah_flag == aff->af_bad
2632 || cur_aff->ah_flag == aff->af_rare
2633 || cur_aff->ah_flag == aff->af_keepcase
2634 || cur_aff->ah_flag == aff->af_needaffix
2635 || cur_aff->ah_flag == aff->af_circumfix
2636 || cur_aff->ah_flag == aff->af_nosuggest
2637 || cur_aff->ah_flag == aff->af_needcomp
2638 || cur_aff->ah_flag == aff->af_comproot)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002639 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002640 fname, lnum, items[1]);
2641 STRCPY(cur_aff->ah_key, items[1]);
2642 hash_add(tp, cur_aff->ah_key);
2643
2644 cur_aff->ah_combine = (*items[2] == 'Y');
2645 }
2646
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002647 // Check for the "S" flag, which apparently means that another
2648 // block with the same affix name is following.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002649 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
2650 {
2651 ++lasti;
2652 cur_aff->ah_follows = TRUE;
2653 }
2654 else
2655 cur_aff->ah_follows = FALSE;
2656
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002657 // Myspell allows extra text after the item, but that might
2658 // mean mistakes go unnoticed. Require a comment-starter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002659 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002660 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002661
2662 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002663 smsg(_("Expected Y or N in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002664 fname, lnum, items[2]);
2665
2666 if (*items[0] == 'P' && aff->af_pfxpostpone)
2667 {
2668 if (cur_aff->ah_newID == 0)
2669 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002670 // Use a new number in the .spl file later, to be able
2671 // to handle multiple .aff files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002672 check_renumber(spin);
2673 cur_aff->ah_newID = ++spin->si_newprefID;
2674
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002675 // We only really use ah_newID if the prefix is
2676 // postponed. We know that only after handling all
2677 // the items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002678 did_postpone_prefix = FALSE;
2679 }
2680 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002681 // Did use the ID in a previous block.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002682 did_postpone_prefix = TRUE;
2683 }
2684
2685 aff_todo = atoi((char *)items[3]);
2686 }
2687 else if ((STRCMP(items[0], "PFX") == 0
2688 || STRCMP(items[0], "SFX") == 0)
2689 && aff_todo > 0
2690 && STRCMP(cur_aff->ah_key, items[1]) == 0
2691 && itemcnt >= 5)
2692 {
2693 affentry_T *aff_entry;
2694 int upper = FALSE;
2695 int lasti = 5;
2696
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002697 // Myspell allows extra text after the item, but that might
2698 // mean mistakes go unnoticed. Require a comment-starter,
2699 // unless IGNOREEXTRA is used. Hunspell uses a "-" item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002700 if (itemcnt > lasti
2701 && !aff->af_ignoreextra
2702 && *items[lasti] != '#'
2703 && (STRCMP(items[lasti], "-") != 0
2704 || itemcnt != lasti + 1))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002705 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002706
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002707 // New item for an affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002708 --aff_todo;
2709 aff_entry = (affentry_T *)getroom(spin,
2710 sizeof(affentry_T), TRUE);
2711 if (aff_entry == NULL)
2712 break;
2713
2714 if (STRCMP(items[2], "0") != 0)
2715 aff_entry->ae_chop = getroom_save(spin, items[2]);
2716 if (STRCMP(items[3], "0") != 0)
2717 {
2718 aff_entry->ae_add = getroom_save(spin, items[3]);
2719
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002720 // Recognize flags on the affix: abcd/XYZ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002721 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
2722 if (aff_entry->ae_flags != NULL)
2723 {
2724 *aff_entry->ae_flags++ = NUL;
2725 aff_process_flags(aff, aff_entry);
2726 }
2727 }
2728
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002729 // Don't use an affix entry with non-ASCII characters when
2730 // "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002731 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
2732 || has_non_ascii(aff_entry->ae_add)))
2733 {
2734 aff_entry->ae_next = cur_aff->ah_first;
2735 cur_aff->ah_first = aff_entry;
2736
2737 if (STRCMP(items[4], ".") != 0)
2738 {
2739 char_u buf[MAXLINELEN];
2740
2741 aff_entry->ae_cond = getroom_save(spin, items[4]);
2742 if (*items[0] == 'P')
2743 sprintf((char *)buf, "^%s", items[4]);
2744 else
2745 sprintf((char *)buf, "%s$", items[4]);
2746 aff_entry->ae_prog = vim_regcomp(buf,
2747 RE_MAGIC + RE_STRING + RE_STRICT);
2748 if (aff_entry->ae_prog == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002749 smsg(_("Broken condition in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002750 fname, lnum, items[4]);
2751 }
2752
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002753 // For postponed prefixes we need an entry in si_prefcond
2754 // for the condition. Use an existing one if possible.
2755 // Can't be done for an affix with flags, ignoring
2756 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002757 if (*items[0] == 'P' && aff->af_pfxpostpone
2758 && aff_entry->ae_flags == NULL)
2759 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002760 // When the chop string is one lower-case letter and
2761 // the add string ends in the upper-case letter we set
2762 // the "upper" flag, clear "ae_chop" and remove the
2763 // letters from "ae_add". The condition must either
2764 // be empty or start with the same letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002765 if (aff_entry->ae_chop != NULL
2766 && aff_entry->ae_add != NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002767 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002768 aff_entry->ae_chop)] == NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002769 {
2770 int c, c_up;
2771
2772 c = PTR2CHAR(aff_entry->ae_chop);
2773 c_up = SPELL_TOUPPER(c);
2774 if (c_up != c
2775 && (aff_entry->ae_cond == NULL
2776 || PTR2CHAR(aff_entry->ae_cond) == c))
2777 {
2778 p = aff_entry->ae_add
2779 + STRLEN(aff_entry->ae_add);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002780 MB_PTR_BACK(aff_entry->ae_add, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002781 if (PTR2CHAR(p) == c_up)
2782 {
2783 upper = TRUE;
2784 aff_entry->ae_chop = NULL;
2785 *p = NUL;
2786
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002787 // The condition is matched with the
2788 // actual word, thus must check for the
2789 // upper-case letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002790 if (aff_entry->ae_cond != NULL)
2791 {
2792 char_u buf[MAXLINELEN];
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002793
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002794 if (has_mbyte)
2795 {
2796 onecap_copy(items[4], buf, TRUE);
2797 aff_entry->ae_cond = getroom_save(
2798 spin, buf);
2799 }
2800 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002801 *aff_entry->ae_cond = c_up;
2802 if (aff_entry->ae_cond != NULL)
2803 {
2804 sprintf((char *)buf, "^%s",
2805 aff_entry->ae_cond);
2806 vim_regfree(aff_entry->ae_prog);
2807 aff_entry->ae_prog = vim_regcomp(
2808 buf, RE_MAGIC + RE_STRING);
2809 }
2810 }
2811 }
2812 }
2813 }
2814
2815 if (aff_entry->ae_chop == NULL
2816 && aff_entry->ae_flags == NULL)
2817 {
2818 int idx;
2819 char_u **pp;
2820 int n;
2821
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002822 // Find a previously used condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002823 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
2824 --idx)
2825 {
2826 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
2827 if (str_equal(p, aff_entry->ae_cond))
2828 break;
2829 }
2830 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
2831 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002832 // Not found, add a new condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002833 idx = spin->si_prefcond.ga_len++;
2834 pp = ((char_u **)spin->si_prefcond.ga_data)
2835 + idx;
2836 if (aff_entry->ae_cond == NULL)
2837 *pp = NULL;
2838 else
2839 *pp = getroom_save(spin,
2840 aff_entry->ae_cond);
2841 }
2842
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002843 // Add the prefix to the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002844 if (aff_entry->ae_add == NULL)
2845 p = (char_u *)"";
2846 else
2847 p = aff_entry->ae_add;
2848
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002849 // PFX_FLAGS is a negative number, so that
2850 // tree_add_word() knows this is the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002851 n = PFX_FLAGS;
2852 if (!cur_aff->ah_combine)
2853 n |= WFP_NC;
2854 if (upper)
2855 n |= WFP_UP;
2856 if (aff_entry->ae_comppermit)
2857 n |= WFP_COMPPERMIT;
2858 if (aff_entry->ae_compforbid)
2859 n |= WFP_COMPFORBID;
2860 tree_add_word(spin, p, spin->si_prefroot, n,
2861 idx, cur_aff->ah_newID);
2862 did_postpone_prefix = TRUE;
2863 }
2864
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002865 // Didn't actually use ah_newID, backup si_newprefID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002866 if (aff_todo == 0 && !did_postpone_prefix)
2867 {
2868 --spin->si_newprefID;
2869 cur_aff->ah_newID = 0;
2870 }
2871 }
2872 }
2873 }
2874 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
2875 {
2876 fol = vim_strsave(items[1]);
2877 }
2878 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
2879 {
2880 low = vim_strsave(items[1]);
2881 }
2882 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
2883 {
2884 upp = vim_strsave(items[1]);
2885 }
2886 else if (is_aff_rule(items, itemcnt, "REP", 2)
2887 || is_aff_rule(items, itemcnt, "REPSAL", 2))
2888 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002889 // Ignore REP/REPSAL count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002890 if (!isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002891 smsg(_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002892 fname, lnum);
2893 }
2894 else if ((STRCMP(items[0], "REP") == 0
2895 || STRCMP(items[0], "REPSAL") == 0)
2896 && itemcnt >= 3)
2897 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002898 // REP/REPSAL item
2899 // Myspell ignores extra arguments, we require it starts with
2900 // # to detect mistakes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002901 if (itemcnt > 3 && items[3][0] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002902 smsg(_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002903 if (items[0][3] == 'S' ? do_repsal : do_rep)
2904 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002905 // Replace underscore with space (can't include a space
2906 // directly).
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002907 for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002908 if (*p == '_')
2909 *p = ' ';
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002910 for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002911 if (*p == '_')
2912 *p = ' ';
2913 add_fromto(spin, items[0][3] == 'S'
2914 ? &spin->si_repsal
2915 : &spin->si_rep, items[1], items[2]);
2916 }
2917 }
2918 else if (is_aff_rule(items, itemcnt, "MAP", 2))
2919 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002920 // MAP item or count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002921 if (!found_map)
2922 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002923 // First line contains the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002924 found_map = TRUE;
2925 if (!isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002926 smsg(_("Expected MAP count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002927 fname, lnum);
2928 }
2929 else if (do_mapline)
2930 {
2931 int c;
2932
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002933 // Check that every character appears only once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002934 for (p = items[1]; *p != NUL; )
2935 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002936 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002937 if ((spin->si_map.ga_len > 0
2938 && vim_strchr(spin->si_map.ga_data, c)
2939 != NULL)
2940 || vim_strchr(p, c) != NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002941 smsg(_("Duplicate character in MAP in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002942 fname, lnum);
2943 }
2944
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002945 // We simply concatenate all the MAP strings, separated by
2946 // slashes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002947 ga_concat(&spin->si_map, items[1]);
2948 ga_append(&spin->si_map, '/');
2949 }
2950 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002951 // Accept "SAL from to" and "SAL from to #comment".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002952 else if (is_aff_rule(items, itemcnt, "SAL", 3))
2953 {
2954 if (do_sal)
2955 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002956 // SAL item (sounds-a-like)
2957 // Either one of the known keys or a from-to pair.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002958 if (STRCMP(items[1], "followup") == 0)
2959 spin->si_followup = sal_to_bool(items[2]);
2960 else if (STRCMP(items[1], "collapse_result") == 0)
2961 spin->si_collapse = sal_to_bool(items[2]);
2962 else if (STRCMP(items[1], "remove_accents") == 0)
2963 spin->si_rem_accents = sal_to_bool(items[2]);
2964 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002965 // when "to" is "_" it means empty
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002966 add_fromto(spin, &spin->si_sal, items[1],
2967 STRCMP(items[2], "_") == 0 ? (char_u *)""
2968 : items[2]);
2969 }
2970 }
2971 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
2972 && sofofrom == NULL)
2973 {
2974 sofofrom = getroom_save(spin, items[1]);
2975 }
2976 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
2977 && sofoto == NULL)
2978 {
2979 sofoto = getroom_save(spin, items[1]);
2980 }
2981 else if (STRCMP(items[0], "COMMON") == 0)
2982 {
2983 int i;
2984
2985 for (i = 1; i < itemcnt; ++i)
2986 {
2987 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
2988 items[i])))
2989 {
2990 p = vim_strsave(items[i]);
2991 if (p == NULL)
2992 break;
2993 hash_add(&spin->si_commonwords, p);
2994 }
2995 }
2996 }
2997 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002998 smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002999 fname, lnum, items[0]);
3000 }
3001 }
3002
3003 if (fol != NULL || low != NULL || upp != NULL)
3004 {
3005 if (spin->si_clear_chartab)
3006 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003007 // Clear the char type tables, don't want to use any of the
3008 // currently used spell properties.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003009 init_spell_chartab();
3010 spin->si_clear_chartab = FALSE;
3011 }
3012
3013 /*
3014 * Don't write a word table for an ASCII file, so that we don't check
3015 * for conflicts with a word table that matches 'encoding'.
3016 * Don't write one for utf-8 either, we use utf_*() and
3017 * mb_get_class(), the list of chars in the file will be incomplete.
3018 */
Bram Moolenaar264b74f2019-01-24 17:18:42 +01003019 if (!spin->si_ascii && !enc_utf8)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003020 {
3021 if (fol == NULL || low == NULL || upp == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003022 smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003023 else
3024 (void)set_spell_chartab(fol, low, upp);
3025 }
3026
3027 vim_free(fol);
3028 vim_free(low);
3029 vim_free(upp);
3030 }
3031
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003032 // Use compound specifications of the .aff file for the spell info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003033 if (compmax != 0)
3034 {
3035 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
3036 spin->si_compmax = compmax;
3037 }
3038
3039 if (compminlen != 0)
3040 {
3041 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
3042 spin->si_compminlen = compminlen;
3043 }
3044
3045 if (compsylmax != 0)
3046 {
3047 if (syllable == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003048 smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003049 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
3050 spin->si_compsylmax = compsylmax;
3051 }
3052
3053 if (compoptions != 0)
3054 {
3055 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
3056 spin->si_compoptions |= compoptions;
3057 }
3058
3059 if (compflags != NULL)
3060 process_compflags(spin, aff, compflags);
3061
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003062 // Check that we didn't use too many renumbered flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003063 if (spin->si_newcompID < spin->si_newprefID)
3064 {
3065 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003066 msg(_("Too many postponed prefixes"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003067 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003068 msg(_("Too many compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003069 else
Bram Moolenaar32526b32019-01-19 17:43:09 +01003070 msg(_("Too many postponed prefixes and/or compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003071 }
3072
3073 if (syllable != NULL)
3074 {
3075 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
3076 spin->si_syllable = syllable;
3077 }
3078
3079 if (sofofrom != NULL || sofoto != NULL)
3080 {
3081 if (sofofrom == NULL || sofoto == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003082 smsg(_("Missing SOFO%s line in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003083 sofofrom == NULL ? "FROM" : "TO", fname);
3084 else if (spin->si_sal.ga_len > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003085 smsg(_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003086 else
3087 {
3088 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
3089 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
3090 spin->si_sofofr = sofofrom;
3091 spin->si_sofoto = sofoto;
3092 }
3093 }
3094
3095 if (midword != NULL)
3096 {
3097 aff_check_string(spin->si_midword, midword, "MIDWORD");
3098 spin->si_midword = midword;
3099 }
3100
3101 vim_free(pc);
3102 fclose(fd);
3103 return aff;
3104}
3105
3106/*
3107 * Return TRUE when items[0] equals "rulename", there are "mincount" items or
3108 * a comment is following after item "mincount".
3109 */
3110 static int
3111is_aff_rule(
3112 char_u **items,
3113 int itemcnt,
3114 char *rulename,
3115 int mincount)
3116{
3117 return (STRCMP(items[0], rulename) == 0
3118 && (itemcnt == mincount
3119 || (itemcnt > mincount && items[mincount][0] == '#')));
3120}
3121
3122/*
3123 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
3124 * ae_flags to ae_comppermit and ae_compforbid.
3125 */
3126 static void
3127aff_process_flags(afffile_T *affile, affentry_T *entry)
3128{
3129 char_u *p;
3130 char_u *prevp;
3131 unsigned flag;
3132
3133 if (entry->ae_flags != NULL
3134 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
3135 {
3136 for (p = entry->ae_flags; *p != NUL; )
3137 {
3138 prevp = p;
3139 flag = get_affitem(affile->af_flagtype, &p);
3140 if (flag == affile->af_comppermit || flag == affile->af_compforbid)
3141 {
3142 STRMOVE(prevp, p);
3143 p = prevp;
3144 if (flag == affile->af_comppermit)
3145 entry->ae_comppermit = TRUE;
3146 else
3147 entry->ae_compforbid = TRUE;
3148 }
3149 if (affile->af_flagtype == AFT_NUM && *p == ',')
3150 ++p;
3151 }
3152 if (*entry->ae_flags == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003153 entry->ae_flags = NULL; // nothing left
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003154 }
3155}
3156
3157/*
3158 * Return TRUE if "s" is the name of an info item in the affix file.
3159 */
3160 static int
3161spell_info_item(char_u *s)
3162{
3163 return STRCMP(s, "NAME") == 0
3164 || STRCMP(s, "HOME") == 0
3165 || STRCMP(s, "VERSION") == 0
3166 || STRCMP(s, "AUTHOR") == 0
3167 || STRCMP(s, "EMAIL") == 0
3168 || STRCMP(s, "COPYRIGHT") == 0;
3169}
3170
3171/*
3172 * Turn an affix flag name into a number, according to the FLAG type.
3173 * returns zero for failure.
3174 */
3175 static unsigned
3176affitem2flag(
3177 int flagtype,
3178 char_u *item,
3179 char_u *fname,
3180 int lnum)
3181{
3182 unsigned res;
3183 char_u *p = item;
3184
3185 res = get_affitem(flagtype, &p);
3186 if (res == 0)
3187 {
3188 if (flagtype == AFT_NUM)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003189 smsg(_("Flag is not a number in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003190 fname, lnum, item);
3191 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003192 smsg(_("Illegal flag in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003193 fname, lnum, item);
3194 }
3195 if (*p != NUL)
3196 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003197 smsg(_(e_affname), fname, lnum, item);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003198 return 0;
3199 }
3200
3201 return res;
3202}
3203
3204/*
3205 * Get one affix name from "*pp" and advance the pointer.
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003206 * Returns ZERO_FLAG for "0".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003207 * Returns zero for an error, still advances the pointer then.
3208 */
3209 static unsigned
3210get_affitem(int flagtype, char_u **pp)
3211{
3212 int res;
3213
3214 if (flagtype == AFT_NUM)
3215 {
3216 if (!VIM_ISDIGIT(**pp))
3217 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003218 ++*pp; // always advance, avoid getting stuck
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003219 return 0;
3220 }
3221 res = getdigits(pp);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003222 if (res == 0)
3223 res = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003224 }
3225 else
3226 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003227 res = mb_ptr2char_adv(pp);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003228 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
3229 && res >= 'A' && res <= 'Z'))
3230 {
3231 if (**pp == NUL)
3232 return 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003233 res = mb_ptr2char_adv(pp) + (res << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003234 }
3235 }
3236 return res;
3237}
3238
3239/*
3240 * Process the "compflags" string used in an affix file and append it to
3241 * spin->si_compflags.
3242 * The processing involves changing the affix names to ID numbers, so that
3243 * they fit in one byte.
3244 */
3245 static void
3246process_compflags(
3247 spellinfo_T *spin,
3248 afffile_T *aff,
3249 char_u *compflags)
3250{
3251 char_u *p;
3252 char_u *prevp;
3253 unsigned flag;
3254 compitem_T *ci;
3255 int id;
3256 int len;
3257 char_u *tp;
3258 char_u key[AH_KEY_LEN];
3259 hashitem_T *hi;
3260
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003261 // Make room for the old and the new compflags, concatenated with a / in
3262 // between. Processing it makes it shorter, but we don't know by how
3263 // much, thus allocate the maximum.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003264 len = (int)STRLEN(compflags) + 1;
3265 if (spin->si_compflags != NULL)
3266 len += (int)STRLEN(spin->si_compflags) + 1;
3267 p = getroom(spin, len, FALSE);
3268 if (p == NULL)
3269 return;
3270 if (spin->si_compflags != NULL)
3271 {
3272 STRCPY(p, spin->si_compflags);
3273 STRCAT(p, "/");
3274 }
3275 spin->si_compflags = p;
3276 tp = p + STRLEN(p);
3277
3278 for (p = compflags; *p != NUL; )
3279 {
3280 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003281 // Copy non-flag characters directly.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003282 *tp++ = *p++;
3283 else
3284 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003285 // First get the flag number, also checks validity.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003286 prevp = p;
3287 flag = get_affitem(aff->af_flagtype, &p);
3288 if (flag != 0)
3289 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003290 // Find the flag in the hashtable. If it was used before, use
3291 // the existing ID. Otherwise add a new entry.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003292 vim_strncpy(key, prevp, p - prevp);
3293 hi = hash_find(&aff->af_comp, key);
3294 if (!HASHITEM_EMPTY(hi))
3295 id = HI2CI(hi)->ci_newID;
3296 else
3297 {
3298 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
3299 if (ci == NULL)
3300 break;
3301 STRCPY(ci->ci_key, key);
3302 ci->ci_flag = flag;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003303 // Avoid using a flag ID that has a special meaning in a
3304 // regexp (also inside []).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003305 do
3306 {
3307 check_renumber(spin);
3308 id = spin->si_newcompID--;
3309 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL);
3310 ci->ci_newID = id;
3311 hash_add(&aff->af_comp, ci->ci_key);
3312 }
3313 *tp++ = id;
3314 }
3315 if (aff->af_flagtype == AFT_NUM && *p == ',')
3316 ++p;
3317 }
3318 }
3319
3320 *tp = NUL;
3321}
3322
3323/*
3324 * Check that the new IDs for postponed affixes and compounding don't overrun
3325 * each other. We have almost 255 available, but start at 0-127 to avoid
3326 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
3327 * When that is used up an error message is given.
3328 */
3329 static void
3330check_renumber(spellinfo_T *spin)
3331{
3332 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
3333 {
3334 spin->si_newprefID = 127;
3335 spin->si_newcompID = 255;
3336 }
3337}
3338
3339/*
3340 * Return TRUE if flag "flag" appears in affix list "afflist".
3341 */
3342 static int
3343flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
3344{
3345 char_u *p;
3346 unsigned n;
3347
3348 switch (flagtype)
3349 {
3350 case AFT_CHAR:
3351 return vim_strchr(afflist, flag) != NULL;
3352
3353 case AFT_CAPLONG:
3354 case AFT_LONG:
3355 for (p = afflist; *p != NUL; )
3356 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003357 n = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003358 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
3359 && *p != NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003360 n = mb_ptr2char_adv(&p) + (n << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003361 if (n == flag)
3362 return TRUE;
3363 }
3364 break;
3365
3366 case AFT_NUM:
3367 for (p = afflist; *p != NUL; )
3368 {
3369 n = getdigits(&p);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003370 if (n == 0)
3371 n = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003372 if (n == flag)
3373 return TRUE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003374 if (*p != NUL) // skip over comma
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003375 ++p;
3376 }
3377 break;
3378 }
3379 return FALSE;
3380}
3381
/*
 * Warn when the number "spinval" was set before (is not zero) and differs
 * from "affval".  "name" is the item name used in the message.
 */
    static void
aff_check_number(int spinval, int affval, char *name)
{
    if (spinval == 0 || spinval == affval)
        return;

    smsg(_("%s value differs from what is used in another .aff file"), name);
}
3391
3392/*
3393 * Give a warning when "spinval" and "affval" strings are set and not the same.
3394 */
3395 static void
3396aff_check_string(char_u *spinval, char_u *affval, char *name)
3397{
3398 if (spinval != NULL && STRCMP(spinval, affval) != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003399 smsg(_("%s value differs from what is used in another .aff file"), name);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003400}
3401
3402/*
3403 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
3404 * NULL as equal.
3405 */
3406 static int
3407str_equal(char_u *s1, char_u *s2)
3408{
3409 if (s1 == NULL || s2 == NULL)
3410 return s1 == s2;
3411 return STRCMP(s1, s2) == 0;
3412}
3413
3414/*
3415 * Add a from-to item to "gap". Used for REP and SAL items.
3416 * They are stored case-folded.
3417 */
3418 static void
3419add_fromto(
3420 spellinfo_T *spin,
3421 garray_T *gap,
3422 char_u *from,
3423 char_u *to)
3424{
3425 fromto_T *ftp;
3426 char_u word[MAXWLEN];
3427
3428 if (ga_grow(gap, 1) == OK)
3429 {
3430 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
Bram Moolenaar4f135272021-06-11 19:07:40 +02003431 (void)spell_casefold(curwin, from, (int)STRLEN(from), word, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003432 ftp->ft_from = getroom_save(spin, word);
Bram Moolenaar4f135272021-06-11 19:07:40 +02003433 (void)spell_casefold(curwin, to, (int)STRLEN(to), word, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003434 ftp->ft_to = getroom_save(spin, word);
3435 ++gap->ga_len;
3436 }
3437}
3438
3439/*
3440 * Convert a boolean argument in a SAL line to TRUE or FALSE;
3441 */
3442 static int
3443sal_to_bool(char_u *s)
3444{
3445 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
3446}
3447
3448/*
3449 * Free the structure filled by spell_read_aff().
3450 */
3451 static void
3452spell_free_aff(afffile_T *aff)
3453{
3454 hashtab_T *ht;
3455 hashitem_T *hi;
3456 int todo;
3457 affheader_T *ah;
3458 affentry_T *ae;
3459
3460 vim_free(aff->af_enc);
3461
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003462 // All this trouble to free the "ae_prog" items...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003463 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
3464 {
3465 todo = (int)ht->ht_used;
3466 for (hi = ht->ht_array; todo > 0; ++hi)
3467 {
3468 if (!HASHITEM_EMPTY(hi))
3469 {
3470 --todo;
3471 ah = HI2AH(hi);
3472 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3473 vim_regfree(ae->ae_prog);
3474 }
3475 }
3476 if (ht == &aff->af_suff)
3477 break;
3478 }
3479
3480 hash_clear(&aff->af_pref);
3481 hash_clear(&aff->af_suff);
3482 hash_clear(&aff->af_comp);
3483}
3484
3485/*
3486 * Read dictionary file "fname".
3487 * Returns OK or FAIL;
3488 */
3489 static int
3490spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
3491{
3492 hashtab_T ht;
3493 char_u line[MAXLINELEN];
3494 char_u *p;
3495 char_u *afflist;
3496 char_u store_afflist[MAXWLEN];
3497 int pfxlen;
3498 int need_affix;
3499 char_u *dw;
3500 char_u *pc;
3501 char_u *w;
3502 int l;
3503 hash_T hash;
3504 hashitem_T *hi;
3505 FILE *fd;
3506 int lnum = 1;
3507 int non_ascii = 0;
3508 int retval = OK;
3509 char_u message[MAXLINELEN + MAXWLEN];
3510 int flags;
3511 int duplicate = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003512 time_T last_msg_time = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003513
3514 /*
3515 * Open the file.
3516 */
3517 fd = mch_fopen((char *)fname, "r");
3518 if (fd == NULL)
3519 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00003520 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003521 return FAIL;
3522 }
3523
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003524 // The hashtable is only used to detect duplicated words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003525 hash_init(&ht);
3526
3527 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02003528 _("Reading dictionary file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003529 spell_message(spin, IObuff);
3530
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003531 // start with a message for the first line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003532 spin->si_msg_count = 999999;
3533
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003534 // Read and ignore the first line: word count.
Bram Moolenaare90d63e2020-09-02 12:58:48 +02003535 if (vim_fgets(line, MAXLINELEN, fd) || !vim_isdigit(*skipwhite(line)))
Bram Moolenaar677658a2022-01-05 16:09:06 +00003536 semsg(_(e_no_word_count_in_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003537
3538 /*
3539 * Read all the lines in the file one by one.
3540 * The words are converted to 'encoding' here, before being added to
3541 * the hashtable.
3542 */
3543 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
3544 {
3545 line_breakcheck();
3546 ++lnum;
3547 if (line[0] == '#' || line[0] == '/')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003548 continue; // comment line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003549
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003550 // Remove CR, LF and white space from the end. White space halfway
3551 // the word is kept to allow e.g., "et al.".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003552 l = (int)STRLEN(line);
3553 while (l > 0 && line[l - 1] <= ' ')
3554 --l;
3555 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003556 continue; // empty line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003557 line[l] = NUL;
3558
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003559 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003560 if (spin->si_conv.vc_type != CONV_NONE)
3561 {
3562 pc = string_convert(&spin->si_conv, line, NULL);
3563 if (pc == NULL)
3564 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003565 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003566 fname, lnum, line);
3567 continue;
3568 }
3569 w = pc;
3570 }
3571 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003572 {
3573 pc = NULL;
3574 w = line;
3575 }
3576
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003577 // Truncate the word at the "/", set "afflist" to what follows.
3578 // Replace "\/" by "/" and "\\" by "\".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003579 afflist = NULL;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003580 for (p = w; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003581 {
3582 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
3583 STRMOVE(p, p + 1);
3584 else if (*p == '/')
3585 {
3586 *p = NUL;
3587 afflist = p + 1;
3588 break;
3589 }
3590 }
3591
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003592 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003593 if (spin->si_ascii && has_non_ascii(w))
3594 {
3595 ++non_ascii;
3596 vim_free(pc);
3597 continue;
3598 }
3599
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003600 // This takes time, print a message every 10000 words, but not more
3601 // often than once per second.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003602 if (spin->si_verbose && spin->si_msg_count > 10000)
3603 {
3604 spin->si_msg_count = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003605 if (vim_time() > last_msg_time)
3606 {
3607 last_msg_time = vim_time();
3608 vim_snprintf((char *)message, sizeof(message),
3609 _("line %6d, word %6ld - %s"),
3610 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3611 msg_start();
3612 msg_outtrans_long_attr(message, 0);
3613 msg_clr_eos();
3614 msg_didout = FALSE;
3615 msg_col = 0;
3616 out_flush();
3617 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003618 }
3619
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003620 // Store the word in the hashtable to be able to find duplicates.
=?UTF-8?q?Dundar=20G=C3=B6c?=420fabc2022-01-28 15:28:04 +00003621 dw = getroom_save(spin, w);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003622 if (dw == NULL)
3623 {
3624 retval = FAIL;
3625 vim_free(pc);
3626 break;
3627 }
3628
3629 hash = hash_hash(dw);
3630 hi = hash_lookup(&ht, dw, hash);
3631 if (!HASHITEM_EMPTY(hi))
3632 {
3633 if (p_verbose > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003634 smsg(_("Duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003635 fname, lnum, dw);
3636 else if (duplicate == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003637 smsg(_("First duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003638 fname, lnum, dw);
3639 ++duplicate;
3640 }
3641 else
3642 hash_add_item(&ht, hi, dw, hash);
3643
3644 flags = 0;
3645 store_afflist[0] = NUL;
3646 pfxlen = 0;
3647 need_affix = FALSE;
3648 if (afflist != NULL)
3649 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003650 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003651 flags |= get_affix_flags(affile, afflist);
3652
3653 if (affile->af_needaffix != 0 && flag_in_afflist(
3654 affile->af_flagtype, afflist, affile->af_needaffix))
3655 need_affix = TRUE;
3656
3657 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003658 // Need to store the list of prefix IDs with the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003659 pfxlen = get_pfxlist(affile, afflist, store_afflist);
3660
3661 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003662 // Need to store the list of compound flags with the word.
3663 // Concatenate them to the list of prefix IDs.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003664 get_compflags(affile, afflist, store_afflist + pfxlen);
3665 }
3666
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003667 // Add the word to the word tree(s).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003668 if (store_word(spin, dw, flags, spin->si_region,
3669 store_afflist, need_affix) == FAIL)
3670 retval = FAIL;
3671
3672 if (afflist != NULL)
3673 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003674 // Find all matching suffixes and add the resulting words.
3675 // Additionally do matching prefixes that combine.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003676 if (store_aff_word(spin, dw, afflist, affile,
3677 &affile->af_suff, &affile->af_pref,
3678 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3679 retval = FAIL;
3680
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003681 // Find all matching prefixes and add the resulting words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003682 if (store_aff_word(spin, dw, afflist, affile,
3683 &affile->af_pref, NULL,
3684 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3685 retval = FAIL;
3686 }
3687
3688 vim_free(pc);
3689 }
3690
3691 if (duplicate > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003692 smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003693 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003694 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003695 non_ascii, fname);
3696 hash_clear(&ht);
3697
3698 fclose(fd);
3699 return retval;
3700}
3701
3702/*
3703 * Check for affix flags in "afflist" that are turned into word flags.
3704 * Return WF_ flags.
3705 */
3706 static int
3707get_affix_flags(afffile_T *affile, char_u *afflist)
3708{
3709 int flags = 0;
3710
3711 if (affile->af_keepcase != 0 && flag_in_afflist(
3712 affile->af_flagtype, afflist, affile->af_keepcase))
3713 flags |= WF_KEEPCAP | WF_FIXCAP;
3714 if (affile->af_rare != 0 && flag_in_afflist(
3715 affile->af_flagtype, afflist, affile->af_rare))
3716 flags |= WF_RARE;
3717 if (affile->af_bad != 0 && flag_in_afflist(
3718 affile->af_flagtype, afflist, affile->af_bad))
3719 flags |= WF_BANNED;
3720 if (affile->af_needcomp != 0 && flag_in_afflist(
3721 affile->af_flagtype, afflist, affile->af_needcomp))
3722 flags |= WF_NEEDCOMP;
3723 if (affile->af_comproot != 0 && flag_in_afflist(
3724 affile->af_flagtype, afflist, affile->af_comproot))
3725 flags |= WF_COMPROOT;
3726 if (affile->af_nosuggest != 0 && flag_in_afflist(
3727 affile->af_flagtype, afflist, affile->af_nosuggest))
3728 flags |= WF_NOSUGGEST;
3729 return flags;
3730}
3731
3732/*
3733 * Get the list of prefix IDs from the affix list "afflist".
3734 * Used for PFXPOSTPONE.
3735 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
3736 * and return the number of affixes.
3737 */
3738 static int
3739get_pfxlist(
3740 afffile_T *affile,
3741 char_u *afflist,
3742 char_u *store_afflist)
3743{
3744 char_u *p;
3745 char_u *prevp;
3746 int cnt = 0;
3747 int id;
3748 char_u key[AH_KEY_LEN];
3749 hashitem_T *hi;
3750
3751 for (p = afflist; *p != NUL; )
3752 {
3753 prevp = p;
3754 if (get_affitem(affile->af_flagtype, &p) != 0)
3755 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003756 // A flag is a postponed prefix flag if it appears in "af_pref"
3757 // and its ID is not zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003758 vim_strncpy(key, prevp, p - prevp);
3759 hi = hash_find(&affile->af_pref, key);
3760 if (!HASHITEM_EMPTY(hi))
3761 {
3762 id = HI2AH(hi)->ah_newID;
3763 if (id != 0)
3764 store_afflist[cnt++] = id;
3765 }
3766 }
3767 if (affile->af_flagtype == AFT_NUM && *p == ',')
3768 ++p;
3769 }
3770
3771 store_afflist[cnt] = NUL;
3772 return cnt;
3773}
3774
3775/*
3776 * Get the list of compound IDs from the affix list "afflist" that are used
3777 * for compound words.
3778 * Puts the flags in "store_afflist[]".
3779 */
3780 static void
3781get_compflags(
3782 afffile_T *affile,
3783 char_u *afflist,
3784 char_u *store_afflist)
3785{
3786 char_u *p;
3787 char_u *prevp;
3788 int cnt = 0;
3789 char_u key[AH_KEY_LEN];
3790 hashitem_T *hi;
3791
3792 for (p = afflist; *p != NUL; )
3793 {
3794 prevp = p;
3795 if (get_affitem(affile->af_flagtype, &p) != 0)
3796 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003797 // A flag is a compound flag if it appears in "af_comp".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003798 vim_strncpy(key, prevp, p - prevp);
3799 hi = hash_find(&affile->af_comp, key);
3800 if (!HASHITEM_EMPTY(hi))
3801 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
3802 }
3803 if (affile->af_flagtype == AFT_NUM && *p == ',')
3804 ++p;
3805 }
3806
3807 store_afflist[cnt] = NUL;
3808}
3809
3810/*
3811 * Apply affixes to a word and store the resulting words.
3812 * "ht" is the hashtable with affentry_T that need to be applied, either
3813 * prefixes or suffixes.
3814 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
3815 * the resulting words for combining affixes.
3816 *
3817 * Returns FAIL when out of memory.
3818 */
3819 static int
3820store_aff_word(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003821 spellinfo_T *spin, // spell info
3822 char_u *word, // basic word start
3823 char_u *afflist, // list of names of supported affixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003824 afffile_T *affile,
3825 hashtab_T *ht,
3826 hashtab_T *xht,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003827 int condit, // CONDIT_SUF et al.
3828 int flags, // flags for the word
3829 char_u *pfxlist, // list of prefix IDs
3830 int pfxlen) // nr of flags in "pfxlist" for prefixes, rest
3831 // is compound flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003832{
3833 int todo;
3834 hashitem_T *hi;
3835 affheader_T *ah;
3836 affentry_T *ae;
3837 char_u newword[MAXWLEN];
3838 int retval = OK;
3839 int i, j;
3840 char_u *p;
3841 int use_flags;
3842 char_u *use_pfxlist;
3843 int use_pfxlen;
3844 int need_affix;
3845 char_u store_afflist[MAXWLEN];
3846 char_u pfx_pfxlist[MAXWLEN];
3847 size_t wordlen = STRLEN(word);
3848 int use_condit;
3849
3850 todo = (int)ht->ht_used;
3851 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
3852 {
3853 if (!HASHITEM_EMPTY(hi))
3854 {
3855 --todo;
3856 ah = HI2AH(hi);
3857
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003858 // Check that the affix combines, if required, and that the word
3859 // supports this affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003860 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
3861 && flag_in_afflist(affile->af_flagtype, afflist,
3862 ah->ah_flag))
3863 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003864 // Loop over all affix entries with this name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003865 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3866 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003867 // Check the condition. It's not logical to match case
3868 // here, but it is required for compatibility with
3869 // Myspell.
3870 // Another requirement from Myspell is that the chop
3871 // string is shorter than the word itself.
3872 // For prefixes, when "PFXPOSTPONE" was used, only do
3873 // prefixes with a chop string and/or flags.
3874 // When a previously added affix had CIRCUMFIX this one
3875 // must have it too, if it had not then this one must not
3876 // have one either.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003877 if ((xht != NULL || !affile->af_pfxpostpone
3878 || ae->ae_chop != NULL
3879 || ae->ae_flags != NULL)
3880 && (ae->ae_chop == NULL
3881 || STRLEN(ae->ae_chop) < wordlen)
3882 && (ae->ae_prog == NULL
3883 || vim_regexec_prog(&ae->ae_prog, FALSE,
3884 word, (colnr_T)0))
3885 && (((condit & CONDIT_CFIX) == 0)
3886 == ((condit & CONDIT_AFF) == 0
3887 || ae->ae_flags == NULL
3888 || !flag_in_afflist(affile->af_flagtype,
3889 ae->ae_flags, affile->af_circumfix))))
3890 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003891 // Match. Remove the chop and add the affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003892 if (xht == NULL)
3893 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003894 // prefix: chop/add at the start of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003895 if (ae->ae_add == NULL)
3896 *newword = NUL;
3897 else
3898 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
3899 p = word;
3900 if (ae->ae_chop != NULL)
3901 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003902 // Skip chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003903 if (has_mbyte)
3904 {
3905 i = mb_charlen(ae->ae_chop);
3906 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003907 MB_PTR_ADV(p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003908 }
3909 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003910 p += STRLEN(ae->ae_chop);
3911 }
3912 STRCAT(newword, p);
3913 }
3914 else
3915 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003916 // suffix: chop/add at the end of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003917 vim_strncpy(newword, word, MAXWLEN - 1);
3918 if (ae->ae_chop != NULL)
3919 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003920 // Remove chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003921 p = newword + STRLEN(newword);
3922 i = (int)MB_CHARLEN(ae->ae_chop);
3923 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003924 MB_PTR_BACK(newword, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003925 *p = NUL;
3926 }
3927 if (ae->ae_add != NULL)
3928 STRCAT(newword, ae->ae_add);
3929 }
3930
3931 use_flags = flags;
3932 use_pfxlist = pfxlist;
3933 use_pfxlen = pfxlen;
3934 need_affix = FALSE;
3935 use_condit = condit | CONDIT_COMB | CONDIT_AFF;
3936 if (ae->ae_flags != NULL)
3937 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003938 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003939 use_flags |= get_affix_flags(affile, ae->ae_flags);
3940
3941 if (affile->af_needaffix != 0 && flag_in_afflist(
3942 affile->af_flagtype, ae->ae_flags,
3943 affile->af_needaffix))
3944 need_affix = TRUE;
3945
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003946 // When there is a CIRCUMFIX flag the other affix
3947 // must also have it and we don't add the word
3948 // with one affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003949 if (affile->af_circumfix != 0 && flag_in_afflist(
3950 affile->af_flagtype, ae->ae_flags,
3951 affile->af_circumfix))
3952 {
3953 use_condit |= CONDIT_CFIX;
3954 if ((condit & CONDIT_CFIX) == 0)
3955 need_affix = TRUE;
3956 }
3957
3958 if (affile->af_pfxpostpone
3959 || spin->si_compflags != NULL)
3960 {
3961 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003962 // Get prefix IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003963 use_pfxlen = get_pfxlist(affile,
3964 ae->ae_flags, store_afflist);
3965 else
3966 use_pfxlen = 0;
3967 use_pfxlist = store_afflist;
3968
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003969 // Combine the prefix IDs. Avoid adding the
3970 // same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003971 for (i = 0; i < pfxlen; ++i)
3972 {
3973 for (j = 0; j < use_pfxlen; ++j)
3974 if (pfxlist[i] == use_pfxlist[j])
3975 break;
3976 if (j == use_pfxlen)
3977 use_pfxlist[use_pfxlen++] = pfxlist[i];
3978 }
3979
3980 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003981 // Get compound IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003982 get_compflags(affile, ae->ae_flags,
3983 use_pfxlist + use_pfxlen);
3984
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003985 // Combine the list of compound flags.
3986 // Concatenate them to the prefix IDs list.
3987 // Avoid adding the same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003988 for (i = pfxlen; pfxlist[i] != NUL; ++i)
3989 {
3990 for (j = use_pfxlen;
3991 use_pfxlist[j] != NUL; ++j)
3992 if (pfxlist[i] == use_pfxlist[j])
3993 break;
3994 if (use_pfxlist[j] == NUL)
3995 {
3996 use_pfxlist[j++] = pfxlist[i];
3997 use_pfxlist[j] = NUL;
3998 }
3999 }
4000 }
4001 }
4002
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004003 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
4004 // use the compound flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004005 if (use_pfxlist != NULL && ae->ae_compforbid)
4006 {
4007 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
4008 use_pfxlist = pfx_pfxlist;
4009 }
4010
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004011 // When there are postponed prefixes...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004012 if (spin->si_prefroot != NULL
4013 && spin->si_prefroot->wn_sibling != NULL)
4014 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004015 // ... add a flag to indicate an affix was used.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004016 use_flags |= WF_HAS_AFF;
4017
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004018 // ... don't use a prefix list if combining
4019 // affixes is not allowed. But do use the
4020 // compound flags after them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004021 if (!ah->ah_combine && use_pfxlist != NULL)
4022 use_pfxlist += use_pfxlen;
4023 }
4024
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004025 // When compounding is supported and there is no
4026 // "COMPOUNDPERMITFLAG" then forbid compounding on the
4027 // side where the affix is applied.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004028 if (spin->si_compflags != NULL && !ae->ae_comppermit)
4029 {
4030 if (xht != NULL)
4031 use_flags |= WF_NOCOMPAFT;
4032 else
4033 use_flags |= WF_NOCOMPBEF;
4034 }
4035
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004036 // Store the modified word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004037 if (store_word(spin, newword, use_flags,
4038 spin->si_region, use_pfxlist,
4039 need_affix) == FAIL)
4040 retval = FAIL;
4041
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004042 // When added a prefix or a first suffix and the affix
4043 // has flags may add a(nother) suffix. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004044 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
4045 if (store_aff_word(spin, newword, ae->ae_flags,
4046 affile, &affile->af_suff, xht,
4047 use_condit & (xht == NULL
4048 ? ~0 : ~CONDIT_SUF),
4049 use_flags, use_pfxlist, pfxlen) == FAIL)
4050 retval = FAIL;
4051
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004052 // When added a suffix and combining is allowed also
4053 // try adding a prefix additionally. Both for the
4054 // word flags and for the affix flags. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004055 if (xht != NULL && ah->ah_combine)
4056 {
4057 if (store_aff_word(spin, newword,
4058 afflist, affile,
4059 xht, NULL, use_condit,
4060 use_flags, use_pfxlist,
4061 pfxlen) == FAIL
4062 || (ae->ae_flags != NULL
4063 && store_aff_word(spin, newword,
4064 ae->ae_flags, affile,
4065 xht, NULL, use_condit,
4066 use_flags, use_pfxlist,
4067 pfxlen) == FAIL))
4068 retval = FAIL;
4069 }
4070 }
4071 }
4072 }
4073 }
4074 }
4075
4076 return retval;
4077}
4078
4079/*
4080 * Read a file with a list of words.
4081 */
4082 static int
4083spell_read_wordfile(spellinfo_T *spin, char_u *fname)
4084{
4085 FILE *fd;
4086 long lnum = 0;
4087 char_u rline[MAXLINELEN];
4088 char_u *line;
4089 char_u *pc = NULL;
4090 char_u *p;
4091 int l;
4092 int retval = OK;
4093 int did_word = FALSE;
4094 int non_ascii = 0;
4095 int flags;
4096 int regionmask;
4097
4098 /*
4099 * Open the file.
4100 */
4101 fd = mch_fopen((char *)fname, "r");
4102 if (fd == NULL)
4103 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004104 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004105 return FAIL;
4106 }
4107
Bram Moolenaarc1669272018-06-19 14:23:53 +02004108 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004109 spell_message(spin, IObuff);
4110
4111 /*
4112 * Read all the lines in the file one by one.
4113 */
4114 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
4115 {
4116 line_breakcheck();
4117 ++lnum;
4118
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004119 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004120 if (*rline == '#')
4121 continue;
4122
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004123 // Remove CR, LF and white space from the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004124 l = (int)STRLEN(rline);
4125 while (l > 0 && rline[l - 1] <= ' ')
4126 --l;
4127 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004128 continue; // empty or blank line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004129 rline[l] = NUL;
4130
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004131 // Convert from "/encoding={encoding}" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004132 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004133 if (spin->si_conv.vc_type != CONV_NONE)
4134 {
4135 pc = string_convert(&spin->si_conv, rline, NULL);
4136 if (pc == NULL)
4137 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004138 smsg(_("Conversion failure for word in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004139 fname, lnum, rline);
4140 continue;
4141 }
4142 line = pc;
4143 }
4144 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004145 {
4146 pc = NULL;
4147 line = rline;
4148 }
4149
4150 if (*line == '/')
4151 {
4152 ++line;
4153 if (STRNCMP(line, "encoding=", 9) == 0)
4154 {
4155 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004156 smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004157 fname, lnum, line - 1);
4158 else if (did_word)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004159 smsg(_("/encoding= line after word ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004160 fname, lnum, line - 1);
4161 else
4162 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004163 char_u *enc;
4164
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004165 // Setup for conversion to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004166 line += 9;
4167 enc = enc_canonize(line);
4168 if (enc != NULL && !spin->si_ascii
4169 && convert_setup(&spin->si_conv, enc,
4170 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004171 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004172 fname, line, p_enc);
4173 vim_free(enc);
4174 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004175 }
4176 continue;
4177 }
4178
4179 if (STRNCMP(line, "regions=", 8) == 0)
4180 {
4181 if (spin->si_region_count > 1)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004182 smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004183 fname, lnum, line);
4184 else
4185 {
4186 line += 8;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01004187 if (STRLEN(line) > MAXREGIONS * 2)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004188 smsg(_("Too many regions in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004189 fname, lnum, line);
4190 else
4191 {
4192 spin->si_region_count = (int)STRLEN(line) / 2;
4193 STRCPY(spin->si_region_name, line);
4194
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004195 // Adjust the mask for a word valid in all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004196 spin->si_region = (1 << spin->si_region_count) - 1;
4197 }
4198 }
4199 continue;
4200 }
4201
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004202 smsg(_("/ line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004203 fname, lnum, line - 1);
4204 continue;
4205 }
4206
4207 flags = 0;
4208 regionmask = spin->si_region;
4209
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004210 // Check for flags and region after a slash.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004211 p = vim_strchr(line, '/');
4212 if (p != NULL)
4213 {
4214 *p++ = NUL;
4215 while (*p != NUL)
4216 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004217 if (*p == '=') // keep-case word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004218 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004219 else if (*p == '!') // Bad, bad, wicked word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004220 flags |= WF_BANNED;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004221 else if (*p == '?') // Rare word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004222 flags |= WF_RARE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004223 else if (VIM_ISDIGIT(*p)) // region number(s)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004224 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004225 if ((flags & WF_REGION) == 0) // first one
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004226 regionmask = 0;
4227 flags |= WF_REGION;
4228
4229 l = *p - '0';
Bram Moolenaaree03b942017-10-27 00:57:05 +02004230 if (l == 0 || l > spin->si_region_count)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004231 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004232 smsg(_("Invalid region nr in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004233 fname, lnum, p);
4234 break;
4235 }
4236 regionmask |= 1 << (l - 1);
4237 }
4238 else
4239 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004240 smsg(_("Unrecognized flags in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004241 fname, lnum, p);
4242 break;
4243 }
4244 ++p;
4245 }
4246 }
4247
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004248 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004249 if (spin->si_ascii && has_non_ascii(line))
4250 {
4251 ++non_ascii;
4252 continue;
4253 }
4254
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004255 // Normal word: store it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004256 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
4257 {
4258 retval = FAIL;
4259 break;
4260 }
4261 did_word = TRUE;
4262 }
4263
4264 vim_free(pc);
4265 fclose(fd);
4266
4267 if (spin->si_ascii && non_ascii > 0)
4268 {
4269 vim_snprintf((char *)IObuff, IOSIZE,
4270 _("Ignored %d words with non-ASCII characters"), non_ascii);
4271 spell_message(spin, IObuff);
4272 }
4273
4274 return retval;
4275}
4276
4277/*
4278 * Get part of an sblock_T, "len" bytes long.
4279 * This avoids calling free() for every little struct we use (and keeping
4280 * track of them).
4281 * The memory is cleared to all zeros.
4282 * Returns NULL when out of memory.
4283 */
4284 static void *
4285getroom(
4286 spellinfo_T *spin,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004287 size_t len, // length needed
4288 int align) // align for pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004289{
4290 char_u *p;
4291 sblock_T *bl = spin->si_blocks;
4292
4293 if (align && bl != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004294 // Round size up for alignment. On some systems structures need to be
4295 // aligned to the size of a pointer (e.g., SPARC).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004296 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
4297 & ~(sizeof(char *) - 1);
4298
4299 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
4300 {
4301 if (len >= SBLOCKSIZE)
4302 bl = NULL;
4303 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004304 // Allocate a block of memory. It is not freed until much later.
Bram Moolenaarc799fe22019-05-28 23:08:19 +02004305 bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004306 if (bl == NULL)
4307 {
4308 if (!spin->si_did_emsg)
4309 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00004310 emsg(_(e_insufficient_memory_word_list_will_be_incomplete));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004311 spin->si_did_emsg = TRUE;
4312 }
4313 return NULL;
4314 }
4315 bl->sb_next = spin->si_blocks;
4316 spin->si_blocks = bl;
4317 bl->sb_used = 0;
4318 ++spin->si_blocks_cnt;
4319 }
4320
4321 p = bl->sb_data + bl->sb_used;
4322 bl->sb_used += (int)len;
4323
4324 return p;
4325}
4326
4327/*
4328 * Make a copy of a string into memory allocated with getroom().
4329 * Returns NULL when out of memory.
4330 */
4331 static char_u *
4332getroom_save(spellinfo_T *spin, char_u *s)
4333{
4334 char_u *sc;
4335
4336 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
4337 if (sc != NULL)
4338 STRCPY(sc, s);
4339 return sc;
4340}
4341
4342
4343/*
4344 * Free the list of allocated sblock_T.
4345 */
4346 static void
4347free_blocks(sblock_T *bl)
4348{
4349 sblock_T *next;
4350
4351 while (bl != NULL)
4352 {
4353 next = bl->sb_next;
4354 vim_free(bl);
4355 bl = next;
4356 }
4357}
4358
4359/*
4360 * Allocate the root of a word tree.
4361 * Returns NULL when out of memory.
4362 */
4363 static wordnode_T *
4364wordtree_alloc(spellinfo_T *spin)
4365{
4366 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4367}
4368
4369/*
4370 * Store a word in the tree(s).
4371 * Always store it in the case-folded tree. For a keep-case word this is
4372 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
4373 * used to find suggestions.
4374 * For a keep-case word also store it in the keep-case tree.
4375 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
4376 * compound flag.
4377 */
4378 static int
4379store_word(
4380 spellinfo_T *spin,
4381 char_u *word,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004382 int flags, // extra flags, WF_BANNED
4383 int region, // supported region(s)
4384 char_u *pfxlist, // list of prefix IDs or NULL
4385 int need_affix) // only store word with affix ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004386{
4387 int len = (int)STRLEN(word);
4388 int ct = captype(word, word + len);
4389 char_u foldword[MAXWLEN];
4390 int res = OK;
4391 char_u *p;
4392
Bram Moolenaar7c824682022-05-08 22:32:58 +01004393 // Avoid adding illegal bytes to the word tree.
4394 if (enc_utf8 && !utf_valid_string(word, NULL))
4395 return FAIL;
4396
Bram Moolenaar4f135272021-06-11 19:07:40 +02004397 (void)spell_casefold(curwin, word, len, foldword, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004398 for (p = pfxlist; res == OK; ++p)
4399 {
4400 if (!need_affix || (p != NULL && *p != NUL))
4401 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
4402 region, p == NULL ? 0 : *p);
4403 if (p == NULL || *p == NUL)
4404 break;
4405 }
4406 ++spin->si_foldwcount;
4407
4408 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
4409 {
4410 for (p = pfxlist; res == OK; ++p)
4411 {
4412 if (!need_affix || (p != NULL && *p != NUL))
4413 res = tree_add_word(spin, word, spin->si_keeproot, flags,
4414 region, p == NULL ? 0 : *p);
4415 if (p == NULL || *p == NUL)
4416 break;
4417 }
4418 ++spin->si_keepwcount;
4419 }
4420 return res;
4421}
4422
4423/*
4424 * Add word "word" to a word tree at "root".
4425 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
4426 * "rare" and "region" is the condition nr.
4427 * Returns FAIL when out of memory.
4428 */
4429 static int
4430tree_add_word(
4431 spellinfo_T *spin,
4432 char_u *word,
4433 wordnode_T *root,
4434 int flags,
4435 int region,
4436 int affixID)
4437{
4438 wordnode_T *node = root;
4439 wordnode_T *np;
4440 wordnode_T *copyp, **copyprev;
4441 wordnode_T **prev = NULL;
4442 int i;
4443
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004444 // Add each byte of the word to the tree, including the NUL at the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004445 for (i = 0; ; ++i)
4446 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004447 // When there is more than one reference to this node we need to make
4448 // a copy, so that we can modify it. Copy the whole list of siblings
4449 // (we don't optimize for a partly shared list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004450 if (node != NULL && node->wn_refs > 1)
4451 {
4452 --node->wn_refs;
4453 copyprev = prev;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004454 FOR_ALL_NODE_SIBLINGS(node, copyp)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004455 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004456 // Allocate a new node and copy the info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004457 np = get_wordnode(spin);
4458 if (np == NULL)
4459 return FAIL;
4460 np->wn_child = copyp->wn_child;
4461 if (np->wn_child != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004462 ++np->wn_child->wn_refs; // child gets extra ref
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004463 np->wn_byte = copyp->wn_byte;
4464 if (np->wn_byte == NUL)
4465 {
4466 np->wn_flags = copyp->wn_flags;
4467 np->wn_region = copyp->wn_region;
4468 np->wn_affixID = copyp->wn_affixID;
4469 }
4470
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004471 // Link the new node in the list, there will be one ref.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004472 np->wn_refs = 1;
4473 if (copyprev != NULL)
4474 *copyprev = np;
4475 copyprev = &np->wn_sibling;
4476
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004477 // Let "node" point to the head of the copied list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004478 if (copyp == node)
4479 node = np;
4480 }
4481 }
4482
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004483 // Look for the sibling that has the same character. They are sorted
4484 // on byte value, thus stop searching when a sibling is found with a
4485 // higher byte value. For zero bytes (end of word) the sorting is
4486 // done on flags and then on affixID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004487 while (node != NULL
4488 && (node->wn_byte < word[i]
4489 || (node->wn_byte == NUL
4490 && (flags < 0
4491 ? node->wn_affixID < (unsigned)affixID
4492 : (node->wn_flags < (unsigned)(flags & WN_MASK)
4493 || (node->wn_flags == (flags & WN_MASK)
4494 && (spin->si_sugtree
4495 ? (node->wn_region & 0xffff) < region
4496 : node->wn_affixID
4497 < (unsigned)affixID)))))))
4498 {
4499 prev = &node->wn_sibling;
4500 node = *prev;
4501 }
4502 if (node == NULL
4503 || node->wn_byte != word[i]
4504 || (word[i] == NUL
4505 && (flags < 0
4506 || spin->si_sugtree
4507 || node->wn_flags != (flags & WN_MASK)
4508 || node->wn_affixID != affixID)))
4509 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004510 // Allocate a new node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004511 np = get_wordnode(spin);
4512 if (np == NULL)
4513 return FAIL;
4514 np->wn_byte = word[i];
4515
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004516 // If "node" is NULL this is a new child or the end of the sibling
4517 // list: ref count is one. Otherwise use ref count of sibling and
4518 // make ref count of sibling one (matters when inserting in front
4519 // of the list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004520 if (node == NULL)
4521 np->wn_refs = 1;
4522 else
4523 {
4524 np->wn_refs = node->wn_refs;
4525 node->wn_refs = 1;
4526 }
4527 if (prev != NULL)
4528 *prev = np;
4529 np->wn_sibling = node;
4530 node = np;
4531 }
4532
4533 if (word[i] == NUL)
4534 {
4535 node->wn_flags = flags;
4536 node->wn_region |= region;
4537 node->wn_affixID = affixID;
4538 break;
4539 }
4540 prev = &node->wn_child;
4541 node = *prev;
4542 }
4543#ifdef SPELL_PRINTTREE
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004544 smsg("Added \"%s\"", word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004545 spell_print_tree(root->wn_sibling);
4546#endif
4547
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004548 // count nr of words added since last message
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004549 ++spin->si_msg_count;
4550
4551 if (spin->si_compress_cnt > 1)
4552 {
4553 if (--spin->si_compress_cnt == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004554 // Did enough words to lower the block count limit.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004555 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004556 }
4557
4558 /*
4559 * When we have allocated lots of memory we need to compress the word tree
4560 * to free up some room. But compression is slow, and we might actually
4561 * need that room, thus only compress in the following situations:
4562 * 1. When not compressed before (si_compress_cnt == 0): when using
4563 * "compress_start" blocks.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004564 * 2. When compressed before and used "compress_inc" blocks before
4565 * adding "compress_added" words (si_compress_cnt > 1).
4566 * 3. When compressed before, added "compress_added" words
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004567 * (si_compress_cnt == 1) and the number of free nodes drops below the
4568 * maximum word length.
4569 */
4570#ifndef SPELL_COMPRESS_ALLWAYS
4571 if (spin->si_compress_cnt == 1
4572 ? spin->si_free_count < MAXWLEN
4573 : spin->si_blocks_cnt >= compress_start)
4574#endif
4575 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004576 // Decrement the block counter. The effect is that we compress again
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004577 // when the freed up room has been used and another "compress_inc"
4578 // blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004579 // been added, then the limit is put back again.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004580 spin->si_blocks_cnt -= compress_inc;
4581 spin->si_compress_cnt = compress_added;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004582
4583 if (spin->si_verbose)
4584 {
4585 msg_start();
Bram Moolenaar32526b32019-01-19 17:43:09 +01004586 msg_puts(_(msg_compressing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004587 msg_clr_eos();
4588 msg_didout = FALSE;
4589 msg_col = 0;
4590 out_flush();
4591 }
4592
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004593 // Compress both trees. Either they both have many nodes, which makes
4594 // compression useful, or one of them is small, which means
4595 // compression goes fast. But when filling the soundfold word tree
4596 // there is no keep-case tree.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004597 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004598 if (affixID >= 0)
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004599 wordtree_compress(spin, spin->si_keeproot, "keep-case");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004600 }
4601
4602 return OK;
4603}
4604
4605/*
4606 * Get a wordnode_T, either from the list of previously freed nodes or
4607 * allocate a new one.
4608 * Returns NULL when out of memory.
4609 */
4610 static wordnode_T *
4611get_wordnode(spellinfo_T *spin)
4612{
4613 wordnode_T *n;
4614
4615 if (spin->si_first_free == NULL)
4616 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4617 else
4618 {
4619 n = spin->si_first_free;
4620 spin->si_first_free = n->wn_child;
Bram Moolenaara80faa82020-04-12 19:37:17 +02004621 CLEAR_POINTER(n);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004622 --spin->si_free_count;
4623 }
4624#ifdef SPELL_PRINTTREE
4625 if (n != NULL)
4626 n->wn_nr = ++spin->si_wordnode_nr;
4627#endif
4628 return n;
4629}
4630
4631/*
4632 * Decrement the reference count on a node (which is the head of a list of
4633 * siblings). If the reference count becomes zero free the node and its
4634 * siblings.
4635 * Returns the number of nodes actually freed.
4636 */
4637 static int
4638deref_wordnode(spellinfo_T *spin, wordnode_T *node)
4639{
4640 wordnode_T *np;
4641 int cnt = 0;
4642
4643 if (--node->wn_refs == 0)
4644 {
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004645 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004646 {
4647 if (np->wn_child != NULL)
4648 cnt += deref_wordnode(spin, np->wn_child);
4649 free_wordnode(spin, np);
4650 ++cnt;
4651 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004652 ++cnt; // length field
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004653 }
4654 return cnt;
4655}
4656
4657/*
4658 * Free a wordnode_T for re-use later.
4659 * Only the "wn_child" field becomes invalid.
4660 */
4661 static void
4662free_wordnode(spellinfo_T *spin, wordnode_T *n)
4663{
4664 n->wn_child = spin->si_first_free;
4665 spin->si_first_free = n;
4666 ++spin->si_free_count;
4667}
4668
4669/*
4670 * Compress a tree: find tails that are identical and can be shared.
4671 */
4672 static void
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004673wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004674{
4675 hashtab_T ht;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004676 long n;
4677 long tot = 0;
4678 long perc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004679
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004680 // Skip the root itself, it's not actually used. The first sibling is the
4681 // start of the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004682 if (root->wn_sibling != NULL)
4683 {
4684 hash_init(&ht);
4685 n = node_compress(spin, root->wn_sibling, &ht, &tot);
4686
4687#ifndef SPELL_PRINTTREE
4688 if (spin->si_verbose || p_verbose > 2)
4689#endif
4690 {
4691 if (tot > 1000000)
4692 perc = (tot - n) / (tot / 100);
4693 else if (tot == 0)
4694 perc = 0;
4695 else
4696 perc = (tot - n) * 100 / tot;
4697 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004698 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"),
4699 name, n, tot, tot - n, perc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004700 spell_message(spin, IObuff);
4701 }
4702#ifdef SPELL_PRINTTREE
4703 spell_print_tree(root->wn_sibling);
4704#endif
4705 hash_clear(&ht);
4706 }
4707}
4708
4709/*
4710 * Compress a node, its siblings and its children, depth first.
4711 * Returns the number of compressed nodes.
4712 */
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004713 static long
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004714node_compress(
4715 spellinfo_T *spin,
4716 wordnode_T *node,
4717 hashtab_T *ht,
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004718 long *tot) // total count of nodes before compressing,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004719 // incremented while going through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004720{
4721 wordnode_T *np;
4722 wordnode_T *tp;
4723 wordnode_T *child;
4724 hash_T hash;
4725 hashitem_T *hi;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004726 long len = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004727 unsigned nr, n;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004728 long compressed = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004729
4730 /*
4731 * Go through the list of siblings. Compress each child and then try
4732 * finding an identical child to replace it.
4733 * Note that with "child" we mean not just the node that is pointed to,
4734 * but the whole list of siblings of which the child node is the first.
4735 */
4736 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
4737 {
4738 ++len;
4739 if ((child = np->wn_child) != NULL)
4740 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004741 // Compress the child first. This fills hashkey.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004742 compressed += node_compress(spin, child, ht, tot);
4743
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004744 // Try to find an identical child.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004745 hash = hash_hash(child->wn_u1.hashkey);
4746 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
4747 if (!HASHITEM_EMPTY(hi))
4748 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004749 // There are children we encountered before with a hash value
4750 // identical to the current child. Now check if there is one
4751 // that is really identical.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004752 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
4753 if (node_equal(child, tp))
4754 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004755 // Found one! Now use that child in place of the
4756 // current one. This means the current child and all
4757 // its siblings is unlinked from the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004758 ++tp->wn_refs;
4759 compressed += deref_wordnode(spin, child);
4760 np->wn_child = tp;
4761 break;
4762 }
4763 if (tp == NULL)
4764 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004765 // No other child with this hash value equals the child of
4766 // the node, add it to the linked list after the first
4767 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004768 tp = HI2WN(hi);
4769 child->wn_u2.next = tp->wn_u2.next;
4770 tp->wn_u2.next = child;
4771 }
4772 }
4773 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004774 // No other child has this hash value, add it to the
4775 // hashtable.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004776 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
4777 }
4778 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004779 *tot += len + 1; // add one for the node that stores the length
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004780
4781 /*
4782 * Make a hash key for the node and its siblings, so that we can quickly
4783 * find a lookalike node. This must be done after compressing the sibling
4784 * list, otherwise the hash key would become invalid by the compression.
4785 */
4786 node->wn_u1.hashkey[0] = len;
4787 nr = 0;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004788 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004789 {
4790 if (np->wn_byte == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004791 // end node: use wn_flags, wn_region and wn_affixID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004792 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
4793 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004794 // byte node: use the byte value and the child pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004795 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
4796 nr = nr * 101 + n;
4797 }
4798
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004799 // Avoid NUL bytes, it terminates the hash key.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004800 n = nr & 0xff;
4801 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
4802 n = (nr >> 8) & 0xff;
4803 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
4804 n = (nr >> 16) & 0xff;
4805 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
4806 n = (nr >> 24) & 0xff;
4807 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4808 node->wn_u1.hashkey[5] = NUL;
4809
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004810 // Check for CTRL-C pressed now and then.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004811 veryfast_breakcheck();
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004812
4813 return compressed;
4814}
4815
4816/*
4817 * Return TRUE when two nodes have identical siblings and children.
4818 */
4819 static int
4820node_equal(wordnode_T *n1, wordnode_T *n2)
4821{
4822 wordnode_T *p1;
4823 wordnode_T *p2;
4824
4825 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4826 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4827 if (p1->wn_byte != p2->wn_byte
4828 || (p1->wn_byte == NUL
4829 ? (p1->wn_flags != p2->wn_flags
4830 || p1->wn_region != p2->wn_region
4831 || p1->wn_affixID != p2->wn_affixID)
4832 : (p1->wn_child != p2->wn_child)))
4833 break;
4834
4835 return p1 == NULL && p2 == NULL;
4836}
4837
Bram Moolenaareae1b912019-05-09 15:12:55 +02004838static int rep_compare(const void *s1, const void *s2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004839
4840/*
4841 * Function given to qsort() to sort the REP items on "from" string.
4842 */
4843 static int
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004844rep_compare(const void *s1, const void *s2)
4845{
4846 fromto_T *p1 = (fromto_T *)s1;
4847 fromto_T *p2 = (fromto_T *)s2;
4848
4849 return STRCMP(p1->ft_from, p2->ft_from);
4850}
4851
4852/*
4853 * Write the Vim .spl file "fname".
4854 * Return FAIL or OK;
4855 */
4856 static int
4857write_vim_spell(spellinfo_T *spin, char_u *fname)
4858{
4859 FILE *fd;
4860 int regionmask;
4861 int round;
4862 wordnode_T *tree;
4863 int nodecount;
4864 int i;
4865 int l;
4866 garray_T *gap;
4867 fromto_T *ftp;
4868 char_u *p;
4869 int rr;
4870 int retval = OK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004871 size_t fwv = 1; // collect return value of fwrite() to avoid
4872 // warnings from picky compiler
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004873
4874 fd = mch_fopen((char *)fname, "w");
4875 if (fd == NULL)
4876 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004877 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004878 return FAIL;
4879 }
4880
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004881 // <HEADER>: <fileID> <versionnr>
4882 // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004883 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
4884 if (fwv != (size_t)1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004885 // Catch first write error, don't try writing more.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004886 goto theend;
4887
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004888 putc(VIMSPELLVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004889
4890 /*
4891 * <SECTIONS>: <section> ... <sectionend>
4892 */
4893
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004894 // SN_INFO: <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004895 if (spin->si_info != NULL)
4896 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004897 putc(SN_INFO, fd); // <sectionID>
4898 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004899
4900 i = (int)STRLEN(spin->si_info);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004901 put_bytes(fd, (long_u)i, 4); // <sectionlen>
4902 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004903 }
4904
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004905 // SN_REGION: <regionname> ...
4906 // Write the region names only if there is more than one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004907 if (spin->si_region_count > 1)
4908 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004909 putc(SN_REGION, fd); // <sectionID>
4910 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004911 l = spin->si_region_count * 2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004912 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004913 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004914 // <regionname> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004915 regionmask = (1 << spin->si_region_count) - 1;
4916 }
4917 else
4918 regionmask = 0;
4919
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004920 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
4921 //
4922 // The table with character flags and the table for case folding.
4923 // This makes sure the same characters are recognized as word characters
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00004924 // when generating and when using a spell file.
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004925 // Skip this for ASCII, the table may conflict with the one used for
4926 // 'encoding'.
4927 // Also skip this for an .add.spl file, the main spell file must contain
4928 // the table (avoids that it conflicts). File is shorter too.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004929 if (!spin->si_ascii && !spin->si_add)
4930 {
4931 char_u folchars[128 * 8];
4932 int flags;
4933
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004934 putc(SN_CHARFLAGS, fd); // <sectionID>
4935 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004936
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004937 // Form the <folchars> string first, we need to know its length.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004938 l = 0;
4939 for (i = 128; i < 256; ++i)
4940 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004941 if (has_mbyte)
4942 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
4943 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004944 folchars[l++] = spelltab.st_fold[i];
4945 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004946 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004947
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004948 fputc(128, fd); // <charflagslen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004949 for (i = 128; i < 256; ++i)
4950 {
4951 flags = 0;
4952 if (spelltab.st_isw[i])
4953 flags |= CF_WORD;
4954 if (spelltab.st_isu[i])
4955 flags |= CF_UPPER;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004956 fputc(flags, fd); // <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004957 }
4958
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004959 put_bytes(fd, (long_u)l, 2); // <folcharslen>
4960 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004961 }
4962
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004963 // SN_MIDWORD: <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004964 if (spin->si_midword != NULL)
4965 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004966 putc(SN_MIDWORD, fd); // <sectionID>
4967 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004968
4969 i = (int)STRLEN(spin->si_midword);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004970 put_bytes(fd, (long_u)i, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004971 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004972 // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004973 }
4974
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004975 // SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004976 if (spin->si_prefcond.ga_len > 0)
4977 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004978 putc(SN_PREFCOND, fd); // <sectionID>
4979 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004980
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00004981 l = write_spell_prefcond(NULL, &spin->si_prefcond, &fwv);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004982 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004983
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00004984 write_spell_prefcond(fd, &spin->si_prefcond, &fwv);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004985 }
4986
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004987 // SN_REP: <repcount> <rep> ...
4988 // SN_SAL: <salflags> <salcount> <sal> ...
4989 // SN_REPSAL: <repcount> <rep> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004990
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004991 // round 1: SN_REP section
4992 // round 2: SN_SAL section (unless SN_SOFO is used)
4993 // round 3: SN_REPSAL section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004994 for (round = 1; round <= 3; ++round)
4995 {
4996 if (round == 1)
4997 gap = &spin->si_rep;
4998 else if (round == 2)
4999 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005000 // Don't write SN_SAL when using a SN_SOFO section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005001 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5002 continue;
5003 gap = &spin->si_sal;
5004 }
5005 else
5006 gap = &spin->si_repsal;
5007
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005008 // Don't write the section if there are no items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005009 if (gap->ga_len == 0)
5010 continue;
5011
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005012 // Sort the REP/REPSAL items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005013 if (round != 2)
5014 qsort(gap->ga_data, (size_t)gap->ga_len,
5015 sizeof(fromto_T), rep_compare);
5016
5017 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005018 putc(i, fd); // <sectionID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005019
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005020 // This is for making suggestions, section is not required.
5021 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005022
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005023 // Compute the length of what follows.
5024 l = 2; // count <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005025 for (i = 0; i < gap->ga_len; ++i)
5026 {
5027 ftp = &((fromto_T *)gap->ga_data)[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005028 l += 1 + (int)STRLEN(ftp->ft_from); // count <*fromlen> and <*from>
5029 l += 1 + (int)STRLEN(ftp->ft_to); // count <*tolen> and <*to>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005030 }
5031 if (round == 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005032 ++l; // count <salflags>
5033 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005034
5035 if (round == 2)
5036 {
5037 i = 0;
5038 if (spin->si_followup)
5039 i |= SAL_F0LLOWUP;
5040 if (spin->si_collapse)
5041 i |= SAL_COLLAPSE;
5042 if (spin->si_rem_accents)
5043 i |= SAL_REM_ACCENTS;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005044 putc(i, fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005045 }
5046
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005047 put_bytes(fd, (long_u)gap->ga_len, 2); // <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005048 for (i = 0; i < gap->ga_len; ++i)
5049 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005050 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
5051 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005052 ftp = &((fromto_T *)gap->ga_data)[i];
5053 for (rr = 1; rr <= 2; ++rr)
5054 {
5055 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
5056 l = (int)STRLEN(p);
5057 putc(l, fd);
5058 if (l > 0)
5059 fwv &= fwrite(p, l, (size_t)1, fd);
5060 }
5061 }
5062
5063 }
5064
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005065 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
5066 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005067 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5068 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005069 putc(SN_SOFO, fd); // <sectionID>
5070 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005071
5072 l = (int)STRLEN(spin->si_sofofr);
5073 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005074 // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005075
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005076 put_bytes(fd, (long_u)l, 2); // <sofofromlen>
5077 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005078
5079 l = (int)STRLEN(spin->si_sofoto);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005080 put_bytes(fd, (long_u)l, 2); // <sofotolen>
5081 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005082 }
5083
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005084 // SN_WORDS: <word> ...
5085 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005086 if (spin->si_commonwords.ht_used > 0)
5087 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005088 putc(SN_WORDS, fd); // <sectionID>
5089 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005090
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005091 // round 1: count the bytes
5092 // round 2: write the bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005093 for (round = 1; round <= 2; ++round)
5094 {
5095 int todo;
5096 int len = 0;
5097 hashitem_T *hi;
5098
5099 todo = (int)spin->si_commonwords.ht_used;
5100 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
5101 if (!HASHITEM_EMPTY(hi))
5102 {
5103 l = (int)STRLEN(hi->hi_key) + 1;
5104 len += l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005105 if (round == 2) // <word>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005106 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
5107 --todo;
5108 }
5109 if (round == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005110 put_bytes(fd, (long_u)len, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005111 }
5112 }
5113
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005114 // SN_MAP: <mapstr>
5115 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005116 if (spin->si_map.ga_len > 0)
5117 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005118 putc(SN_MAP, fd); // <sectionID>
5119 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005120 l = spin->si_map.ga_len;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005121 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005122 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005123 // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005124 }
5125
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005126 // SN_SUGFILE: <timestamp>
5127 // This is used to notify that a .sug file may be available and at the
5128 // same time allows for checking that a .sug file that is found matches
5129 // with this .spl file. That's because the word numbers must be exactly
5130 // right.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005131 if (!spin->si_nosugfile
5132 && (spin->si_sal.ga_len > 0
5133 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
5134 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005135 putc(SN_SUGFILE, fd); // <sectionID>
5136 putc(0, fd); // <sectionflags>
5137 put_bytes(fd, (long_u)8, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005138
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005139 // Set si_sugtime and write it to the file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005140 spin->si_sugtime = time(NULL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005141 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005142 }
5143
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005144 // SN_NOSPLITSUGS: nothing
5145 // This is used to notify that no suggestions with word splits are to be
5146 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005147 if (spin->si_nosplitsugs)
5148 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005149 putc(SN_NOSPLITSUGS, fd); // <sectionID>
5150 putc(0, fd); // <sectionflags>
5151 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005152 }
5153
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005154 // SN_NOCOMPUNDSUGS: nothing
5155 // This is used to notify that no suggestions with compounds are to be
5156 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005157 if (spin->si_nocompoundsugs)
5158 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005159 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID>
5160 putc(0, fd); // <sectionflags>
5161 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005162 }
5163
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005164 // SN_COMPOUND: compound info.
5165 // We don't mark it required, when not supported all compound words will
5166 // be bad words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005167 if (spin->si_compflags != NULL)
5168 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005169 putc(SN_COMPOUND, fd); // <sectionID>
5170 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005171
5172 l = (int)STRLEN(spin->si_compflags);
5173 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5174 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005175 put_bytes(fd, (long_u)(l + 7), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005176
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005177 putc(spin->si_compmax, fd); // <compmax>
5178 putc(spin->si_compminlen, fd); // <compminlen>
5179 putc(spin->si_compsylmax, fd); // <compsylmax>
5180 putc(0, fd); // for Vim 7.0b compatibility
5181 putc(spin->si_compoptions, fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005182 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005183 // <comppatcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005184 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5185 {
5186 p = ((char_u **)(spin->si_comppat.ga_data))[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005187 putc((int)STRLEN(p), fd); // <comppatlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005188 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005189 // <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005190 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005191 // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005192 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
5193 (size_t)1, fd);
5194 }
5195
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005196 // SN_NOBREAK: NOBREAK flag
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005197 if (spin->si_nobreak)
5198 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005199 putc(SN_NOBREAK, fd); // <sectionID>
5200 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005201
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005202 // It's empty, the presence of the section flags the feature.
5203 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005204 }
5205
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005206 // SN_SYLLABLE: syllable info.
5207 // We don't mark it required, when not supported syllables will not be
5208 // counted.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005209 if (spin->si_syllable != NULL)
5210 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005211 putc(SN_SYLLABLE, fd); // <sectionID>
5212 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005213
5214 l = (int)STRLEN(spin->si_syllable);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005215 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005216 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005217 // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005218 }
5219
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005220 // end of <SECTIONS>
5221 putc(SN_END, fd); // <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005222
5223
5224 /*
5225 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
5226 */
5227 spin->si_memtot = 0;
5228 for (round = 1; round <= 3; ++round)
5229 {
5230 if (round == 1)
5231 tree = spin->si_foldroot->wn_sibling;
5232 else if (round == 2)
5233 tree = spin->si_keeproot->wn_sibling;
5234 else
5235 tree = spin->si_prefroot->wn_sibling;
5236
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005237 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005238 clear_node(tree);
5239
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005240 // Count the number of nodes. Needed to be able to allocate the
5241 // memory when reading the nodes. Also fills in index for shared
5242 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005243 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
5244
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005245 // number of nodes in 4 bytes
5246 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005247 spin->si_memtot += nodecount + nodecount * sizeof(int);
5248
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005249 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005250 (void)put_node(fd, tree, 0, regionmask, round == 3);
5251 }
5252
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005253 // Write another byte to check for errors (file system full).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005254 if (putc(0, fd) == EOF)
5255 retval = FAIL;
5256theend:
5257 if (fclose(fd) == EOF)
5258 retval = FAIL;
5259
5260 if (fwv != (size_t)1)
5261 retval = FAIL;
5262 if (retval == FAIL)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005263 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005264
5265 return retval;
5266}
5267
5268/*
5269 * Clear the index and wnode fields of "node", it siblings and its
5270 * children. This is needed because they are a union with other items to save
5271 * space.
5272 */
5273 static void
5274clear_node(wordnode_T *node)
5275{
5276 wordnode_T *np;
5277
5278 if (node != NULL)
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005279 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005280 {
5281 np->wn_u1.index = 0;
5282 np->wn_u2.wnode = NULL;
5283
5284 if (np->wn_byte != NUL)
5285 clear_node(np->wn_child);
5286 }
5287}
5288
5289
5290/*
5291 * Dump a word tree at node "node".
5292 *
5293 * This first writes the list of possible bytes (siblings). Then for each
5294 * byte recursively write the children.
5295 *
5296 * NOTE: The code here must match the code in read_tree_node(), since
5297 * assumptions are made about the indexes (so that we don't have to write them
5298 * in the file).
5299 *
5300 * Returns the number of nodes used.
5301 */
5302 static int
5303put_node(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005304 FILE *fd, // NULL when only counting
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005305 wordnode_T *node,
5306 int idx,
5307 int regionmask,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005308 int prefixtree) // TRUE for PREFIXTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005309{
5310 int newindex = idx;
5311 int siblingcount = 0;
5312 wordnode_T *np;
5313 int flags;
5314
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005315 // If "node" is zero the tree is empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005316 if (node == NULL)
5317 return 0;
5318
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005319 // Store the index where this node is written.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005320 node->wn_u1.index = idx;
5321
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005322 // Count the number of siblings.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005323 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005324 ++siblingcount;
5325
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005326 // Write the sibling count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005327 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005328 putc(siblingcount, fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005329
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005330 // Write each sibling byte and optionally extra info.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005331 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005332 {
5333 if (np->wn_byte == 0)
5334 {
5335 if (fd != NULL)
5336 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005337 // For a NUL byte (end of word) write the flags etc.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005338 if (prefixtree)
5339 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005340 // In PREFIXTREE write the required affixID and the
5341 // associated condition nr (stored in wn_region). The
5342 // byte value is misused to store the "rare" and "not
5343 // combining" flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005344 if (np->wn_flags == (short_u)PFX_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005345 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005346 else
5347 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005348 putc(BY_FLAGS, fd); // <byte>
5349 putc(np->wn_flags, fd); // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005350 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005351 putc(np->wn_affixID, fd); // <affixID>
5352 put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005353 }
5354 else
5355 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005356 // For word trees we write the flag/region items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005357 flags = np->wn_flags;
5358 if (regionmask != 0 && np->wn_region != regionmask)
5359 flags |= WF_REGION;
5360 if (np->wn_affixID != 0)
5361 flags |= WF_AFX;
5362 if (flags == 0)
5363 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005364 // word without flags or region
5365 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005366 }
5367 else
5368 {
5369 if (np->wn_flags >= 0x100)
5370 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005371 putc(BY_FLAGS2, fd); // <byte>
5372 putc(flags, fd); // <flags>
5373 putc((unsigned)flags >> 8, fd); // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005374 }
5375 else
5376 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005377 putc(BY_FLAGS, fd); // <byte>
5378 putc(flags, fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005379 }
5380 if (flags & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005381 putc(np->wn_region, fd); // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005382 if (flags & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005383 putc(np->wn_affixID, fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005384 }
5385 }
5386 }
5387 }
5388 else
5389 {
5390 if (np->wn_child->wn_u1.index != 0
5391 && np->wn_child->wn_u2.wnode != node)
5392 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005393 // The child is written elsewhere, write the reference.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005394 if (fd != NULL)
5395 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005396 putc(BY_INDEX, fd); // <byte>
5397 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005398 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
5399 }
5400 }
5401 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005402 // We will write the child below and give it an index.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005403 np->wn_child->wn_u2.wnode = node;
5404
5405 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005406 if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005407 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005408 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005409 return 0;
5410 }
5411 }
5412 }
5413
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005414 // Space used in the array when reading: one for each sibling and one for
5415 // the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005416 newindex += siblingcount + 1;
5417
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005418 // Recursively dump the children of each sibling.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005419 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005420 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
5421 newindex = put_node(fd, np->wn_child, newindex, regionmask,
5422 prefixtree);
5423
5424 return newindex;
5425}
5426
5427
5428/*
5429 * ":mkspell [-ascii] outfile infile ..."
5430 * ":mkspell [-ascii] addfile"
5431 */
5432 void
5433ex_mkspell(exarg_T *eap)
5434{
5435 int fcount;
5436 char_u **fnames;
5437 char_u *arg = eap->arg;
5438 int ascii = FALSE;
5439
5440 if (STRNCMP(arg, "-ascii", 6) == 0)
5441 {
5442 ascii = TRUE;
5443 arg = skipwhite(arg + 6);
5444 }
5445
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005446 // Expand all the remaining arguments (e.g., $VIMRUNTIME).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005447 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
5448 {
5449 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
5450 FreeWild(fcount, fnames);
5451 }
5452}
5453
5454/*
5455 * Create the .sug file.
5456 * Uses the soundfold info in "spin".
5457 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
5458 */
5459 static void
5460spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
5461{
5462 char_u *fname = NULL;
5463 int len;
5464 slang_T *slang;
5465 int free_slang = FALSE;
5466
5467 /*
5468 * Read back the .spl file that was written. This fills the required
5469 * info for soundfolding. This also uses less memory than the
5470 * pointer-linked version of the trie. And it avoids having two versions
5471 * of the code for the soundfolding stuff.
5472 * It might have been done already by spell_reload_one().
5473 */
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005474 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar99499b12019-05-23 21:35:48 +02005475 if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005476 break;
5477 if (slang == NULL)
5478 {
5479 spell_message(spin, (char_u *)_("Reading back spell file..."));
5480 slang = spell_load_file(wfname, NULL, NULL, FALSE);
5481 if (slang == NULL)
5482 return;
5483 free_slang = TRUE;
5484 }
5485
5486 /*
5487 * Clear the info in "spin" that is used.
5488 */
5489 spin->si_blocks = NULL;
5490 spin->si_blocks_cnt = 0;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005491 spin->si_compress_cnt = 0; // will stay at 0 all the time
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005492 spin->si_free_count = 0;
5493 spin->si_first_free = NULL;
5494 spin->si_foldwcount = 0;
5495
5496 /*
5497 * Go through the trie of good words, soundfold each word and add it to
5498 * the soundfold trie.
5499 */
5500 spell_message(spin, (char_u *)_("Performing soundfolding..."));
5501 if (sug_filltree(spin, slang) == FAIL)
5502 goto theend;
5503
5504 /*
5505 * Create the table which links each soundfold word with a list of the
5506 * good words it may come from. Creates buffer "spin->si_spellbuf".
5507 * This also removes the wordnr from the NUL byte entries to make
5508 * compression possible.
5509 */
5510 if (sug_maketable(spin) == FAIL)
5511 goto theend;
5512
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005513 smsg(_("Number of words after soundfolding: %ld"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005514 (long)spin->si_spellbuf->b_ml.ml_line_count);
5515
5516 /*
5517 * Compress the soundfold trie.
5518 */
5519 spell_message(spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02005520 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005521
5522 /*
5523 * Write the .sug file.
5524 * Make the file name by changing ".spl" to ".sug".
5525 */
5526 fname = alloc(MAXPATHL);
5527 if (fname == NULL)
5528 goto theend;
5529 vim_strncpy(fname, wfname, MAXPATHL - 1);
5530 len = (int)STRLEN(fname);
5531 fname[len - 2] = 'u';
5532 fname[len - 1] = 'g';
5533 sug_write(spin, fname);
5534
5535theend:
5536 vim_free(fname);
5537 if (free_slang)
5538 slang_free(slang);
5539 free_blocks(spin->si_blocks);
5540 close_spellbuf(spin->si_spellbuf);
5541}
5542
5543/*
5544 * Build the soundfold trie for language "slang".
5545 */
5546 static int
5547sug_filltree(spellinfo_T *spin, slang_T *slang)
5548{
5549 char_u *byts;
5550 idx_T *idxs;
5551 int depth;
5552 idx_T arridx[MAXWLEN];
5553 int curi[MAXWLEN];
5554 char_u tword[MAXWLEN];
5555 char_u tsalword[MAXWLEN];
5556 int c;
5557 idx_T n;
5558 unsigned words_done = 0;
5559 int wordcount[MAXWLEN];
5560
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005561 // We use si_foldroot for the soundfolded trie.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005562 spin->si_foldroot = wordtree_alloc(spin);
5563 if (spin->si_foldroot == NULL)
5564 return FAIL;
5565
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005566 // let tree_add_word() know we're adding to the soundfolded tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005567 spin->si_sugtree = TRUE;
5568
5569 /*
5570 * Go through the whole case-folded tree, soundfold each word and put it
5571 * in the trie.
5572 */
5573 byts = slang->sl_fbyts;
5574 idxs = slang->sl_fidxs;
5575
5576 arridx[0] = 0;
5577 curi[0] = 1;
5578 wordcount[0] = 0;
5579
5580 depth = 0;
5581 while (depth >= 0 && !got_int)
5582 {
5583 if (curi[depth] > byts[arridx[depth]])
5584 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005585 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005586 idxs[arridx[depth]] = wordcount[depth];
5587 if (depth > 0)
5588 wordcount[depth - 1] += wordcount[depth];
5589
5590 --depth;
5591 line_breakcheck();
5592 }
5593 else
5594 {
5595
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005596 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005597 n = arridx[depth] + curi[depth];
5598 ++curi[depth];
5599
5600 c = byts[n];
5601 if (c == 0)
5602 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005603 // Sound-fold the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005604 tword[depth] = NUL;
5605 spell_soundfold(slang, tword, TRUE, tsalword);
5606
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005607 // We use the "flags" field for the MSB of the wordnr,
5608 // "region" for the LSB of the wordnr.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005609 if (tree_add_word(spin, tsalword, spin->si_foldroot,
5610 words_done >> 16, words_done & 0xffff,
5611 0) == FAIL)
5612 return FAIL;
5613
5614 ++words_done;
5615 ++wordcount[depth];
5616
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005617 // Reset the block count each time to avoid compression
5618 // kicking in.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005619 spin->si_blocks_cnt = 0;
5620
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005621 // Skip over any other NUL bytes (same word with different
Bram Moolenaar07399e72020-08-24 20:05:50 +02005622 // flags). But don't go over the end.
5623 while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005624 {
5625 ++n;
5626 ++curi[depth];
5627 }
5628 }
5629 else
5630 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005631 // Normal char, go one level deeper.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005632 tword[depth++] = c;
5633 arridx[depth] = idxs[n];
5634 curi[depth] = 1;
5635 wordcount[depth] = 0;
5636 }
5637 }
5638 }
5639
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005640 smsg(_("Total number of words: %d"), words_done);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005641
5642 return OK;
5643}
5644
5645/*
5646 * Make the table that links each word in the soundfold trie to the words it
5647 * can be produced from.
5648 * This is not unlike lines in a file, thus use a memfile to be able to access
5649 * the table efficiently.
5650 * Returns FAIL when out of memory.
5651 */
5652 static int
5653sug_maketable(spellinfo_T *spin)
5654{
5655 garray_T ga;
5656 int res = OK;
5657
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005658 // Allocate a buffer, open a memline for it and create the swap file
5659 // (uses a temp file, not a .swp file).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005660 spin->si_spellbuf = open_spellbuf();
5661 if (spin->si_spellbuf == NULL)
5662 return FAIL;
5663
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005664 // Use a buffer to store the line info, avoids allocating many small
5665 // pieces of memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005666 ga_init2(&ga, 1, 100);
5667
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005668 // recursively go through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005669 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
5670 res = FAIL;
5671
5672 ga_clear(&ga);
5673 return res;
5674}
5675
5676/*
5677 * Fill the table for one node and its children.
5678 * Returns the wordnr at the start of the node.
5679 * Returns -1 when out of memory.
5680 */
5681 static int
5682sug_filltable(
5683 spellinfo_T *spin,
5684 wordnode_T *node,
5685 int startwordnr,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005686 garray_T *gap) // place to store line of numbers
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005687{
5688 wordnode_T *p, *np;
5689 int wordnr = startwordnr;
5690 int nr;
5691 int prev_nr;
5692
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005693 FOR_ALL_NODE_SIBLINGS(node, p)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005694 {
5695 if (p->wn_byte == NUL)
5696 {
5697 gap->ga_len = 0;
5698 prev_nr = 0;
5699 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
5700 {
5701 if (ga_grow(gap, 10) == FAIL)
5702 return -1;
5703
5704 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005705 // Compute the offset from the previous nr and store the
5706 // offset in a way that it takes a minimum number of bytes.
5707 // It's a bit like utf-8, but without the need to mark
5708 // following bytes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005709 nr -= prev_nr;
5710 prev_nr += nr;
5711 gap->ga_len += offset2bytes(nr,
5712 (char_u *)gap->ga_data + gap->ga_len);
5713 }
5714
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005715 // add the NUL byte
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005716 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
5717
5718 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
5719 gap->ga_data, gap->ga_len, TRUE) == FAIL)
5720 return -1;
5721 ++wordnr;
5722
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005723 // Remove extra NUL entries, we no longer need them. We don't
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00005724 // bother freeing the nodes, they won't be reused anyway.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005725 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
5726 p->wn_sibling = p->wn_sibling->wn_sibling;
5727
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005728 // Clear the flags on the remaining NUL node, so that compression
5729 // works a lot better.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005730 p->wn_flags = 0;
5731 p->wn_region = 0;
5732 }
5733 else
5734 {
5735 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
5736 if (wordnr == -1)
5737 return -1;
5738 }
5739 }
5740 return wordnr;
5741}
5742
/*
 * Convert offset "nr" into a minimal number of bytes, stored in "buf".
 * Similar to utf_char2bytes(), but use 8 bits in followup bytes and avoid NUL
 * bytes.
 * "buf" must have room for four bytes.
 * Returns the number of bytes stored: 1 to 4.
 */
    static int
offset2bytes(int nr, char_u *buf)
{
    char_u  *p = buf;
    int     rest;
    int     d1, d2, d3, d4;
    int     count;

    // Split the number in base 255 digits, least significant first.  One is
    // added to each digit to avoid NUL bytes.
    d1 = nr % 255 + 1;
    rest = nr / 255;
    d2 = rest % 255 + 1;
    rest /= 255;
    d3 = rest % 255 + 1;
    d4 = rest / 255 + 1;

    // Find out how many bytes are needed.  The first byte also has to hold
    // the marker for the length, thus it has less room for its digit.
    if (d4 > 1 || d3 > 0x1f)
        count = 4;
    else if (d3 > 1 || d2 > 0x3f)
        count = 3;
    else if (d2 > 1 || d1 > 0x7f)
        count = 2;
    else
        count = 1;

    // Store the leading bytes, most significant digit first.
    switch (count)
    {
        case 4:
            *p++ = 0xe0 + d4;
            *p++ = d3;
            *p++ = d2;
            break;
        case 3:
            *p++ = 0xc0 + d3;
            *p++ = d2;
            break;
        case 2:
            *p++ = 0x80 + d2;
            break;
        default:
            break;
    }

    // The least significant digit always comes last.
    *p = d1;
    return count;
}
5787
5788/*
5789 * Write the .sug file in "fname".
5790 */
5791 static void
5792sug_write(spellinfo_T *spin, char_u *fname)
5793{
5794 FILE *fd;
5795 wordnode_T *tree;
5796 int nodecount;
5797 int wcount;
5798 char_u *line;
5799 linenr_T lnum;
5800 int len;
5801
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005802 // Create the file. Note that an existing file is silently overwritten!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005803 fd = mch_fopen((char *)fname, "w");
5804 if (fd == NULL)
5805 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00005806 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005807 return;
5808 }
5809
5810 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02005811 _("Writing suggestion file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005812 spell_message(spin, IObuff);
5813
5814 /*
5815 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
5816 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005817 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005818 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005819 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005820 goto theend;
5821 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005822 putc(VIMSUGVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005823
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005824 // Write si_sugtime to the file.
5825 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005826
5827 /*
5828 * <SUGWORDTREE>
5829 */
5830 spin->si_memtot = 0;
5831 tree = spin->si_foldroot->wn_sibling;
5832
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005833 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005834 clear_node(tree);
5835
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005836 // Count the number of nodes. Needed to be able to allocate the
5837 // memory when reading the nodes. Also fills in index for shared
5838 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005839 nodecount = put_node(NULL, tree, 0, 0, FALSE);
5840
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005841 // number of nodes in 4 bytes
5842 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005843 spin->si_memtot += nodecount + nodecount * sizeof(int);
5844
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005845 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005846 (void)put_node(fd, tree, 0, 0, FALSE);
5847
5848 /*
5849 * <SUGTABLE>: <sugwcount> <sugline> ...
5850 */
5851 wcount = spin->si_spellbuf->b_ml.ml_line_count;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005852 put_bytes(fd, (long_u)wcount, 4); // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005853
5854 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
5855 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005856 // <sugline>: <sugnr> ... NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005857 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
5858 len = (int)STRLEN(line) + 1;
5859 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
5860 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005861 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005862 goto theend;
5863 }
5864 spin->si_memtot += len;
5865 }
5866
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005867 // Write another byte to check for errors.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005868 if (putc(0, fd) == EOF)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005869 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005870
5871 vim_snprintf((char *)IObuff, IOSIZE,
5872 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
5873 spell_message(spin, IObuff);
5874
5875theend:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005876 // close the file
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005877 fclose(fd);
5878}
5879
5880
5881/*
5882 * Create a Vim spell file from one or more word lists.
5883 * "fnames[0]" is the output file name.
5884 * "fnames[fcount - 1]" is the last input file name.
5885 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
5886 * and ".spl" is appended to make the output file name.
5887 */
5888 void
5889mkspell(
5890 int fcount,
5891 char_u **fnames,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005892 int ascii, // -ascii argument given
5893 int over_write, // overwrite existing output file
5894 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005895{
5896 char_u *fname = NULL;
5897 char_u *wfname;
5898 char_u **innames;
5899 int incount;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005900 afffile_T *(afile[MAXREGIONS]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005901 int i;
5902 int len;
5903 stat_T st;
5904 int error = FALSE;
5905 spellinfo_T spin;
5906
Bram Moolenaara80faa82020-04-12 19:37:17 +02005907 CLEAR_FIELD(spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005908 spin.si_verbose = !added_word;
5909 spin.si_ascii = ascii;
5910 spin.si_followup = TRUE;
5911 spin.si_rem_accents = TRUE;
Bram Moolenaar04935fb2022-01-08 16:19:22 +00005912 ga_init2(&spin.si_rep, sizeof(fromto_T), 20);
5913 ga_init2(&spin.si_repsal, sizeof(fromto_T), 20);
5914 ga_init2(&spin.si_sal, sizeof(fromto_T), 20);
5915 ga_init2(&spin.si_map, sizeof(char_u), 100);
5916 ga_init2(&spin.si_comppat, sizeof(char_u *), 20);
5917 ga_init2(&spin.si_prefcond, sizeof(char_u *), 50);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005918 hash_init(&spin.si_commonwords);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005919 spin.si_newcompID = 127; // start compound ID at first maximum
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005920
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005921 // default: fnames[0] is output file, following are input files
Bram Moolenaar927b7dd2020-06-29 22:24:56 +02005922 // When "fcount" is 1 there is only one file.
5923 innames = &fnames[fcount == 1 ? 0 : 1];
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005924 incount = fcount - 1;
5925
5926 wfname = alloc(MAXPATHL);
5927 if (wfname == NULL)
5928 return;
5929
5930 if (fcount >= 1)
5931 {
5932 len = (int)STRLEN(fnames[0]);
5933 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
5934 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005935 // For ":mkspell path/en.latin1.add" output file is
5936 // "path/en.latin1.add.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005937 incount = 1;
5938 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
5939 }
5940 else if (fcount == 1)
5941 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005942 // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005943 incount = 1;
5944 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5945 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5946 }
5947 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
5948 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005949 // Name ends in ".spl", use as the file name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005950 vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
5951 }
5952 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005953 // Name should be language, make the file name from it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005954 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5955 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5956
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005957 // Check for .ascii.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005958 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
5959 spin.si_ascii = TRUE;
5960
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005961 // Check for .add.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005962 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
5963 spin.si_add = TRUE;
5964 }
5965
5966 if (incount <= 0)
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00005967 emsg(_(e_invalid_argument)); // need at least output and input names
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005968 else if (vim_strchr(gettail(wfname), '_') != NULL)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005969 emsg(_(e_output_file_name_must_not_have_region_name));
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005970 else if (incount > MAXREGIONS)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005971 semsg(_(e_only_up_to_nr_regions_supported), MAXREGIONS);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005972 else
5973 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005974 // Check for overwriting before doing things that may take a lot of
5975 // time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005976 if (!over_write && mch_stat((char *)wfname, &st) >= 0)
5977 {
Bram Moolenaar108010a2021-06-27 22:03:33 +02005978 emsg(_(e_file_exists));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005979 goto theend;
5980 }
5981 if (mch_isdir(wfname))
5982 {
Bram Moolenaar4dea2d92022-03-31 11:37:57 +01005983 semsg(_(e_str_is_directory), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005984 goto theend;
5985 }
5986
5987 fname = alloc(MAXPATHL);
5988 if (fname == NULL)
5989 goto theend;
5990
5991 /*
5992 * Init the aff and dic pointers.
5993 * Get the region names if there are more than 2 arguments.
5994 */
5995 for (i = 0; i < incount; ++i)
5996 {
5997 afile[i] = NULL;
5998
5999 if (incount > 1)
6000 {
6001 len = (int)STRLEN(innames[i]);
6002 if (STRLEN(gettail(innames[i])) < 5
6003 || innames[i][len - 3] != '_')
6004 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006005 semsg(_(e_invalid_region_in_str), innames[i]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006006 goto theend;
6007 }
6008 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
6009 spin.si_region_name[i * 2 + 1] =
6010 TOLOWER_ASC(innames[i][len - 1]);
6011 }
6012 }
6013 spin.si_region_count = incount;
6014
6015 spin.si_foldroot = wordtree_alloc(&spin);
6016 spin.si_keeproot = wordtree_alloc(&spin);
6017 spin.si_prefroot = wordtree_alloc(&spin);
6018 if (spin.si_foldroot == NULL
6019 || spin.si_keeproot == NULL
6020 || spin.si_prefroot == NULL)
6021 {
6022 free_blocks(spin.si_blocks);
6023 goto theend;
6024 }
6025
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006026 // When not producing a .add.spl file clear the character table when
6027 // we encounter one in the .aff file. This means we dump the current
6028 // one in the .spl file if the .aff file doesn't define one. That's
6029 // better than guessing the contents, the table will match a
6030 // previously loaded spell file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006031 if (!spin.si_add)
6032 spin.si_clear_chartab = TRUE;
6033
6034 /*
6035 * Read all the .aff and .dic files.
6036 * Text is converted to 'encoding'.
6037 * Words are stored in the case-folded and keep-case trees.
6038 */
6039 for (i = 0; i < incount && !error; ++i)
6040 {
6041 spin.si_conv.vc_type = CONV_NONE;
6042 spin.si_region = 1 << i;
6043
6044 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
6045 if (mch_stat((char *)fname, &st) >= 0)
6046 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006047 // Read the .aff file. Will init "spin->si_conv" based on the
6048 // "SET" line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006049 afile[i] = spell_read_aff(&spin, fname);
6050 if (afile[i] == NULL)
6051 error = TRUE;
6052 else
6053 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006054 // Read the .dic file and store the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006055 vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
6056 innames[i]);
6057 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
6058 error = TRUE;
6059 }
6060 }
6061 else
6062 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006063 // No .aff file, try reading the file as a word list. Store
6064 // the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006065 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
6066 error = TRUE;
6067 }
6068
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006069 // Free any conversion stuff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006070 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006071 }
6072
6073 if (spin.si_compflags != NULL && spin.si_nobreak)
Bram Moolenaar32526b32019-01-19 17:43:09 +01006074 msg(_("Warning: both compounding and NOBREAK specified"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006075
6076 if (!error && !got_int)
6077 {
6078 /*
6079 * Combine tails in the tree.
6080 */
6081 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02006082 wordtree_compress(&spin, spin.si_foldroot, "case-folded");
6083 wordtree_compress(&spin, spin.si_keeproot, "keep-case");
6084 wordtree_compress(&spin, spin.si_prefroot, "prefixes");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006085 }
6086
6087 if (!error && !got_int)
6088 {
6089 /*
6090 * Write the info in the spell file.
6091 */
6092 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02006093 _("Writing spell file %s..."), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006094 spell_message(&spin, IObuff);
6095
6096 error = write_vim_spell(&spin, wfname) == FAIL;
6097
6098 spell_message(&spin, (char_u *)_("Done!"));
6099 vim_snprintf((char *)IObuff, IOSIZE,
6100 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
6101 spell_message(&spin, IObuff);
6102
6103 /*
6104 * If the file is loaded need to reload it.
6105 */
6106 if (!error)
6107 spell_reload_one(wfname, added_word);
6108 }
6109
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006110 // Free the allocated memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006111 ga_clear(&spin.si_rep);
6112 ga_clear(&spin.si_repsal);
6113 ga_clear(&spin.si_sal);
6114 ga_clear(&spin.si_map);
6115 ga_clear(&spin.si_comppat);
6116 ga_clear(&spin.si_prefcond);
6117 hash_clear_all(&spin.si_commonwords, 0);
6118
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006119 // Free the .aff file structures.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006120 for (i = 0; i < incount; ++i)
6121 if (afile[i] != NULL)
6122 spell_free_aff(afile[i]);
6123
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006124 // Free all the bits and pieces at once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006125 free_blocks(spin.si_blocks);
6126
6127 /*
6128 * If there is soundfolding info and no NOSUGFILE item create the
6129 * .sug file with the soundfolded word trie.
6130 */
6131 if (spin.si_sugtime != 0 && !error && !got_int)
6132 spell_make_sugfile(&spin, wfname);
6133
6134 }
6135
6136theend:
6137 vim_free(fname);
6138 vim_free(wfname);
6139}
6140
6141/*
6142 * Display a message for spell file processing when 'verbose' is set or using
6143 * ":mkspell". "str" can be IObuff.
6144 */
6145 static void
6146spell_message(spellinfo_T *spin, char_u *str)
6147{
6148 if (spin->si_verbose || p_verbose > 2)
6149 {
6150 if (!spin->si_verbose)
6151 verbose_enter();
Bram Moolenaar32526b32019-01-19 17:43:09 +01006152 msg((char *)str);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006153 out_flush();
6154 if (!spin->si_verbose)
6155 verbose_leave();
6156 }
6157}
6158
6159/*
6160 * ":[count]spellgood {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006161 * ":[count]spellwrong {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006162 * ":[count]spellundo {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006163 * ":[count]spellrare {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006164 */
6165 void
6166ex_spell(exarg_T *eap)
6167{
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006168 spell_add_word(eap->arg, (int)STRLEN(eap->arg),
6169 eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
6170 eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006171 eap->forceit ? 0 : (int)eap->line2,
6172 eap->cmdidx == CMD_spellundo);
6173}
6174
6175/*
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006176 * Add "word[len]" to 'spellfile' as a good, rare or bad word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006177 */
6178 void
6179spell_add_word(
6180 char_u *word,
6181 int len,
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006182 int what, // SPELL_ADD_ values
6183 int idx, // "zG" and "zW": zero, otherwise index in
6184 // 'spellfile'
6185 int undo) // TRUE for "zug", "zuG", "zuw" and "zuW"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006186{
6187 FILE *fd = NULL;
6188 buf_T *buf = NULL;
6189 int new_spf = FALSE;
6190 char_u *fname;
6191 char_u *fnamebuf = NULL;
6192 char_u line[MAXWLEN * 2];
6193 long fpos, fpos_next = 0;
6194 int i;
6195 char_u *spf;
6196
Bram Moolenaar7c824682022-05-08 22:32:58 +01006197 if (enc_utf8 && !utf_valid_string(word, NULL))
6198 {
6199 emsg(_(e_illegal_character_in_word));
6200 return;
6201 }
6202
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006203 if (idx == 0) // use internal wordlist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006204 {
6205 if (int_wordlist == NULL)
6206 {
6207 int_wordlist = vim_tempname('s', FALSE);
6208 if (int_wordlist == NULL)
6209 return;
6210 }
6211 fname = int_wordlist;
6212 }
6213 else
6214 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006215 // If 'spellfile' isn't set figure out a good default value.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006216 if (*curwin->w_s->b_p_spf == NUL)
6217 {
6218 init_spellfile();
6219 new_spf = TRUE;
6220 }
6221
6222 if (*curwin->w_s->b_p_spf == NUL)
6223 {
Bram Moolenaar74409f62022-01-01 15:58:22 +00006224 semsg(_(e_option_str_is_not_set), "spellfile");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006225 return;
6226 }
6227 fnamebuf = alloc(MAXPATHL);
6228 if (fnamebuf == NULL)
6229 return;
6230
6231 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
6232 {
6233 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
6234 if (i == idx)
6235 break;
6236 if (*spf == NUL)
6237 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006238 semsg(_(e_spellfile_does_not_have_nr_entries), idx);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006239 vim_free(fnamebuf);
6240 return;
6241 }
6242 }
6243
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006244 // Check that the user isn't editing the .add file somewhere.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006245 buf = buflist_findname_exp(fnamebuf);
6246 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
6247 buf = NULL;
6248 if (buf != NULL && bufIsChanged(buf))
6249 {
Bram Moolenaareb822a22021-12-31 15:09:27 +00006250 emsg(_(e_file_is_loaded_in_another_buffer));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006251 vim_free(fnamebuf);
6252 return;
6253 }
6254
6255 fname = fnamebuf;
6256 }
6257
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006258 if (what == SPELL_ADD_BAD || undo)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006259 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006260 // When the word appears as good word we need to remove that one,
6261 // since its flags sort before the one with WF_BANNED.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006262 fd = mch_fopen((char *)fname, "r");
6263 if (fd != NULL)
6264 {
6265 while (!vim_fgets(line, MAXWLEN * 2, fd))
6266 {
6267 fpos = fpos_next;
6268 fpos_next = ftell(fd);
Bram Moolenaar416b5f42022-02-25 21:47:48 +00006269 if (fpos_next < 0)
6270 break; // should never happen
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006271 if (STRNCMP(word, line, len) == 0
6272 && (line[len] == '/' || line[len] < ' '))
6273 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006274 // Found duplicate word. Remove it by writing a '#' at
6275 // the start of the line. Mixing reading and writing
6276 // doesn't work for all systems, close the file first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006277 fclose(fd);
6278 fd = mch_fopen((char *)fname, "r+");
6279 if (fd == NULL)
6280 break;
6281 if (fseek(fd, fpos, SEEK_SET) == 0)
6282 {
6283 fputc('#', fd);
6284 if (undo)
6285 {
6286 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006287 smsg(_("Word '%.*s' removed from %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006288 len, word, NameBuff);
6289 }
6290 }
Bram Moolenaar2c363a22021-02-03 20:14:23 +01006291 if (fseek(fd, fpos_next, SEEK_SET) != 0)
6292 {
6293 PERROR(_("Seek error in spellfile"));
6294 break;
6295 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006296 }
6297 }
6298 if (fd != NULL)
6299 fclose(fd);
6300 }
6301 }
6302
6303 if (!undo)
6304 {
6305 fd = mch_fopen((char *)fname, "a");
6306 if (fd == NULL && new_spf)
6307 {
6308 char_u *p;
6309
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006310 // We just initialized the 'spellfile' option and can't open the
6311 // file. We may need to create the "spell" directory first. We
6312 // already checked the runtime directory is writable in
6313 // init_spellfile().
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006314 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
6315 {
6316 int c = *p;
6317
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006318 // The directory doesn't exist. Try creating it and opening
6319 // the file again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006320 *p = NUL;
6321 vim_mkdir(fname, 0755);
6322 *p = c;
6323 fd = mch_fopen((char *)fname, "a");
6324 }
6325 }
6326
6327 if (fd == NULL)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00006328 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006329 else
6330 {
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006331 if (what == SPELL_ADD_BAD)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006332 fprintf(fd, "%.*s/!\n", len, word);
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006333 else if (what == SPELL_ADD_RARE)
6334 fprintf(fd, "%.*s/?\n", len, word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006335 else
6336 fprintf(fd, "%.*s\n", len, word);
6337 fclose(fd);
6338
6339 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006340 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006341 }
6342 }
6343
6344 if (fd != NULL)
6345 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006346 // Update the .add.spl file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006347 mkspell(1, &fname, FALSE, TRUE, TRUE);
6348
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006349 // If the .add file is edited somewhere, reload it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006350 if (buf != NULL)
Rob Pilling8196e942022-02-11 15:12:10 +00006351 buf_reload(buf, buf->b_orig_mode, FALSE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006352
6353 redraw_all_later(SOME_VALID);
6354 }
6355 vim_free(fnamebuf);
6356}
6357
6358/*
6359 * Initialize 'spellfile' for the current buffer.
6360 */
6361 static void
6362init_spellfile(void)
6363{
6364 char_u *buf;
6365 int l;
6366 char_u *fname;
6367 char_u *rtp;
6368 char_u *lend;
6369 int aspath = FALSE;
6370 char_u *lstart = curbuf->b_s.b_p_spl;
6371
6372 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
6373 {
6374 buf = alloc(MAXPATHL);
6375 if (buf == NULL)
6376 return;
6377
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006378 // Find the end of the language name. Exclude the region. If there
6379 // is a path separator remember the start of the tail.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006380 for (lend = curwin->w_s->b_p_spl; *lend != NUL
6381 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
6382 if (vim_ispathsep(*lend))
6383 {
6384 aspath = TRUE;
6385 lstart = lend + 1;
6386 }
6387
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006388 // Loop over all entries in 'runtimepath'. Use the first one where we
6389 // are allowed to write.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006390 rtp = p_rtp;
6391 while (*rtp != NUL)
6392 {
6393 if (aspath)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006394 // Use directory of an entry with path, e.g., for
6395 // "/dir/lg.utf-8.spl" use "/dir".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006396 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6397 lstart - curbuf->b_s.b_p_spl - 1);
6398 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006399 // Copy the path from 'runtimepath' to buf[].
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006400 copy_option_part(&rtp, buf, MAXPATHL, ",");
6401 if (filewritable(buf) == 2)
6402 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006403 // Use the first language name from 'spelllang' and the
6404 // encoding used in the first loaded .spl file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006405 if (aspath)
6406 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6407 lend - curbuf->b_s.b_p_spl);
6408 else
6409 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006410 // Create the "spell" directory if it doesn't exist yet.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006411 l = (int)STRLEN(buf);
6412 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
6413 if (filewritable(buf) != 2)
6414 vim_mkdir(buf, 0755);
6415
6416 l = (int)STRLEN(buf);
6417 vim_snprintf((char *)buf + l, MAXPATHL - l,
6418 "/%.*s", (int)(lend - lstart), lstart);
6419 }
6420 l = (int)STRLEN(buf);
6421 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
6422 ->lp_slang->sl_fname;
6423 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
6424 fname != NULL
6425 && strstr((char *)gettail(fname), ".ascii.") != NULL
6426 ? (char_u *)"ascii" : spell_enc());
Bram Moolenaar31e5c602022-04-15 13:53:33 +01006427 set_option_value_give_err((char_u *)"spellfile",
6428 0L, buf, OPT_LOCAL);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006429 break;
6430 }
6431 aspath = FALSE;
6432 }
6433
6434 vim_free(buf);
6435 }
6436}
6437
6438
6439
6440/*
6441 * Set the spell character tables from strings in the affix file.
6442 */
6443 static int
6444set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
6445{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006446 // We build the new tables here first, so that we can compare with the
6447 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006448 spelltab_T new_st;
6449 char_u *pf = fol, *pl = low, *pu = upp;
6450 int f, l, u;
6451
6452 clear_spell_chartab(&new_st);
6453
6454 while (*pf != NUL)
6455 {
6456 if (*pl == NUL || *pu == NUL)
6457 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006458 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006459 return FAIL;
6460 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006461 f = mb_ptr2char_adv(&pf);
6462 l = mb_ptr2char_adv(&pl);
6463 u = mb_ptr2char_adv(&pu);
Bram Moolenaar264b74f2019-01-24 17:18:42 +01006464
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006465 // Every character that appears is a word character.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006466 if (f < 256)
6467 new_st.st_isw[f] = TRUE;
6468 if (l < 256)
6469 new_st.st_isw[l] = TRUE;
6470 if (u < 256)
6471 new_st.st_isw[u] = TRUE;
6472
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006473 // if "LOW" and "FOL" are not the same the "LOW" char needs
6474 // case-folding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006475 if (l < 256 && l != f)
6476 {
6477 if (f >= 256)
6478 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006479 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006480 return FAIL;
6481 }
6482 new_st.st_fold[l] = f;
6483 }
6484
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006485 // if "UPP" and "FOL" are not the same the "UPP" char needs
6486 // case-folding, it's upper case and the "UPP" is the upper case of
6487 // "FOL" .
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006488 if (u < 256 && u != f)
6489 {
6490 if (f >= 256)
6491 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006492 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006493 return FAIL;
6494 }
6495 new_st.st_fold[u] = f;
6496 new_st.st_isu[u] = TRUE;
6497 new_st.st_upper[f] = u;
6498 }
6499 }
6500
6501 if (*pl != NUL || *pu != NUL)
6502 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006503 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006504 return FAIL;
6505 }
6506
6507 return set_spell_finish(&new_st);
6508}
6509
6510/*
6511 * Set the spell character tables from strings in the .spl file.
6512 */
6513 static void
6514set_spell_charflags(
6515 char_u *flags,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006516 int cnt, // length of "flags"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006517 char_u *fol)
6518{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006519 // We build the new tables here first, so that we can compare with the
6520 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006521 spelltab_T new_st;
6522 int i;
6523 char_u *p = fol;
6524 int c;
6525
6526 clear_spell_chartab(&new_st);
6527
6528 for (i = 0; i < 128; ++i)
6529 {
6530 if (i < cnt)
6531 {
6532 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
6533 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
6534 }
6535
6536 if (*p != NUL)
6537 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006538 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006539 new_st.st_fold[i + 128] = c;
6540 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
6541 new_st.st_upper[c] = i + 128;
6542 }
6543 }
6544
6545 (void)set_spell_finish(&new_st);
6546}
6547
6548 static int
6549set_spell_finish(spelltab_T *new_st)
6550{
6551 int i;
6552
6553 if (did_set_spelltab)
6554 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006555 // check that it's the same table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006556 for (i = 0; i < 256; ++i)
6557 {
6558 if (spelltab.st_isw[i] != new_st->st_isw[i]
6559 || spelltab.st_isu[i] != new_st->st_isu[i]
6560 || spelltab.st_fold[i] != new_st->st_fold[i]
6561 || spelltab.st_upper[i] != new_st->st_upper[i])
6562 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006563 emsg(_(e_word_characters_differ_between_spell_files));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006564 return FAIL;
6565 }
6566 }
6567 }
6568 else
6569 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006570 // copy the new spelltab into the one being used
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006571 spelltab = *new_st;
6572 did_set_spelltab = TRUE;
6573 }
6574
6575 return OK;
6576}
6577
6578/*
6579 * Write the table with prefix conditions to the .spl file.
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006580 * When "fd" is NULL only count the length of what is written and return it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006581 */
6582 static int
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006583write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006584{
6585 int i;
6586 char_u *p;
6587 int len;
6588 int totlen;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006589
6590 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006591 put_bytes(fd, (long_u)gap->ga_len, 2); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006592
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006593 totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006594
6595 for (i = 0; i < gap->ga_len; ++i)
6596 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006597 // <prefcond> : <condlen> <condstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006598 p = ((char_u **)gap->ga_data)[i];
6599 if (p != NULL)
6600 {
6601 len = (int)STRLEN(p);
6602 if (fd != NULL)
6603 {
6604 fputc(len, fd);
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006605 *fwv &= fwrite(p, (size_t)len, (size_t)1, fd);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006606 }
6607 totlen += len;
6608 }
6609 else if (fd != NULL)
6610 fputc(0, fd);
6611 }
6612
6613 return totlen;
6614}
6615
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006616/*
6617 * Use map string "map" for languages "lp".
6618 */
6619 static void
6620set_map_str(slang_T *lp, char_u *map)
6621{
6622 char_u *p;
6623 int headc = 0;
6624 int c;
6625 int i;
6626
6627 if (*map == NUL)
6628 {
6629 lp->sl_has_map = FALSE;
6630 return;
6631 }
6632 lp->sl_has_map = TRUE;
6633
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006634 // Init the array and hash tables empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006635 for (i = 0; i < 256; ++i)
6636 lp->sl_map_array[i] = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006637 hash_init(&lp->sl_map_hash);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006638
6639 /*
6640 * The similar characters are stored separated with slashes:
6641 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
6642 * before the same slash. For characters above 255 sl_map_hash is used.
6643 */
6644 for (p = map; *p != NUL; )
6645 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006646 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006647 if (c == '/')
6648 headc = 0;
6649 else
6650 {
6651 if (headc == 0)
6652 headc = c;
6653
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006654 // Characters above 255 don't fit in sl_map_array[], put them in
6655 // the hash table. Each entry is the char, a NUL the headchar and
6656 // a NUL.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006657 if (c >= 256)
6658 {
6659 int cl = mb_char2len(c);
6660 int headcl = mb_char2len(headc);
6661 char_u *b;
6662 hash_T hash;
6663 hashitem_T *hi;
6664
Bram Moolenaar964b3742019-05-24 18:54:09 +02006665 b = alloc(cl + headcl + 2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006666 if (b == NULL)
6667 return;
6668 mb_char2bytes(c, b);
6669 b[cl] = NUL;
6670 mb_char2bytes(headc, b + cl + 1);
6671 b[cl + 1 + headcl] = NUL;
6672 hash = hash_hash(b);
6673 hi = hash_lookup(&lp->sl_map_hash, b, hash);
6674 if (HASHITEM_EMPTY(hi))
6675 hash_add_item(&lp->sl_map_hash, hi, b, hash);
6676 else
6677 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006678 // This should have been checked when generating the .spl
6679 // file.
Bram Moolenaar677658a2022-01-05 16:09:06 +00006680 emsg(_(e_duplicate_char_in_map_entry));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006681 vim_free(b);
6682 }
6683 }
6684 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006685 lp->sl_map_array[c] = headc;
6686 }
6687 }
6688}
6689
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006690#endif // FEAT_SPELL