blob: 4a0de5237ffea85b5c5ed5e9341efb3c5b28842a [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spellfile.c: code for reading and writing spell files.
12 *
13 * See spell.c for information about spell checking.
14 */
15
16/*
17 * Vim spell file format: <HEADER>
18 * <SECTIONS>
19 * <LWORDTREE>
20 * <KWORDTREE>
21 * <PREFIXTREE>
22 *
23 * <HEADER>: <fileID> <versionnr>
24 *
25 * <fileID> 8 bytes "VIMspell"
26 * <versionnr> 1 byte VIMSPELLVERSION
27 *
28 *
29 * Sections make it possible to add information to the .spl file without
30 * making it incompatible with previous versions. There are two kinds of
31 * sections:
32 * 1. Not essential for correct spell checking. E.g. for making suggestions.
33 * These are skipped when not supported.
34 * 2. Optional information, but essential for spell checking when present.
35 * E.g. conditions for affixes. When this section is present but not
36 * supported an error message is given.
37 *
38 * <SECTIONS>: <section> ... <sectionend>
39 *
40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
41 *
42 * <sectionID> 1 byte number from 0 to 254 identifying the section
43 *
44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
45 * spell checking
46 *
47 * <sectionlen> 4 bytes length of section contents, MSB first
48 *
49 * <sectionend> 1 byte SN_END
50 *
51 *
52 * sectionID == SN_INFO: <infotext>
53 * <infotext> N bytes free format text with spell file info (version,
54 * website, etc)
55 *
56 * sectionID == SN_REGION: <regionname> ...
Bram Moolenaar2993ac52018-02-10 14:12:43 +010057 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower
58 * case. First <regionname> is region 1.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +020059 *
60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
61 * <folcharslen> <folchars>
62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
63 * <charflags> N bytes List of flags (first one is for character 128):
64 * 0x01 word character CF_WORD
65 * 0x02 upper-case character CF_UPPER
66 * <folcharslen> 2 bytes Number of bytes in <folchars>.
67 * <folchars> N bytes Folded characters, first one is for character 128.
68 *
69 * sectionID == SN_MIDWORD: <midword>
70 * <midword> N bytes Characters that are word characters only when used
71 * in the middle of a word.
72 *
73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
74 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
75 * <prefcond> : <condlen> <condstr>
76 * <condlen> 1 byte Length of <condstr>.
77 * <condstr> N bytes Condition for the prefix.
78 *
79 * sectionID == SN_REP: <repcount> <rep> ...
80 * <repcount> 2 bytes number of <rep> items, MSB first.
81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
82 * <repfromlen> 1 byte length of <repfrom>
83 * <repfrom> N bytes "from" part of replacement
84 * <reptolen> 1 byte length of <repto>
85 * <repto> N bytes "to" part of replacement
86 *
87 * sectionID == SN_REPSAL: <repcount> <rep> ...
88 * just like SN_REP but for soundfolded words
89 *
90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
91 * <salflags> 1 byte flags for soundsalike conversion:
92 * SAL_F0LLOWUP
93 * SAL_COLLAPSE
94 * SAL_REM_ACCENTS
95 * <salcount> 2 bytes number of <sal> items following
96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
97 * <salfromlen> 1 byte length of <salfrom>
98 * <salfrom> N bytes "from" part of soundsalike
99 * <saltolen> 1 byte length of <salto>
100 * <salto> N bytes "to" part of soundsalike
101 *
102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
103 * <sofofromlen> 2 bytes length of <sofofrom>
104 * <sofofrom> N bytes "from" part of soundfold
105 * <sofotolen> 2 bytes length of <sofoto>
106 * <sofoto> N bytes "to" part of soundfold
107 *
108 * sectionID == SN_SUGFILE: <timestamp>
109 * <timestamp> 8 bytes time in seconds that must match with .sug file
110 *
111 * sectionID == SN_NOSPLITSUGS: nothing
112 *
113 * sectionID == SN_NOCOMPOUNDSUGS: nothing
114 *
115 * sectionID == SN_WORDS: <word> ...
116 * <word> N bytes NUL terminated common word
117 *
118 * sectionID == SN_MAP: <mapstr>
119 * <mapstr> N bytes String with sequences of similar characters,
120 * separated by slashes.
121 *
122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
123 * <comppatcount> <comppattern> ... <compflags>
124 * <compmax> 1 byte Maximum nr of words in compound word.
125 * <compminlen> 1 byte Minimal word length for compounding.
126 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
127 * <compoptions> 2 bytes COMP_ flags.
128 * <comppatcount> 2 bytes number of <comppattern> following
129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
130 * slashes.
131 *
132 * <comppattern>: <comppatlen> <comppattext>
133 * <comppatlen> 1 byte length of <comppattext>
134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
135 *
136 * sectionID == SN_NOBREAK: (empty, its presence is what matters)
137 *
138 * sectionID == SN_SYLLABLE: <syllable>
139 * <syllable> N bytes String from SYLLABLE item.
140 *
141 * <LWORDTREE>: <wordtree>
142 *
143 * <KWORDTREE>: <wordtree>
144 *
145 * <PREFIXTREE>: <wordtree>
146 *
147 *
148 * <wordtree>: <nodecount> <nodedata> ...
149 *
150 * <nodecount> 4 bytes Number of nodes following. MSB first.
151 *
152 * <nodedata>: <siblingcount> <sibling> ...
153 *
154 * <siblingcount> 1 byte Number of siblings in this node. The siblings
155 * follow in sorted order.
156 *
157 * <sibling>: <byte> [ <nodeidx> <xbyte>
158 * | <flags> [<flags2>] [<region>] [<affixID>]
159 * | [<pflags>] <affixID> <prefcondnr> ]
160 *
161 * <byte> 1 byte Byte value of the sibling. Special cases:
162 * BY_NOFLAGS: End of word without flags and for all
163 * regions.
164 * For PREFIXTREE <affixID> and
165 * <prefcondnr> follow.
166 * BY_FLAGS: End of word, <flags> follow.
167 * For PREFIXTREE <pflags>, <affixID>
168 * and <prefcondnr> follow.
169 * BY_FLAGS2: End of word, <flags> and <flags2>
170 * follow. Not used in PREFIXTREE.
171 * BY_INDEX: Child of sibling is shared, <nodeidx>
172 * and <xbyte> follow.
173 *
174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
175 *
176 * <xbyte> 1 byte byte value of the sibling.
177 *
178 * <flags> 1 byte bitmask of:
179 * WF_ALLCAP word must have only capitals
180 * WF_ONECAP first char of word must be capital
181 * WF_KEEPCAP keep-case word
182 * WF_FIXCAP keep-case word, all caps not allowed
183 * WF_RARE rare word
184 * WF_BANNED bad word
185 * WF_REGION <region> follows
186 * WF_AFX <affixID> follows
187 *
188 * <flags2> 1 byte Bitmask of:
189 * WF_HAS_AFF >> 8 word includes affix
190 * WF_NEEDCOMP >> 8 word only valid in compound
191 * WF_NOSUGGEST >> 8 word not used for suggestions
192 * WF_COMPROOT >> 8 word already a compound
193 * WF_NOCOMPBEF >> 8 no compounding before this word
194 * WF_NOCOMPAFT >> 8 no compounding after this word
195 *
196 * <pflags> 1 byte bitmask of:
197 * WFP_RARE rare prefix
198 * WFP_NC non-combining prefix
199 * WFP_UP letter after prefix made upper case
200 *
201 * <region> 1 byte Bitmask for regions in which word is valid. When
202 * omitted it's valid in all regions.
203 * Lowest bit is for region 1.
204 *
205 * <affixID> 1 byte ID of affix that can be used with this word. In
206 * PREFIXTREE used for the required prefix ID.
207 *
208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
209 * from HEADER.
210 *
211 * All text characters are in 'encoding', but stored as single bytes.
212 */
213
214/*
215 * Vim .sug file format: <SUGHEADER>
216 * <SUGWORDTREE>
217 * <SUGTABLE>
218 *
219 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
220 *
221 * <fileID> 6 bytes "VIMsug"
222 * <versionnr> 1 byte VIMSUGVERSION
223 * <timestamp> 8 bytes timestamp that must match with .spl file
224 *
225 *
226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
227 *
228 *
229 * <SUGTABLE>: <sugwcount> <sugline> ...
230 *
231 * <sugwcount> 4 bytes number of <sugline> following
232 *
233 * <sugline>: <sugnr> ... NUL
234 *
 * <sugnr>:     X bytes  word number that results in this soundfolded word,
 *			 stored as an offset to the previous number in as
 *			 few bytes as possible (see offset2bytes())
238 */
239
240#include "vim.h"
241
242#if defined(FEAT_SPELL) || defined(PROTO)
243
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100244#ifndef UNIX // it's in os_unix.h for Unix
245# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200246#endif
247
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100248#ifndef UNIX // it's in os_unix.h for Unix
249# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200250#endif
251
// Special byte values for <byte>.  Some are only used in the tree for
// postponed prefixes, some only in the other trees.  This is a bit messy...
#define BY_NOFLAGS	0	// end of word without flags or region; for
				// postponed prefix: no <pflags>
#define BY_INDEX	1	// child is shared, index follows
#define BY_FLAGS	2	// end of word, <flags> byte follows; for
				// postponed prefix: <pflags> follows
#define BY_FLAGS2	3	// end of word, <flags> and <flags2> bytes
				// follow; never used in prefix tree
#define BY_SPECIAL  BY_FLAGS2	// highest special byte value

#define ZERO_FLAG	65009	// used when flag is zero: "0"

// Flags used in .spl file for soundsalike flags (the <salflags> byte of the
// SN_SAL section).  Note: "SAL_F0LLOWUP" is spelled with a digit zero.
#define SAL_F0LLOWUP		1
#define SAL_COLLAPSE		2
#define SAL_REM_ACCENTS		4

#define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
#define VIMSPELLMAGICL 8	  // number of bytes in VIMSPELLMAGIC (<fileID>)
#define VIMSPELLVERSION 50	  // value expected in <versionnr>

// Section IDs.  Only renumber them when VIMSPELLVERSION changes!
#define SN_REGION	0	// <regionname> section
#define SN_CHARFLAGS	1	// charflags section
#define SN_MIDWORD	2	// <midword> section
#define SN_PREFCOND	3	// <prefcond> section
#define SN_REP		4	// REP items section
#define SN_SAL		5	// SAL items section
#define SN_SOFO		6	// soundfolding section
#define SN_MAP		7	// MAP items section
#define SN_COMPOUND	8	// compound words section
#define SN_SYLLABLE	9	// syllable section
#define SN_NOBREAK	10	// NOBREAK section
#define SN_SUGFILE	11	// timestamp for .sug file
#define SN_REPSAL	12	// REPSAL items section
#define SN_WORDS	13	// common words
#define SN_NOSPLITSUGS	14	// don't split word for suggestions
#define SN_INFO		15	// info section
#define SN_NOCOMPOUNDSUGS 16	// don't compound for suggestions
#define SN_END		255	// end of sections

#define SNF_REQUIRED	1	// <sectionflags>: required section

// Bits stored in each <charflags> byte of the SN_CHARFLAGS section.
#define CF_WORD		0x01	// word character
#define CF_UPPER	0x02	// upper-case character
298
/*
 * Loop through all the siblings of a node (including the node).
 * "np" is the loop variable: it starts at "node" and follows the
 * "wn_sibling" links until NULL is reached.
 */
#define FOR_ALL_NODE_SIBLINGS(node, np) \
    for ((np) = (node); (np) != NULL; (np) = (np)->wn_sibling)
304
// Forward declarations of functions that are local to this file.
static int set_spell_finish(spelltab_T	*new_st);
static int write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv);
// Readers for the individual sections of a .spl file.
static int read_region_section(FILE *fd, slang_T *slang, int len);
static int read_charflags_section(FILE *fd);
static int read_prefcond_section(FILE *fd, slang_T *lp);
static int read_rep_section(FILE *fd, garray_T *gap, short *first);
static int read_sal_section(FILE *fd, slang_T *slang);
static int read_words_section(FILE *fd, slang_T *lp, int len);
static int read_sofo_section(FILE *fd, slang_T *slang);
static int read_compound(FILE *fd, slang_T *slang, int len);
static int set_sofo(slang_T *lp, char_u *from, char_u *to);
static void set_sal_first(slang_T *lp);
static int *mb_str2wide(char_u *s);
// Readers for a <wordtree>.
static int spell_read_tree(FILE *fd, char_u **bytsp, long *bytsp_len, idx_T **idxsp, int prefixtree, int prefixcnt);
static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
static void set_spell_charflags(char_u *flags, int cnt, char_u *upp);
static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp);
static void set_map_str(slang_T *lp, char_u *map);
323
324
// Message format strings shared within this file.  They are wrapped in N_()
// here; presumably the translation is done where they are used.
// The two affix messages take a file name, a line number and a text.
static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
static char *e_affname = N_("Affix name too long in %s line %d: %s");
static char *msg_compressing = N_("Compressing word tree...");
328
329/*
330 * Load one spell file and store the info into a slang_T.
331 *
332 * This is invoked in three ways:
333 * - From spell_load_cb() to load a spell file for the first time. "lang" is
334 * the language name, "old_lp" is NULL. Will allocate an slang_T.
335 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
336 * points to the existing slang_T.
337 * - Just after writing a .spl file; it's read back to produce the .sug file.
338 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
339 *
340 * Returns the slang_T the spell file was loaded into. NULL for error.
341 */
342 slang_T *
343spell_load_file(
344 char_u *fname,
345 char_u *lang,
346 slang_T *old_lp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100347 int silent) // no error if file doesn't exist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200348{
349 FILE *fd;
350 char_u buf[VIMSPELLMAGICL];
351 char_u *p;
352 int i;
353 int n;
354 int len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200355 slang_T *lp = NULL;
356 int c = 0;
357 int res;
Bram Moolenaarce6db022020-01-07 20:11:42 +0100358 int did_estack_push = FALSE;
Bram Moolenaare31ee862020-01-07 20:59:34 +0100359 ESTACK_CHECK_DECLARATION
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200360
361 fd = mch_fopen((char *)fname, "r");
362 if (fd == NULL)
363 {
364 if (!silent)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000365 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200366 else if (p_verbose > 2)
367 {
368 verbose_enter();
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000369 smsg((const char *)e_cant_open_file_str, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200370 verbose_leave();
371 }
372 goto endFAIL;
373 }
374 if (p_verbose > 2)
375 {
376 verbose_enter();
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100377 smsg(_("Reading spell file \"%s\""), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200378 verbose_leave();
379 }
380
381 if (old_lp == NULL)
382 {
383 lp = slang_alloc(lang);
384 if (lp == NULL)
385 goto endFAIL;
386
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100387 // Remember the file name, used to reload the file when it's updated.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200388 lp->sl_fname = vim_strsave(fname);
389 if (lp->sl_fname == NULL)
390 goto endFAIL;
391
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100392 // Check for .add.spl (_add.spl for VMS).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200393 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
394 }
395 else
396 lp = old_lp;
397
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100398 // Set sourcing_name, so that error messages mention the file name.
Bram Moolenaar1a47ae32019-12-29 23:04:25 +0100399 estack_push(ETYPE_SPELL, fname, 0);
Bram Moolenaare31ee862020-01-07 20:59:34 +0100400 ESTACK_CHECK_SETUP
Bram Moolenaarce6db022020-01-07 20:11:42 +0100401 did_estack_push = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200402
403 /*
404 * <HEADER>: <fileID>
405 */
406 for (i = 0; i < VIMSPELLMAGICL; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100407 buf[i] = getc(fd); // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200408 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
409 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000410 emsg(_(e_this_does_not_look_like_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200411 goto endFAIL;
412 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100413 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200414 if (c < VIMSPELLVERSION)
415 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000416 emsg(_(e_old_spell_file_needs_to_be_updated));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200417 goto endFAIL;
418 }
419 else if (c > VIMSPELLVERSION)
420 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000421 emsg(_(e_spell_file_is_for_newer_version_of_vim));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200422 goto endFAIL;
423 }
424
425
426 /*
427 * <SECTIONS>: <section> ... <sectionend>
428 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
429 */
430 for (;;)
431 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100432 n = getc(fd); // <sectionID> or <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200433 if (n == SN_END)
434 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100435 c = getc(fd); // <sectionflags>
436 len = get4c(fd); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200437 if (len < 0)
438 goto truncerr;
439
440 res = 0;
441 switch (n)
442 {
443 case SN_INFO:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100444 lp->sl_info = read_string(fd, len); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200445 if (lp->sl_info == NULL)
446 goto endFAIL;
447 break;
448
449 case SN_REGION:
450 res = read_region_section(fd, lp, len);
451 break;
452
453 case SN_CHARFLAGS:
454 res = read_charflags_section(fd);
455 break;
456
457 case SN_MIDWORD:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100458 lp->sl_midword = read_string(fd, len); // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200459 if (lp->sl_midword == NULL)
460 goto endFAIL;
461 break;
462
463 case SN_PREFCOND:
464 res = read_prefcond_section(fd, lp);
465 break;
466
467 case SN_REP:
468 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
469 break;
470
471 case SN_REPSAL:
472 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
473 break;
474
475 case SN_SAL:
476 res = read_sal_section(fd, lp);
477 break;
478
479 case SN_SOFO:
480 res = read_sofo_section(fd, lp);
481 break;
482
483 case SN_MAP:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100484 p = read_string(fd, len); // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200485 if (p == NULL)
486 goto endFAIL;
487 set_map_str(lp, p);
488 vim_free(p);
489 break;
490
491 case SN_WORDS:
492 res = read_words_section(fd, lp, len);
493 break;
494
495 case SN_SUGFILE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100496 lp->sl_sugtime = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200497 break;
498
499 case SN_NOSPLITSUGS:
500 lp->sl_nosplitsugs = TRUE;
501 break;
502
503 case SN_NOCOMPOUNDSUGS:
504 lp->sl_nocompoundsugs = TRUE;
505 break;
506
507 case SN_COMPOUND:
508 res = read_compound(fd, lp, len);
509 break;
510
511 case SN_NOBREAK:
512 lp->sl_nobreak = TRUE;
513 break;
514
515 case SN_SYLLABLE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100516 lp->sl_syllable = read_string(fd, len); // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200517 if (lp->sl_syllable == NULL)
518 goto endFAIL;
Bram Moolenaarfc2a47f2020-08-20 15:41:55 +0200519 if (init_syl_tab(lp) != OK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200520 goto endFAIL;
521 break;
522
523 default:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100524 // Unsupported section. When it's required give an error
525 // message. When it's not required skip the contents.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200526 if (c & SNF_REQUIRED)
527 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000528 emsg(_(e_unsupported_section_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200529 goto endFAIL;
530 }
531 while (--len >= 0)
532 if (getc(fd) < 0)
533 goto truncerr;
534 break;
535 }
536someerror:
537 if (res == SP_FORMERROR)
538 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000539 emsg(_(e_format_error_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200540 goto endFAIL;
541 }
542 if (res == SP_TRUNCERROR)
543 {
544truncerr:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000545 emsg(_(e_truncated_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200546 goto endFAIL;
547 }
548 if (res == SP_OTHERERROR)
549 goto endFAIL;
550 }
551
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100552 // <LWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200553 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fbyts_len,
554 &lp->sl_fidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200555 if (res != 0)
556 goto someerror;
557
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100558 // <KWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200559 res = spell_read_tree(fd, &lp->sl_kbyts, NULL, &lp->sl_kidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200560 if (res != 0)
561 goto someerror;
562
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100563 // <PREFIXTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200564 res = spell_read_tree(fd, &lp->sl_pbyts, NULL, &lp->sl_pidxs, TRUE,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200565 lp->sl_prefixcnt);
566 if (res != 0)
567 goto someerror;
568
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100569 // For a new file link it in the list of spell files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200570 if (old_lp == NULL && lang != NULL)
571 {
572 lp->sl_next = first_lang;
573 first_lang = lp;
574 }
575
576 goto endOK;
577
578endFAIL:
579 if (lang != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100580 // truncating the name signals the error to spell_load_lang()
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200581 *lang = NUL;
582 if (lp != NULL && old_lp == NULL)
583 slang_free(lp);
584 lp = NULL;
585
586endOK:
587 if (fd != NULL)
588 fclose(fd);
Bram Moolenaarce6db022020-01-07 20:11:42 +0100589 if (did_estack_push)
Bram Moolenaare31ee862020-01-07 20:59:34 +0100590 {
591 ESTACK_CHECK_NOW
Bram Moolenaarce6db022020-01-07 20:11:42 +0100592 estack_pop();
Bram Moolenaare31ee862020-01-07 20:59:34 +0100593 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200594
595 return lp;
596}
597
598/*
599 * Fill in the wordcount fields for a trie.
600 * Returns the total number of words.
601 */
602 static void
603tree_count_words(char_u *byts, idx_T *idxs)
604{
605 int depth;
606 idx_T arridx[MAXWLEN];
607 int curi[MAXWLEN];
608 int c;
609 idx_T n;
610 int wordcount[MAXWLEN];
611
612 arridx[0] = 0;
613 curi[0] = 1;
614 wordcount[0] = 0;
615 depth = 0;
616 while (depth >= 0 && !got_int)
617 {
618 if (curi[depth] > byts[arridx[depth]])
619 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100620 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200621 idxs[arridx[depth]] = wordcount[depth];
622 if (depth > 0)
623 wordcount[depth - 1] += wordcount[depth];
624
625 --depth;
626 fast_breakcheck();
627 }
628 else
629 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100630 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200631 n = arridx[depth] + curi[depth];
632 ++curi[depth];
633
634 c = byts[n];
635 if (c == 0)
636 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100637 // End of word, count it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200638 ++wordcount[depth];
639
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100640 // Skip over any other NUL bytes (same word with different
641 // flags).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200642 while (byts[n + 1] == 0)
643 {
644 ++n;
645 ++curi[depth];
646 }
647 }
648 else
649 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100650 // Normal char, go one level deeper to count the words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200651 ++depth;
652 arridx[depth] = idxs[n];
653 curi[depth] = 1;
654 wordcount[depth] = 0;
655 }
656 }
657 }
658}
659
660/*
661 * Load the .sug files for languages that have one and weren't loaded yet.
662 */
663 void
664suggest_load_files(void)
665{
666 langp_T *lp;
667 int lpi;
668 slang_T *slang;
669 char_u *dotp;
670 FILE *fd;
671 char_u buf[MAXWLEN];
672 int i;
673 time_t timestamp;
674 int wcount;
675 int wordnr;
676 garray_T ga;
677 int c;
678
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100679 // Do this for all languages that support sound folding.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200680 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
681 {
682 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
683 slang = lp->lp_slang;
684 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
685 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100686 // Change ".spl" to ".sug" and open the file. When the file isn't
687 // found silently skip it. Do set "sl_sugloaded" so that we
688 // don't try again and again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200689 slang->sl_sugloaded = TRUE;
690
691 dotp = vim_strrchr(slang->sl_fname, '.');
692 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
693 continue;
694 STRCPY(dotp, ".sug");
695 fd = mch_fopen((char *)slang->sl_fname, "r");
696 if (fd == NULL)
697 goto nextone;
698
699 /*
700 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
701 */
702 for (i = 0; i < VIMSUGMAGICL; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100703 buf[i] = getc(fd); // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200704 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
705 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000706 semsg(_(e_this_does_not_look_like_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200707 slang->sl_fname);
708 goto nextone;
709 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100710 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200711 if (c < VIMSUGVERSION)
712 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000713 semsg(_(e_old_sug_file_needs_to_be_updated_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200714 slang->sl_fname);
715 goto nextone;
716 }
717 else if (c > VIMSUGVERSION)
718 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000719 semsg(_(e_sug_file_is_for_newer_version_of_vim_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200720 slang->sl_fname);
721 goto nextone;
722 }
723
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100724 // Check the timestamp, it must be exactly the same as the one in
725 // the .spl file. Otherwise the word numbers won't match.
726 timestamp = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200727 if (timestamp != slang->sl_sugtime)
728 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000729 semsg(_(e_sug_file_doesnt_match_spl_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200730 slang->sl_fname);
731 goto nextone;
732 }
733
734 /*
735 * <SUGWORDTREE>: <wordtree>
736 * Read the trie with the soundfolded words.
737 */
Bram Moolenaar07399e72020-08-24 20:05:50 +0200738 if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200739 FALSE, 0) != 0)
740 {
741someerror:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000742 semsg(_(e_error_while_reading_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200743 slang->sl_fname);
744 slang_clear_sug(slang);
745 goto nextone;
746 }
747
748 /*
749 * <SUGTABLE>: <sugwcount> <sugline> ...
750 *
751 * Read the table with word numbers. We use a file buffer for
752 * this, because it's so much like a file with lines. Makes it
753 * possible to swap the info and save on memory use.
754 */
755 slang->sl_sugbuf = open_spellbuf();
756 if (slang->sl_sugbuf == NULL)
757 goto someerror;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100758 // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200759 wcount = get4c(fd);
760 if (wcount < 0)
761 goto someerror;
762
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100763 // Read all the wordnr lists into the buffer, one NUL terminated
764 // list per line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200765 ga_init2(&ga, 1, 100);
766 for (wordnr = 0; wordnr < wcount; ++wordnr)
767 {
768 ga.ga_len = 0;
769 for (;;)
770 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100771 c = getc(fd); // <sugline>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200772 if (c < 0 || ga_grow(&ga, 1) == FAIL)
773 goto someerror;
774 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
775 if (c == NUL)
776 break;
777 }
778 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
779 ga.ga_data, ga.ga_len, TRUE) == FAIL)
780 goto someerror;
781 }
782 ga_clear(&ga);
783
784 /*
785 * Need to put word counts in the word tries, so that we can find
786 * a word by its number.
787 */
788 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
789 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
790
791nextone:
792 if (fd != NULL)
793 fclose(fd);
794 STRCPY(dotp, ".spl");
795 }
796 }
797}
798
799
800/*
801 * Read a length field from "fd" in "cnt_bytes" bytes.
802 * Allocate memory, read the string into it and add a NUL at the end.
803 * Returns NULL when the count is zero.
804 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
805 * otherwise.
806 */
807 static char_u *
808read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
809{
810 int cnt = 0;
811 int i;
812 char_u *str;
813
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100814 // read the length bytes, MSB first
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200815 for (i = 0; i < cnt_bytes; ++i)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200816 {
Bram Moolenaar4ad739f2020-09-02 10:25:45 +0200817 int c = getc(fd);
818
819 if (c == EOF)
820 {
821 *cntp = SP_TRUNCERROR;
822 return NULL;
823 }
824 cnt = (cnt << 8) + (unsigned)c;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200825 }
826 *cntp = cnt;
827 if (cnt == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100828 return NULL; // nothing to read, return NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200829
830 str = read_string(fd, cnt);
831 if (str == NULL)
832 *cntp = SP_OTHERERROR;
833 return str;
834}
835
836/*
837 * Read SN_REGION: <regionname> ...
838 * Return SP_*ERROR flags.
839 */
840 static int
841read_region_section(FILE *fd, slang_T *lp, int len)
842{
843 int i;
844
Bram Moolenaar2993ac52018-02-10 14:12:43 +0100845 if (len > MAXREGIONS * 2)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200846 return SP_FORMERROR;
847 for (i = 0; i < len; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100848 lp->sl_regions[i] = getc(fd); // <regionname>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200849 lp->sl_regions[len] = NUL;
850 return 0;
851}
852
853/*
854 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
855 * <folcharslen> <folchars>
856 * Return SP_*ERROR flags.
857 */
858 static int
859read_charflags_section(FILE *fd)
860{
861 char_u *flags;
862 char_u *fol;
863 int flagslen, follen;
864
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100865 // <charflagslen> <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200866 flags = read_cnt_string(fd, 1, &flagslen);
867 if (flagslen < 0)
868 return flagslen;
869
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100870 // <folcharslen> <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200871 fol = read_cnt_string(fd, 2, &follen);
872 if (follen < 0)
873 {
874 vim_free(flags);
875 return follen;
876 }
877
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100878 // Set the word-char flags and fill SPELL_ISUPPER() table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200879 if (flags != NULL && fol != NULL)
880 set_spell_charflags(flags, flagslen, fol);
881
882 vim_free(flags);
883 vim_free(fol);
884
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100885 // When <charflagslen> is zero then <fcharlen> must also be zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200886 if ((flags == NULL) != (fol == NULL))
887 return SP_FORMERROR;
888 return 0;
889}
890
891/*
892 * Read SN_PREFCOND section.
893 * Return SP_*ERROR flags.
894 */
895 static int
896read_prefcond_section(FILE *fd, slang_T *lp)
897{
898 int cnt;
899 int i;
900 int n;
901 char_u *p;
902 char_u buf[MAXWLEN + 1];
903
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100904 // <prefcondcnt> <prefcond> ...
905 cnt = get2c(fd); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200906 if (cnt <= 0)
907 return SP_FORMERROR;
908
Bram Moolenaarc799fe22019-05-28 23:08:19 +0200909 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200910 if (lp->sl_prefprog == NULL)
911 return SP_OTHERERROR;
912 lp->sl_prefixcnt = cnt;
913
914 for (i = 0; i < cnt; ++i)
915 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100916 // <prefcond> : <condlen> <condstr>
917 n = getc(fd); // <condlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200918 if (n < 0 || n >= MAXWLEN)
919 return SP_FORMERROR;
920
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100921 // When <condlen> is zero we have an empty condition. Otherwise
922 // compile the regexp program used to check for the condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200923 if (n > 0)
924 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100925 buf[0] = '^'; // always match at one position only
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200926 p = buf + 1;
927 while (n-- > 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100928 *p++ = getc(fd); // <condstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200929 *p = NUL;
930 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
931 }
932 }
933 return 0;
934}
935
936/*
937 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
938 * Return SP_*ERROR flags.
939 */
940 static int
941read_rep_section(FILE *fd, garray_T *gap, short *first)
942{
943 int cnt;
944 fromto_T *ftp;
945 int i;
946
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100947 cnt = get2c(fd); // <repcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200948 if (cnt < 0)
949 return SP_TRUNCERROR;
950
951 if (ga_grow(gap, cnt) == FAIL)
952 return SP_OTHERERROR;
953
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100954 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200955 for (; gap->ga_len < cnt; ++gap->ga_len)
956 {
957 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
958 ftp->ft_from = read_cnt_string(fd, 1, &i);
959 if (i < 0)
960 return i;
961 if (i == 0)
962 return SP_FORMERROR;
963 ftp->ft_to = read_cnt_string(fd, 1, &i);
964 if (i <= 0)
965 {
966 vim_free(ftp->ft_from);
967 if (i < 0)
968 return i;
969 return SP_FORMERROR;
970 }
971 }
972
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100973 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200974 for (i = 0; i < 256; ++i)
975 first[i] = -1;
976 for (i = 0; i < gap->ga_len; ++i)
977 {
978 ftp = &((fromto_T *)gap->ga_data)[i];
979 if (first[*ftp->ft_from] == -1)
980 first[*ftp->ft_from] = i;
981 }
982 return 0;
983}
984
985/*
986 * Read SN_SAL section: <salflags> <salcount> <sal> ...
987 * Return SP_*ERROR flags.
988 */
989 static int
990read_sal_section(FILE *fd, slang_T *slang)
991{
992 int i;
993 int cnt;
994 garray_T *gap;
995 salitem_T *smp;
996 int ccnt;
997 char_u *p;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200998
999 slang->sl_sofo = FALSE;
1000
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001001 i = getc(fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001002 if (i & SAL_F0LLOWUP)
1003 slang->sl_followup = TRUE;
1004 if (i & SAL_COLLAPSE)
1005 slang->sl_collapse = TRUE;
1006 if (i & SAL_REM_ACCENTS)
1007 slang->sl_rem_accents = TRUE;
1008
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001009 cnt = get2c(fd); // <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001010 if (cnt < 0)
1011 return SP_TRUNCERROR;
1012
1013 gap = &slang->sl_sal;
1014 ga_init2(gap, sizeof(salitem_T), 10);
1015 if (ga_grow(gap, cnt + 1) == FAIL)
1016 return SP_OTHERERROR;
1017
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001018 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001019 for (; gap->ga_len < cnt; ++gap->ga_len)
1020 {
Bram Moolenaar97d2f342020-07-10 20:03:03 +02001021 int c = NUL;
1022
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001023 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001024 ccnt = getc(fd); // <salfromlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001025 if (ccnt < 0)
1026 return SP_TRUNCERROR;
1027 if ((p = alloc(ccnt + 2)) == NULL)
1028 return SP_OTHERERROR;
1029 smp->sm_lead = p;
1030
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001031 // Read up to the first special char into sm_lead.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001032 for (i = 0; i < ccnt; ++i)
1033 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001034 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001035 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
1036 break;
1037 *p++ = c;
1038 }
1039 smp->sm_leadlen = (int)(p - smp->sm_lead);
1040 *p++ = NUL;
1041
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001042 // Put (abc) chars in sm_oneof, if any.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001043 if (c == '(')
1044 {
1045 smp->sm_oneof = p;
1046 for (++i; i < ccnt; ++i)
1047 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001048 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001049 if (c == ')')
1050 break;
1051 *p++ = c;
1052 }
1053 *p++ = NUL;
1054 if (++i < ccnt)
1055 c = getc(fd);
1056 }
1057 else
1058 smp->sm_oneof = NULL;
1059
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001060 // Any following chars go in sm_rules.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001061 smp->sm_rules = p;
1062 if (i < ccnt)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001063 // store the char we got while checking for end of sm_lead
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001064 *p++ = c;
1065 for (++i; i < ccnt; ++i)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001066 *p++ = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001067 *p++ = NUL;
1068
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001069 // <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001070 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
1071 if (ccnt < 0)
1072 {
1073 vim_free(smp->sm_lead);
1074 return ccnt;
1075 }
1076
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001077 if (has_mbyte)
1078 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001079 // convert the multi-byte strings to wide char strings
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001080 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1081 smp->sm_leadlen = mb_charlen(smp->sm_lead);
1082 if (smp->sm_oneof == NULL)
1083 smp->sm_oneof_w = NULL;
1084 else
1085 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
1086 if (smp->sm_to == NULL)
1087 smp->sm_to_w = NULL;
1088 else
1089 smp->sm_to_w = mb_str2wide(smp->sm_to);
1090 if (smp->sm_lead_w == NULL
1091 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
1092 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
1093 {
1094 vim_free(smp->sm_lead);
1095 vim_free(smp->sm_to);
1096 vim_free(smp->sm_lead_w);
1097 vim_free(smp->sm_oneof_w);
1098 vim_free(smp->sm_to_w);
1099 return SP_OTHERERROR;
1100 }
1101 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001102 }
1103
1104 if (gap->ga_len > 0)
1105 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001106 // Add one extra entry to mark the end with an empty sm_lead. Avoids
1107 // that we need to check the index every time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001108 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1109 if ((p = alloc(1)) == NULL)
1110 return SP_OTHERERROR;
1111 p[0] = NUL;
1112 smp->sm_lead = p;
1113 smp->sm_leadlen = 0;
1114 smp->sm_oneof = NULL;
1115 smp->sm_rules = p;
1116 smp->sm_to = NULL;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001117 if (has_mbyte)
1118 {
1119 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1120 smp->sm_leadlen = 0;
1121 smp->sm_oneof_w = NULL;
1122 smp->sm_to_w = NULL;
1123 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001124 ++gap->ga_len;
1125 }
1126
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001127 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001128 set_sal_first(slang);
1129
1130 return 0;
1131}
1132
1133/*
1134 * Read SN_WORDS: <word> ...
1135 * Return SP_*ERROR flags.
1136 */
1137 static int
1138read_words_section(FILE *fd, slang_T *lp, int len)
1139{
1140 int done = 0;
1141 int i;
1142 int c;
1143 char_u word[MAXWLEN];
1144
1145 while (done < len)
1146 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001147 // Read one word at a time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001148 for (i = 0; ; ++i)
1149 {
1150 c = getc(fd);
1151 if (c == EOF)
1152 return SP_TRUNCERROR;
1153 word[i] = c;
1154 if (word[i] == NUL)
1155 break;
1156 if (i == MAXWLEN - 1)
1157 return SP_FORMERROR;
1158 }
1159
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001160 // Init the count to 10.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001161 count_common_word(lp, word, -1, 10);
1162 done += i + 1;
1163 }
1164 return 0;
1165}
1166
1167/*
1168 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
1169 * Return SP_*ERROR flags.
1170 */
1171 static int
1172read_sofo_section(FILE *fd, slang_T *slang)
1173{
1174 int cnt;
1175 char_u *from, *to;
1176 int res;
1177
1178 slang->sl_sofo = TRUE;
1179
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001180 // <sofofromlen> <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001181 from = read_cnt_string(fd, 2, &cnt);
1182 if (cnt < 0)
1183 return cnt;
1184
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001185 // <sofotolen> <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001186 to = read_cnt_string(fd, 2, &cnt);
1187 if (cnt < 0)
1188 {
1189 vim_free(from);
1190 return cnt;
1191 }
1192
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001193 // Store the info in slang->sl_sal and/or slang->sl_sal_first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001194 if (from != NULL && to != NULL)
1195 res = set_sofo(slang, from, to);
1196 else if (from != NULL || to != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001197 res = SP_FORMERROR; // only one of two strings is an error
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001198 else
1199 res = 0;
1200
1201 vim_free(from);
1202 vim_free(to);
1203 return res;
1204}
1205
1206/*
1207 * Read the compound section from the .spl file:
1208 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
1209 * Returns SP_*ERROR flags.
1210 */
1211 static int
1212read_compound(FILE *fd, slang_T *slang, int len)
1213{
1214 int todo = len;
1215 int c;
1216 int atstart;
1217 char_u *pat;
1218 char_u *pp;
1219 char_u *cp;
1220 char_u *ap;
1221 char_u *crp;
1222 int cnt;
1223 garray_T *gap;
1224
1225 if (todo < 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001226 return SP_FORMERROR; // need at least two bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001227
1228 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001229 c = getc(fd); // <compmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001230 if (c < 2)
1231 c = MAXWLEN;
1232 slang->sl_compmax = c;
1233
1234 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001235 c = getc(fd); // <compminlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001236 if (c < 1)
1237 c = 0;
1238 slang->sl_compminlen = c;
1239
1240 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001241 c = getc(fd); // <compsylmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001242 if (c < 1)
1243 c = MAXWLEN;
1244 slang->sl_compsylmax = c;
1245
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001246 c = getc(fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001247 if (c != 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001248 ungetc(c, fd); // be backwards compatible with Vim 7.0b
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001249 else
1250 {
1251 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001252 c = getc(fd); // only use the lower byte for now
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001253 --todo;
1254 slang->sl_compoptions = c;
1255
1256 gap = &slang->sl_comppat;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001257 c = get2c(fd); // <comppatcount>
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001258 if (c < 0)
1259 return SP_TRUNCERROR;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001260 todo -= 2;
1261 ga_init2(gap, sizeof(char_u *), c);
1262 if (ga_grow(gap, c) == OK)
1263 while (--c >= 0)
1264 {
1265 ((char_u **)(gap->ga_data))[gap->ga_len++] =
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001266 read_cnt_string(fd, 1, &cnt);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001267 // <comppatlen> <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001268 if (cnt < 0)
1269 return cnt;
1270 todo -= cnt + 1;
1271 }
1272 }
1273 if (todo < 0)
1274 return SP_FORMERROR;
1275
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001276 // Turn the COMPOUNDRULE items into a regexp pattern:
1277 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
1278 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
1279 // Conversion to utf-8 may double the size.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001280 c = todo * 2 + 7;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001281 if (enc_utf8)
1282 c += todo * 2;
Bram Moolenaar964b3742019-05-24 18:54:09 +02001283 pat = alloc(c);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001284 if (pat == NULL)
1285 return SP_OTHERERROR;
1286
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001287 // We also need a list of all flags that can appear at the start and one
1288 // for all flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001289 cp = alloc(todo + 1);
1290 if (cp == NULL)
1291 {
1292 vim_free(pat);
1293 return SP_OTHERERROR;
1294 }
1295 slang->sl_compstartflags = cp;
1296 *cp = NUL;
1297
1298 ap = alloc(todo + 1);
1299 if (ap == NULL)
1300 {
1301 vim_free(pat);
1302 return SP_OTHERERROR;
1303 }
1304 slang->sl_compallflags = ap;
1305 *ap = NUL;
1306
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001307 // And a list of all patterns in their original form, for checking whether
1308 // compounding may work in match_compoundrule(). This is freed when we
1309 // encounter a wildcard, the check doesn't work then.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001310 crp = alloc(todo + 1);
1311 slang->sl_comprules = crp;
1312
1313 pp = pat;
1314 *pp++ = '^';
1315 *pp++ = '\\';
1316 *pp++ = '(';
1317
1318 atstart = 1;
1319 while (todo-- > 0)
1320 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001321 c = getc(fd); // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001322 if (c == EOF)
1323 {
1324 vim_free(pat);
1325 return SP_TRUNCERROR;
1326 }
1327
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001328 // Add all flags to "sl_compallflags".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001329 if (vim_strchr((char_u *)"?*+[]/", c) == NULL
1330 && !byte_in_str(slang->sl_compallflags, c))
1331 {
1332 *ap++ = c;
1333 *ap = NUL;
1334 }
1335
1336 if (atstart != 0)
1337 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001338 // At start of item: copy flags to "sl_compstartflags". For a
1339 // [abc] item set "atstart" to 2 and copy up to the ']'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001340 if (c == '[')
1341 atstart = 2;
1342 else if (c == ']')
1343 atstart = 0;
1344 else
1345 {
1346 if (!byte_in_str(slang->sl_compstartflags, c))
1347 {
1348 *cp++ = c;
1349 *cp = NUL;
1350 }
1351 if (atstart == 1)
1352 atstart = 0;
1353 }
1354 }
1355
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001356 // Copy flag to "sl_comprules", unless we run into a wildcard.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001357 if (crp != NULL)
1358 {
1359 if (c == '?' || c == '+' || c == '*')
1360 {
Bram Moolenaard23a8232018-02-10 18:45:26 +01001361 VIM_CLEAR(slang->sl_comprules);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001362 crp = NULL;
1363 }
1364 else
1365 *crp++ = c;
1366 }
1367
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001368 if (c == '/') // slash separates two items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001369 {
1370 *pp++ = '\\';
1371 *pp++ = '|';
1372 atstart = 1;
1373 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001374 else // normal char, "[abc]" and '*' are copied as-is
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001375 {
1376 if (c == '?' || c == '+' || c == '~')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001377 *pp++ = '\\'; // "a?" becomes "a\?", "a+" becomes "a\+"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001378 if (enc_utf8)
1379 pp += mb_char2bytes(c, pp);
1380 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001381 *pp++ = c;
1382 }
1383 }
1384
1385 *pp++ = '\\';
1386 *pp++ = ')';
1387 *pp++ = '$';
1388 *pp = NUL;
1389
1390 if (crp != NULL)
1391 *crp = NUL;
1392
1393 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
1394 vim_free(pat);
1395 if (slang->sl_compprog == NULL)
1396 return SP_FORMERROR;
1397
1398 return 0;
1399}
1400
1401/*
1402 * Set the SOFOFROM and SOFOTO items in language "lp".
1403 * Returns SP_*ERROR flags when there is something wrong.
1404 */
1405 static int
1406set_sofo(slang_T *lp, char_u *from, char_u *to)
1407{
1408 int i;
1409
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001410 garray_T *gap;
1411 char_u *s;
1412 char_u *p;
1413 int c;
1414 int *inp;
1415
1416 if (has_mbyte)
1417 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001418 // Use "sl_sal" as an array with 256 pointers to a list of wide
1419 // characters. The index is the low byte of the character.
1420 // The list contains from-to pairs with a terminating NUL.
1421 // sl_sal_first[] is used for latin1 "from" characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001422 gap = &lp->sl_sal;
1423 ga_init2(gap, sizeof(int *), 1);
1424 if (ga_grow(gap, 256) == FAIL)
1425 return SP_OTHERERROR;
1426 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
1427 gap->ga_len = 256;
1428
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001429 // First count the number of items for each list. Temporarily use
1430 // sl_sal_first[] for this.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001431 for (p = from, s = to; *p != NUL && *s != NUL; )
1432 {
1433 c = mb_cptr2char_adv(&p);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001434 MB_CPTR_ADV(s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001435 if (c >= 256)
1436 ++lp->sl_sal_first[c & 0xff];
1437 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001438 if (*p != NUL || *s != NUL) // lengths differ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001439 return SP_FORMERROR;
1440
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001441 // Allocate the lists.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001442 for (i = 0; i < 256; ++i)
1443 if (lp->sl_sal_first[i] > 0)
1444 {
1445 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
1446 if (p == NULL)
1447 return SP_OTHERERROR;
1448 ((int **)gap->ga_data)[i] = (int *)p;
1449 *(int *)p = 0;
1450 }
1451
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001452 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
1453 // list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001454 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
1455 for (p = from, s = to; *p != NUL && *s != NUL; )
1456 {
1457 c = mb_cptr2char_adv(&p);
1458 i = mb_cptr2char_adv(&s);
1459 if (c >= 256)
1460 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001461 // Append the from-to chars at the end of the list with
1462 // the low byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001463 inp = ((int **)gap->ga_data)[c & 0xff];
1464 while (*inp != 0)
1465 ++inp;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001466 *inp++ = c; // from char
1467 *inp++ = i; // to char
1468 *inp++ = NUL; // NUL at the end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001469 }
1470 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001471 // mapping byte to char is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001472 lp->sl_sal_first[c] = i;
1473 }
1474 }
1475 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001476 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001477 // mapping bytes to bytes is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001478 if (STRLEN(from) != STRLEN(to))
1479 return SP_FORMERROR;
1480
1481 for (i = 0; to[i] != NUL; ++i)
1482 lp->sl_sal_first[from[i]] = to[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001483 lp->sl_sal.ga_len = 1; // indicates we have soundfolding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001484 }
1485
1486 return 0;
1487}
1488
1489/*
1490 * Fill the first-index table for "lp".
1491 */
1492 static void
1493set_sal_first(slang_T *lp)
1494{
1495 salfirst_T *sfirst;
1496 int i;
1497 salitem_T *smp;
1498 int c;
1499 garray_T *gap = &lp->sl_sal;
1500
1501 sfirst = lp->sl_sal_first;
1502 for (i = 0; i < 256; ++i)
1503 sfirst[i] = -1;
1504 smp = (salitem_T *)gap->ga_data;
1505 for (i = 0; i < gap->ga_len; ++i)
1506 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001507 if (has_mbyte)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001508 // Use the lowest byte of the first character. For latin1 it's
1509 // the character, for other encodings it should differ for most
1510 // characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001511 c = *smp[i].sm_lead_w & 0xff;
1512 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001513 c = *smp[i].sm_lead;
1514 if (sfirst[c] == -1)
1515 {
1516 sfirst[c] = i;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001517 if (has_mbyte)
1518 {
1519 int n;
1520
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001521 // Make sure all entries with this byte are following each
1522 // other. Move the ones that are in the wrong position. Do
1523 // keep the same ordering!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001524 while (i + 1 < gap->ga_len
1525 && (*smp[i + 1].sm_lead_w & 0xff) == c)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001526 // Skip over entry with same index byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001527 ++i;
1528
1529 for (n = 1; i + n < gap->ga_len; ++n)
1530 if ((*smp[i + n].sm_lead_w & 0xff) == c)
1531 {
1532 salitem_T tsal;
1533
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001534 // Move entry with same index byte after the entries
1535 // we already found.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001536 ++i;
1537 --n;
1538 tsal = smp[i + n];
1539 mch_memmove(smp + i + 1, smp + i,
1540 sizeof(salitem_T) * n);
1541 smp[i] = tsal;
1542 }
1543 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001544 }
1545 }
1546}
1547
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001548/*
1549 * Turn a multi-byte string into a wide character string.
1550 * Return it in allocated memory (NULL for out-of-memory)
1551 */
1552 static int *
1553mb_str2wide(char_u *s)
1554{
1555 int *res;
1556 char_u *p;
1557 int i = 0;
1558
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001559 res = ALLOC_MULT(int, mb_charlen(s) + 1);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001560 if (res != NULL)
1561 {
1562 for (p = s; *p != NUL; )
1563 res[i++] = mb_ptr2char_adv(&p);
1564 res[i] = NUL;
1565 }
1566 return res;
1567}
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001568
1569/*
1570 * Read a tree from the .spl or .sug file.
1571 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
1572 * This is skipped when the tree has zero length.
1573 * Returns zero when OK, SP_ value for an error.
1574 */
1575 static int
1576spell_read_tree(
1577 FILE *fd,
1578 char_u **bytsp,
Bram Moolenaar07399e72020-08-24 20:05:50 +02001579 long *bytsp_len,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001580 idx_T **idxsp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001581 int prefixtree, // TRUE for the prefix tree
1582 int prefixcnt) // when "prefixtree" is TRUE: prefix count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001583{
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001584 long len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001585 int idx;
1586 char_u *bp;
1587 idx_T *ip;
1588
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001589 // The tree size was computed when writing the file, so that we can
1590 // allocate it as one long block. <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001591 len = get4c(fd);
1592 if (len < 0)
1593 return SP_TRUNCERROR;
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001594 if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001595 // Invalid length, multiply with sizeof(int) would overflow.
Bram Moolenaar399c2972017-02-09 21:07:12 +01001596 return SP_FORMERROR;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001597 if (len > 0)
1598 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001599 // Allocate the byte array.
Bram Moolenaar18a4ba22019-05-24 19:39:03 +02001600 bp = alloc(len);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001601 if (bp == NULL)
1602 return SP_OTHERERROR;
1603 *bytsp = bp;
Bram Moolenaar07399e72020-08-24 20:05:50 +02001604 if (bytsp_len != NULL)
1605 *bytsp_len = len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001606
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001607 // Allocate the index array.
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001608 ip = lalloc_clear(len * sizeof(int), TRUE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001609 if (ip == NULL)
1610 return SP_OTHERERROR;
1611 *idxsp = ip;
1612
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001613 // Recursively read the tree and store it in the array.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001614 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
1615 if (idx < 0)
1616 return idx;
1617 }
1618 return 0;
1619}
1620
1621/*
1622 * Read one row of siblings from the spell file and store it in the byte array
1623 * "byts" and index array "idxs". Recursively read the children.
1624 *
1625 * NOTE: The code here must match put_node()!
1626 *
1627 * Returns the index (>= 0) following the siblings.
1628 * Returns SP_TRUNCERROR if the file is shorter than expected.
1629 * Returns SP_FORMERROR if there is a format error.
1630 */
1631 static idx_T
1632read_tree_node(
1633 FILE *fd,
1634 char_u *byts,
1635 idx_T *idxs,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001636 int maxidx, // size of arrays
1637 idx_T startidx, // current index in "byts" and "idxs"
1638 int prefixtree, // TRUE for reading PREFIXTREE
1639 int maxprefcondnr) // maximum for <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001640{
1641 int len;
1642 int i;
1643 int n;
1644 idx_T idx = startidx;
1645 int c;
1646 int c2;
1647#define SHARED_MASK 0x8000000
1648
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001649 len = getc(fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001650 if (len <= 0)
1651 return SP_TRUNCERROR;
1652
1653 if (startidx + len >= maxidx)
1654 return SP_FORMERROR;
1655 byts[idx++] = len;
1656
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001657 // Read the byte values, flag/region bytes and shared indexes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001658 for (i = 1; i <= len; ++i)
1659 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001660 c = getc(fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001661 if (c < 0)
1662 return SP_TRUNCERROR;
1663 if (c <= BY_SPECIAL)
1664 {
1665 if (c == BY_NOFLAGS && !prefixtree)
1666 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001667 // No flags, all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001668 idxs[idx] = 0;
1669 c = 0;
1670 }
1671 else if (c != BY_INDEX)
1672 {
1673 if (prefixtree)
1674 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001675 // Read the optional pflags byte, the prefix ID and the
1676 // condition nr. In idxs[] store the prefix ID in the low
1677 // byte, the condition index shifted up 8 bits, the flags
1678 // shifted up 24 bits.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001679 if (c == BY_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001680 c = getc(fd) << 24; // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001681 else
1682 c = 0;
1683
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001684 c |= getc(fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001685
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001686 n = get2c(fd); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001687 if (n >= maxprefcondnr)
1688 return SP_FORMERROR;
1689 c |= (n << 8);
1690 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001691 else // c must be BY_FLAGS or BY_FLAGS2
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001692 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001693 // Read flags and optional region and prefix ID. In
1694 // idxs[] the flags go in the low two bytes, region above
1695 // that and prefix ID above the region.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001696 c2 = c;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001697 c = getc(fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001698 if (c2 == BY_FLAGS2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001699 c = (getc(fd) << 8) + c; // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001700 if (c & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001701 c = (getc(fd) << 16) + c; // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001702 if (c & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001703 c = (getc(fd) << 24) + c; // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001704 }
1705
1706 idxs[idx] = c;
1707 c = 0;
1708 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001709 else // c == BY_INDEX
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001710 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001711 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001712 n = get3c(fd);
1713 if (n < 0 || n >= maxidx)
1714 return SP_FORMERROR;
1715 idxs[idx] = n + SHARED_MASK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001716 c = getc(fd); // <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001717 }
1718 }
1719 byts[idx++] = c;
1720 }
1721
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001722 // Recursively read the children for non-shared siblings.
1723 // Skip the end-of-word ones (zero byte value) and the shared ones (and
1724 // remove SHARED_MASK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001725 for (i = 1; i <= len; ++i)
1726 if (byts[startidx + i] != 0)
1727 {
1728 if (idxs[startidx + i] & SHARED_MASK)
1729 idxs[startidx + i] &= ~SHARED_MASK;
1730 else
1731 {
1732 idxs[startidx + i] = idx;
1733 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
1734 prefixtree, maxprefcondnr);
1735 if (idx < 0)
1736 break;
1737 }
1738 }
1739
1740 return idx;
1741}
1742
1743/*
1744 * Reload the spell file "fname" if it's loaded.
1745 */
1746 static void
1747spell_reload_one(
1748 char_u *fname,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001749 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001750{
1751 slang_T *slang;
1752 int didit = FALSE;
1753
Bram Moolenaaraeea7212020-04-02 18:50:46 +02001754 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001755 {
Bram Moolenaar99499b12019-05-23 21:35:48 +02001756 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001757 {
1758 slang_clear(slang);
1759 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001760 // reloading failed, clear the language
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001761 slang_clear(slang);
1762 redraw_all_later(SOME_VALID);
1763 didit = TRUE;
1764 }
1765 }
1766
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001767 // When "zg" was used and the file wasn't loaded yet, should redo
1768 // 'spelllang' to load it now.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001769 if (added_word && !didit)
1770 did_set_spelllang(curwin);
1771}
1772
1773
/*
 * Functions for ":mkspell".
 */

#define MAXLINELEN  500         // Maximum length in bytes of a line in a .aff
                                // and .dic file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001780/*
1781 * Main structure to store the contents of a ".aff" file.
1782 */
1783typedef struct afffile_S
1784{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001785 char_u *af_enc; // "SET", normalized, alloc'ed string or NULL
1786 int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
1787 unsigned af_rare; // RARE ID for rare word
1788 unsigned af_keepcase; // KEEPCASE ID for keep-case word
1789 unsigned af_bad; // BAD ID for banned word
1790 unsigned af_needaffix; // NEEDAFFIX ID
1791 unsigned af_circumfix; // CIRCUMFIX ID
1792 unsigned af_needcomp; // NEEDCOMPOUND ID
1793 unsigned af_comproot; // COMPOUNDROOT ID
1794 unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID
1795 unsigned af_comppermit; // COMPOUNDPERMITFLAG ID
1796 unsigned af_nosuggest; // NOSUGGEST ID
1797 int af_pfxpostpone; // postpone prefixes without chop string and
1798 // without flags
1799 int af_ignoreextra; // IGNOREEXTRA present
1800 hashtab_T af_pref; // hashtable for prefixes, affheader_T
1801 hashtab_T af_suff; // hashtable for suffixes, affheader_T
1802 hashtab_T af_comp; // hashtable for compound flags, compitem_T
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001803} afffile_T;
1804
// Values for "af_flagtype": how affix flags are written in the .aff file.
#define AFT_CHAR        0       // flags are one character
#define AFT_LONG        1       // flags are two characters
#define AFT_CAPLONG     2       // flags are one or two characters
#define AFT_NUM         3       // flags are numbers, comma separated
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001809
1810typedef struct affentry_S affentry_T;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001811// Affix entry from ".aff" file. Used for prefixes and suffixes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001812struct affentry_S
1813{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001814 affentry_T *ae_next; // next affix with same name/number
1815 char_u *ae_chop; // text to chop off basic word (can be NULL)
1816 char_u *ae_add; // text to add to basic word (can be NULL)
1817 char_u *ae_flags; // flags on the affix (can be NULL)
1818 char_u *ae_cond; // condition (NULL for ".")
1819 regprog_T *ae_prog; // regexp program for ae_cond or NULL
1820 char ae_compforbid; // COMPOUNDFORBIDFLAG found
1821 char ae_comppermit; // COMPOUNDPERMITFLAG found
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001822};
1823
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001824#define AH_KEY_LEN 17 // 2 x 8 bytes + NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001825
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001826// Affix header from ".aff" file. Used for af_pref and af_suff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001827typedef struct affheader_S
1828{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001829 char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix
1830 unsigned ah_flag; // affix name as number, uses "af_flagtype"
1831 int ah_newID; // prefix ID after renumbering; 0 if not used
1832 int ah_combine; // suffix may combine with prefix
1833 int ah_follows; // another affix block should be following
1834 affentry_T *ah_first; // first affix entry
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001835} affheader_T;
1836
1837#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1838
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001839// Flag used in compound items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001840typedef struct compitem_S
1841{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001842 char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound
1843 unsigned ci_flag; // affix name as number, uses "af_flagtype"
1844 int ci_newID; // affix ID after renumbering.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001845} compitem_T;
1846
1847#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
1848
1849/*
1850 * Structure that is used to store the items in the word tree. This avoids
1851 * the need to keep track of each allocated thing, everything is freed all at
1852 * once after ":mkspell" is done.
1853 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
1854 * "sb_data" is correct for systems where pointers must be aligned on
1855 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
1856 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001857#define SBLOCKSIZE 16000 // size of sb_data
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001858typedef struct sblock_S sblock_T;
1859struct sblock_S
1860{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001861 int sb_used; // nr of bytes already in use
1862 sblock_T *sb_next; // next block in list
1863 char_u sb_data[1]; // data, actually longer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001864};
1865
1866/*
1867 * A node in the tree.
1868 */
1869typedef struct wordnode_S wordnode_T;
1870struct wordnode_S
1871{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001872 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001873 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001874 char_u hashkey[6]; // the hash key, only used while compressing
1875 int index; // index in written nodes (valid after first
1876 // round)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001877 } wn_u1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001878 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001879 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001880 wordnode_T *next; // next node with same hash key
1881 wordnode_T *wnode; // parent node that will write this node
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001882 } wn_u2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001883 wordnode_T *wn_child; // child (next byte in word)
1884 wordnode_T *wn_sibling; // next sibling (alternate byte in word,
1885 // always sorted)
1886 int wn_refs; // Nr. of references to this node. Only
1887 // relevant for first node in a list of
1888 // siblings, in following siblings it is
1889 // always one.
1890 char_u wn_byte; // Byte for this node. NUL for word end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001891
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001892 // Info for when "wn_byte" is NUL.
1893 // In PREFIXTREE "wn_region" is used for the prefcondnr.
1894 // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
1895 // "wn_region" the LSW of the wordnr.
1896 char_u wn_affixID; // supported/required prefix ID or 0
1897 short_u wn_flags; // WF_ flags
1898 short wn_region; // region mask
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001899
1900#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001901 int wn_nr; // sequence nr for printing
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001902#endif
1903};
1904
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001905#define WN_MASK 0xffff // mask relevant bits of "wn_flags"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001906
1907#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
1908
1909/*
1910 * Info used while reading the spell files.
1911 */
1912typedef struct spellinfo_S
1913{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001914 wordnode_T *si_foldroot; // tree with case-folded words
1915 long si_foldwcount; // nr of words in si_foldroot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001916
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001917 wordnode_T *si_keeproot; // tree with keep-case words
1918 long si_keepwcount; // nr of words in si_keeproot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001919
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001920 wordnode_T *si_prefroot; // tree with postponed prefixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001921
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001922 long si_sugtree; // creating the soundfolding trie
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001923
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001924 sblock_T *si_blocks; // memory blocks used
1925 long si_blocks_cnt; // memory blocks allocated
1926 int si_did_emsg; // TRUE when ran out of memory
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001927
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001928 long si_compress_cnt; // words to add before lowering
1929 // compression limit
1930 wordnode_T *si_first_free; // List of nodes that have been freed during
1931 // compression, linked by "wn_child" field.
1932 long si_free_count; // number of nodes in si_first_free
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001933#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001934 int si_wordnode_nr; // sequence nr for nodes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001935#endif
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001936 buf_T *si_spellbuf; // buffer used to store soundfold word table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001937
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001938 int si_ascii; // handling only ASCII words
1939 int si_add; // addition file
1940 int si_clear_chartab; // when TRUE clear char tables
1941 int si_region; // region mask
1942 vimconv_T si_conv; // for conversion to 'encoding'
1943 int si_memtot; // runtime memory used
1944 int si_verbose; // verbose messages
1945 int si_msg_count; // number of words added since last message
1946 char_u *si_info; // info text chars or NULL
1947 int si_region_count; // number of regions supported (1 when there
1948 // are no regions)
Bram Moolenaar2993ac52018-02-10 14:12:43 +01001949 char_u si_region_name[MAXREGIONS * 2 + 1];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001950 // region names; used only if
1951 // si_region_count > 1)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001952
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001953 garray_T si_rep; // list of fromto_T entries from REP lines
1954 garray_T si_repsal; // list of fromto_T entries from REPSAL lines
1955 garray_T si_sal; // list of fromto_T entries from SAL lines
1956 char_u *si_sofofr; // SOFOFROM text
1957 char_u *si_sofoto; // SOFOTO text
1958 int si_nosugfile; // NOSUGFILE item found
1959 int si_nosplitsugs; // NOSPLITSUGS item found
1960 int si_nocompoundsugs; // NOCOMPOUNDSUGS item found
1961 int si_followup; // soundsalike: ?
1962 int si_collapse; // soundsalike: ?
1963 hashtab_T si_commonwords; // hashtable for common words
1964 time_t si_sugtime; // timestamp for .sug file
1965 int si_rem_accents; // soundsalike: remove accents
1966 garray_T si_map; // MAP info concatenated
1967 char_u *si_midword; // MIDWORD chars or NULL
1968 int si_compmax; // max nr of words for compounding
1969 int si_compminlen; // minimal length for compounding
1970 int si_compsylmax; // max nr of syllables for compounding
1971 int si_compoptions; // COMP_ flags
1972 garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as
1973 // a string
1974 char_u *si_compflags; // flags used for compounding
1975 char_u si_nobreak; // NOBREAK
1976 char_u *si_syllable; // syllable string
1977 garray_T si_prefcond; // table with conditions for postponed
1978 // prefixes, each stored as a string
1979 int si_newprefID; // current value for ah_newID
1980 int si_newcompID; // current value for compound ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001981} spellinfo_T;
1982
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001983static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount);
1984static void aff_process_flags(afffile_T *affile, affentry_T *entry);
1985static int spell_info_item(char_u *s);
1986static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum);
1987static unsigned get_affitem(int flagtype, char_u **pp);
1988static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
1989static void check_renumber(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001990static void aff_check_number(int spinval, int affval, char *name);
1991static void aff_check_string(char_u *spinval, char_u *affval, char *name);
1992static int str_equal(char_u *s1, char_u *s2);
1993static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to);
1994static int sal_to_bool(char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001995static int get_affix_flags(afffile_T *affile, char_u *afflist);
1996static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1997static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
1998static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001999static void *getroom(spellinfo_T *spin, size_t len, int align);
2000static char_u *getroom_save(spellinfo_T *spin, char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002001static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
2002static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
2003static wordnode_T *get_wordnode(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002004static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
Bram Moolenaar408c23b2020-06-03 22:15:45 +02002005static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name);
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02002006static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002007static int node_equal(wordnode_T *n1, wordnode_T *n2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002008static void clear_node(wordnode_T *node);
2009static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002010static int sug_filltree(spellinfo_T *spin, slang_T *slang);
2011static int sug_maketable(spellinfo_T *spin);
2012static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
2013static int offset2bytes(int nr, char_u *buf);
2014static void sug_write(spellinfo_T *spin, char_u *fname);
2015static void spell_message(spellinfo_T *spin, char_u *str);
2016static void init_spellfile(void);
2017
// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
#define PFX_FLAGS       (-256)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002022
// flags for "condit" argument of store_aff_word()
// Each value is a distinct bit, so they can be combined with "|".
#define CONDIT_COMB     1       // affix must combine
#define CONDIT_CFIX     2       // affix must have CIRCUMFIX flag
#define CONDIT_SUF      4       // add a suffix for matching flags
#define CONDIT_AFF      8       // word already has an affix
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002028
/*
 * Tunable parameters for when the tree is compressed.  Filled from the
 * 'mkspellmem' option.
 */
static long compress_start = 30000;     // memory / SBLOCKSIZE
static long compress_inc = 100;         // memory / SBLOCKSIZE
static long compress_added = 500000;    // word count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002036
2037/*
2038 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
2039 * Sets "sps_flags".
2040 */
2041 int
2042spell_check_msm(void)
2043{
2044 char_u *p = p_msm;
2045 long start = 0;
2046 long incr = 0;
2047 long added = 0;
2048
2049 if (!VIM_ISDIGIT(*p))
2050 return FAIL;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002051 // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002052 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
2053 if (*p != ',')
2054 return FAIL;
2055 ++p;
2056 if (!VIM_ISDIGIT(*p))
2057 return FAIL;
2058 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
2059 if (*p != ',')
2060 return FAIL;
2061 ++p;
2062 if (!VIM_ISDIGIT(*p))
2063 return FAIL;
2064 added = getdigits(&p) * 1024;
2065 if (*p != NUL)
2066 return FAIL;
2067
2068 if (start == 0 || incr == 0 || added == 0 || incr > start)
2069 return FAIL;
2070
2071 compress_start = start;
2072 compress_inc = incr;
2073 compress_added = added;
2074 return OK;
2075}
2076
#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE   1000
#define PRINTWIDTH      6

// Format into line buffer "l" at the column for "depth"; every depth level
// takes PRINTWIDTH bytes.  The macro arguments are parenthesized so that an
// expression can be passed for "l" and "depth".
#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf((l) + (depth) * PRINTWIDTH, \
            PRINTLINESIZE - PRINTWIDTH * (depth), fmt, a1, a2)

static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];

/*
 * Reset "wn_u1.index" to FALSE in "node", all its siblings and, recursively,
 * their children.  spell_print_node() uses that field as a "visited" mark.
 */
    static void
spell_clear_flags(wordnode_T *node)
{
    wordnode_T  *np;

    FOR_ALL_NODE_SIBLINGS(node, np)
    {
        np->wn_u1.index = FALSE;
        spell_clear_flags(np->wn_child);
    }
}

/*
 * Print "node" at nesting level "depth", followed by its children and its
 * siblings.  The output is built up in "line1", "line2" and "line3", which
 * are given to msg() when a word end or an already printed node is reached.
 * NOTE(review): the padding inside the string literals below may have been
 * collapsed; each column is PRINTWIDTH bytes wide -- verify the widths.
 */
    static void
spell_print_node(wordnode_T *node, int depth)
{
    if (node->wn_u1.index)
    {
        // Done this node before, print the reference.
        PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
        PRINTSOME(line2, depth, " ", 0, 0);
        PRINTSOME(line3, depth, " ", 0, 0);
        msg(line1);
        msg(line2);
        msg(line3);
    }
    else
    {
        // Mark the node as visited before descending.
        node->wn_u1.index = TRUE;

        if (node->wn_byte != NUL)
        {
            if (node->wn_child != NULL)
                PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
            else
                // Cannot happen?
                PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
        }
        else
            PRINTSOME(line1, depth, " $ ", 0, 0);

        PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);

        if (node->wn_sibling != NULL)
            PRINTSOME(line3, depth, " | ", 0, 0);
        else
            PRINTSOME(line3, depth, " ", 0, 0);

        // A NUL byte is the end of a word: output what was collected.
        if (node->wn_byte == NUL)
        {
            msg(line1);
            msg(line2);
            msg(line3);
        }

        // do the children
        if (node->wn_byte != NUL && node->wn_child != NULL)
            spell_print_node(node->wn_child, depth + 1);

        // do the siblings
        if (node->wn_sibling != NULL)
        {
            // get rid of all parent details except |
            STRCPY(line1, line3);
            STRCPY(line2, line3);
            spell_print_node(node->wn_sibling, depth);
        }
    }
}

/*
 * Print the whole tree starting at "root".  Does nothing when "root" is NULL.
 */
    static void
spell_print_tree(wordnode_T *root)
{
    if (root != NULL)
    {
        // Clear the "wn_u1.index" fields, used to remember what has been
        // done.
        spell_clear_flags(root);

        // Recursively print the tree.
        spell_print_node(root, 0);
    }
}
#endif // SPELL_PRINTTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002177
2178/*
2179 * Read the affix file "fname".
2180 * Returns an afffile_T, NULL for complete failure.
2181 */
2182 static afffile_T *
2183spell_read_aff(spellinfo_T *spin, char_u *fname)
2184{
2185 FILE *fd;
2186 afffile_T *aff;
2187 char_u rline[MAXLINELEN];
2188 char_u *line;
2189 char_u *pc = NULL;
2190#define MAXITEMCNT 30
2191 char_u *(items[MAXITEMCNT]);
2192 int itemcnt;
2193 char_u *p;
2194 int lnum = 0;
2195 affheader_T *cur_aff = NULL;
2196 int did_postpone_prefix = FALSE;
2197 int aff_todo = 0;
2198 hashtab_T *tp;
2199 char_u *low = NULL;
2200 char_u *fol = NULL;
2201 char_u *upp = NULL;
2202 int do_rep;
2203 int do_repsal;
2204 int do_sal;
2205 int do_mapline;
2206 int found_map = FALSE;
2207 hashitem_T *hi;
2208 int l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002209 int compminlen = 0; // COMPOUNDMIN value
2210 int compsylmax = 0; // COMPOUNDSYLMAX value
2211 int compoptions = 0; // COMP_ flags
2212 int compmax = 0; // COMPOUNDWORDMAX value
2213 char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE
2214 // concatenated
2215 char_u *midword = NULL; // MIDWORD value
2216 char_u *syllable = NULL; // SYLLABLE value
2217 char_u *sofofrom = NULL; // SOFOFROM value
2218 char_u *sofoto = NULL; // SOFOTO value
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002219
2220 /*
2221 * Open the file.
2222 */
2223 fd = mch_fopen((char *)fname, "r");
2224 if (fd == NULL)
2225 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00002226 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002227 return NULL;
2228 }
2229
Bram Moolenaarc1669272018-06-19 14:23:53 +02002230 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002231 spell_message(spin, IObuff);
2232
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002233 // Only do REP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002234 do_rep = spin->si_rep.ga_len == 0;
2235
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002236 // Only do REPSAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002237 do_repsal = spin->si_repsal.ga_len == 0;
2238
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002239 // Only do SAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002240 do_sal = spin->si_sal.ga_len == 0;
2241
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002242 // Only do MAP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002243 do_mapline = spin->si_map.ga_len == 0;
2244
2245 /*
2246 * Allocate and init the afffile_T structure.
2247 */
2248 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
2249 if (aff == NULL)
2250 {
2251 fclose(fd);
2252 return NULL;
2253 }
2254 hash_init(&aff->af_pref);
2255 hash_init(&aff->af_suff);
2256 hash_init(&aff->af_comp);
2257
2258 /*
2259 * Read all the lines in the file one by one.
2260 */
2261 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2262 {
2263 line_breakcheck();
2264 ++lnum;
2265
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002266 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002267 if (*rline == '#')
2268 continue;
2269
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002270 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002271 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002272 if (spin->si_conv.vc_type != CONV_NONE)
2273 {
2274 pc = string_convert(&spin->si_conv, rline, NULL);
2275 if (pc == NULL)
2276 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002277 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002278 fname, lnum, rline);
2279 continue;
2280 }
2281 line = pc;
2282 }
2283 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002284 {
2285 pc = NULL;
2286 line = rline;
2287 }
2288
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002289 // Split the line up in white separated items. Put a NUL after each
2290 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002291 itemcnt = 0;
2292 for (p = line; ; )
2293 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002294 while (*p != NUL && *p <= ' ') // skip white space and CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002295 ++p;
2296 if (*p == NUL)
2297 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002298 if (itemcnt == MAXITEMCNT) // too many items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002299 break;
2300 items[itemcnt++] = p;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002301 // A few items have arbitrary text argument, don't split them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002302 if (itemcnt == 2 && spell_info_item(items[0]))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002303 while (*p >= ' ' || *p == TAB) // skip until CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002304 ++p;
2305 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002306 while (*p > ' ') // skip until white space or CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002307 ++p;
2308 if (*p == NUL)
2309 break;
2310 *p++ = NUL;
2311 }
2312
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002313 // Handle non-empty lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002314 if (itemcnt > 0)
2315 {
2316 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
2317 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002318 // Setup for conversion from "ENC" to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002319 aff->af_enc = enc_canonize(items[1]);
2320 if (aff->af_enc != NULL && !spin->si_ascii
2321 && convert_setup(&spin->si_conv, aff->af_enc,
2322 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002323 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002324 fname, aff->af_enc, p_enc);
2325 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002326 }
2327 else if (is_aff_rule(items, itemcnt, "FLAG", 2)
2328 && aff->af_flagtype == AFT_CHAR)
2329 {
2330 if (STRCMP(items[1], "long") == 0)
2331 aff->af_flagtype = AFT_LONG;
2332 else if (STRCMP(items[1], "num") == 0)
2333 aff->af_flagtype = AFT_NUM;
2334 else if (STRCMP(items[1], "caplong") == 0)
2335 aff->af_flagtype = AFT_CAPLONG;
2336 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002337 smsg(_("Invalid value for FLAG in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002338 fname, lnum, items[1]);
2339 if (aff->af_rare != 0
2340 || aff->af_keepcase != 0
2341 || aff->af_bad != 0
2342 || aff->af_needaffix != 0
2343 || aff->af_circumfix != 0
2344 || aff->af_needcomp != 0
2345 || aff->af_comproot != 0
2346 || aff->af_nosuggest != 0
2347 || compflags != NULL
2348 || aff->af_suff.ht_used > 0
2349 || aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002350 smsg(_("FLAG after using flags in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002351 fname, lnum, items[1]);
2352 }
2353 else if (spell_info_item(items[0]))
2354 {
2355 p = (char_u *)getroom(spin,
2356 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
2357 + STRLEN(items[0])
2358 + STRLEN(items[1]) + 3, FALSE);
2359 if (p != NULL)
2360 {
2361 if (spin->si_info != NULL)
2362 {
2363 STRCPY(p, spin->si_info);
2364 STRCAT(p, "\n");
2365 }
2366 STRCAT(p, items[0]);
2367 STRCAT(p, " ");
2368 STRCAT(p, items[1]);
2369 spin->si_info = p;
2370 }
2371 }
2372 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
2373 && midword == NULL)
2374 {
2375 midword = getroom_save(spin, items[1]);
2376 }
2377 else if (is_aff_rule(items, itemcnt, "TRY", 2))
2378 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002379 // ignored, we look in the tree for what chars may appear
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002380 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002381 // TODO: remove "RAR" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002382 else if ((is_aff_rule(items, itemcnt, "RAR", 2)
2383 || is_aff_rule(items, itemcnt, "RARE", 2))
2384 && aff->af_rare == 0)
2385 {
2386 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
2387 fname, lnum);
2388 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002389 // TODO: remove "KEP" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002390 else if ((is_aff_rule(items, itemcnt, "KEP", 2)
2391 || is_aff_rule(items, itemcnt, "KEEPCASE", 2))
2392 && aff->af_keepcase == 0)
2393 {
2394 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
2395 fname, lnum);
2396 }
2397 else if ((is_aff_rule(items, itemcnt, "BAD", 2)
2398 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
2399 && aff->af_bad == 0)
2400 {
2401 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
2402 fname, lnum);
2403 }
2404 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
2405 && aff->af_needaffix == 0)
2406 {
2407 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
2408 fname, lnum);
2409 }
2410 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
2411 && aff->af_circumfix == 0)
2412 {
2413 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
2414 fname, lnum);
2415 }
2416 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
2417 && aff->af_nosuggest == 0)
2418 {
2419 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
2420 fname, lnum);
2421 }
2422 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
2423 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
2424 && aff->af_needcomp == 0)
2425 {
2426 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
2427 fname, lnum);
2428 }
2429 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
2430 && aff->af_comproot == 0)
2431 {
2432 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
2433 fname, lnum);
2434 }
2435 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
2436 && aff->af_compforbid == 0)
2437 {
2438 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
2439 fname, lnum);
2440 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002441 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002442 fname, lnum);
2443 }
2444 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
2445 && aff->af_comppermit == 0)
2446 {
2447 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
2448 fname, lnum);
2449 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002450 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002451 fname, lnum);
2452 }
2453 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
2454 && compflags == NULL)
2455 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002456 // Turn flag "c" into COMPOUNDRULE compatible string "c+",
2457 // "Na" into "Na+", "1234" into "1234+".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002458 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
2459 if (p != NULL)
2460 {
2461 STRCPY(p, items[1]);
2462 STRCAT(p, "+");
2463 compflags = p;
2464 }
2465 }
2466 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
2467 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002468 // We don't use the count, but do check that it's a number and
2469 // not COMPOUNDRULE mistyped.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002470 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002471 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002472 fname, lnum, items[1]);
2473 }
2474 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
2475 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002476 // Don't use the first rule if it is a number.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002477 if (compflags != NULL || *skipdigits(items[1]) != NUL)
2478 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002479 // Concatenate this string to previously defined ones,
2480 // using a slash to separate them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002481 l = (int)STRLEN(items[1]) + 1;
2482 if (compflags != NULL)
2483 l += (int)STRLEN(compflags) + 1;
2484 p = getroom(spin, l, FALSE);
2485 if (p != NULL)
2486 {
2487 if (compflags != NULL)
2488 {
2489 STRCPY(p, compflags);
2490 STRCAT(p, "/");
2491 }
2492 STRCAT(p, items[1]);
2493 compflags = p;
2494 }
2495 }
2496 }
2497 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
2498 && compmax == 0)
2499 {
2500 compmax = atoi((char *)items[1]);
2501 if (compmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002502 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002503 fname, lnum, items[1]);
2504 }
2505 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
2506 && compminlen == 0)
2507 {
2508 compminlen = atoi((char *)items[1]);
2509 if (compminlen == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002510 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002511 fname, lnum, items[1]);
2512 }
2513 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
2514 && compsylmax == 0)
2515 {
2516 compsylmax = atoi((char *)items[1]);
2517 if (compsylmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002518 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002519 fname, lnum, items[1]);
2520 }
2521 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
2522 {
2523 compoptions |= COMP_CHECKDUP;
2524 }
2525 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
2526 {
2527 compoptions |= COMP_CHECKREP;
2528 }
2529 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
2530 {
2531 compoptions |= COMP_CHECKCASE;
2532 }
2533 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
2534 {
2535 compoptions |= COMP_CHECKTRIPLE;
2536 }
2537 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
2538 {
2539 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002540 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002541 fname, lnum, items[1]);
2542 }
2543 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
2544 {
2545 garray_T *gap = &spin->si_comppat;
2546 int i;
2547
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002548 // Only add the couple if it isn't already there.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002549 for (i = 0; i < gap->ga_len - 1; i += 2)
2550 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
2551 && STRCMP(((char_u **)(gap->ga_data))[i + 1],
2552 items[2]) == 0)
2553 break;
2554 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
2555 {
2556 ((char_u **)(gap->ga_data))[gap->ga_len++]
2557 = getroom_save(spin, items[1]);
2558 ((char_u **)(gap->ga_data))[gap->ga_len++]
2559 = getroom_save(spin, items[2]);
2560 }
2561 }
2562 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
2563 && syllable == NULL)
2564 {
2565 syllable = getroom_save(spin, items[1]);
2566 }
2567 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
2568 {
2569 spin->si_nobreak = TRUE;
2570 }
2571 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
2572 {
2573 spin->si_nosplitsugs = TRUE;
2574 }
2575 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
2576 {
2577 spin->si_nocompoundsugs = TRUE;
2578 }
2579 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
2580 {
2581 spin->si_nosugfile = TRUE;
2582 }
2583 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
2584 {
2585 aff->af_pfxpostpone = TRUE;
2586 }
2587 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
2588 {
2589 aff->af_ignoreextra = TRUE;
2590 }
2591 else if ((STRCMP(items[0], "PFX") == 0
2592 || STRCMP(items[0], "SFX") == 0)
2593 && aff_todo == 0
2594 && itemcnt >= 4)
2595 {
2596 int lasti = 4;
2597 char_u key[AH_KEY_LEN];
2598
2599 if (*items[0] == 'P')
2600 tp = &aff->af_pref;
2601 else
2602 tp = &aff->af_suff;
2603
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002604 // Myspell allows the same affix name to be used multiple
2605 // times. The affix files that do this have an undocumented
2606 // "S" flag on all but the last block, thus we check for that
2607 // and store it in ah_follows.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002608 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
2609 hi = hash_find(tp, key);
2610 if (!HASHITEM_EMPTY(hi))
2611 {
2612 cur_aff = HI2AH(hi);
2613 if (cur_aff->ah_combine != (*items[2] == 'Y'))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002614 smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002615 fname, lnum, items[1]);
2616 if (!cur_aff->ah_follows)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002617 smsg(_("Duplicate affix in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002618 fname, lnum, items[1]);
2619 }
2620 else
2621 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002622 // New affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002623 cur_aff = (affheader_T *)getroom(spin,
2624 sizeof(affheader_T), TRUE);
2625 if (cur_aff == NULL)
2626 break;
2627 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
2628 fname, lnum);
2629 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
2630 break;
2631 if (cur_aff->ah_flag == aff->af_bad
2632 || cur_aff->ah_flag == aff->af_rare
2633 || cur_aff->ah_flag == aff->af_keepcase
2634 || cur_aff->ah_flag == aff->af_needaffix
2635 || cur_aff->ah_flag == aff->af_circumfix
2636 || cur_aff->ah_flag == aff->af_nosuggest
2637 || cur_aff->ah_flag == aff->af_needcomp
2638 || cur_aff->ah_flag == aff->af_comproot)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002639 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002640 fname, lnum, items[1]);
2641 STRCPY(cur_aff->ah_key, items[1]);
2642 hash_add(tp, cur_aff->ah_key);
2643
2644 cur_aff->ah_combine = (*items[2] == 'Y');
2645 }
2646
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002647 // Check for the "S" flag, which apparently means that another
2648 // block with the same affix name is following.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002649 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
2650 {
2651 ++lasti;
2652 cur_aff->ah_follows = TRUE;
2653 }
2654 else
2655 cur_aff->ah_follows = FALSE;
2656
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002657 // Myspell allows extra text after the item, but that might
2658 // mean mistakes go unnoticed. Require a comment-starter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002659 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002660 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002661
2662 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002663 smsg(_("Expected Y or N in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002664 fname, lnum, items[2]);
2665
2666 if (*items[0] == 'P' && aff->af_pfxpostpone)
2667 {
2668 if (cur_aff->ah_newID == 0)
2669 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002670 // Use a new number in the .spl file later, to be able
2671 // to handle multiple .aff files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002672 check_renumber(spin);
2673 cur_aff->ah_newID = ++spin->si_newprefID;
2674
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002675 // We only really use ah_newID if the prefix is
2676 // postponed. We know that only after handling all
2677 // the items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002678 did_postpone_prefix = FALSE;
2679 }
2680 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002681 // Did use the ID in a previous block.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002682 did_postpone_prefix = TRUE;
2683 }
2684
2685 aff_todo = atoi((char *)items[3]);
2686 }
2687 else if ((STRCMP(items[0], "PFX") == 0
2688 || STRCMP(items[0], "SFX") == 0)
2689 && aff_todo > 0
2690 && STRCMP(cur_aff->ah_key, items[1]) == 0
2691 && itemcnt >= 5)
2692 {
2693 affentry_T *aff_entry;
2694 int upper = FALSE;
2695 int lasti = 5;
2696
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002697 // Myspell allows extra text after the item, but that might
2698 // mean mistakes go unnoticed. Require a comment-starter,
2699 // unless IGNOREEXTRA is used. Hunspell uses a "-" item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002700 if (itemcnt > lasti
2701 && !aff->af_ignoreextra
2702 && *items[lasti] != '#'
2703 && (STRCMP(items[lasti], "-") != 0
2704 || itemcnt != lasti + 1))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002705 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002706
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002707 // New item for an affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002708 --aff_todo;
2709 aff_entry = (affentry_T *)getroom(spin,
2710 sizeof(affentry_T), TRUE);
2711 if (aff_entry == NULL)
2712 break;
2713
2714 if (STRCMP(items[2], "0") != 0)
2715 aff_entry->ae_chop = getroom_save(spin, items[2]);
2716 if (STRCMP(items[3], "0") != 0)
2717 {
2718 aff_entry->ae_add = getroom_save(spin, items[3]);
2719
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002720 // Recognize flags on the affix: abcd/XYZ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002721 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
2722 if (aff_entry->ae_flags != NULL)
2723 {
2724 *aff_entry->ae_flags++ = NUL;
2725 aff_process_flags(aff, aff_entry);
2726 }
2727 }
2728
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002729 // Don't use an affix entry with non-ASCII characters when
2730 // "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002731 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
2732 || has_non_ascii(aff_entry->ae_add)))
2733 {
2734 aff_entry->ae_next = cur_aff->ah_first;
2735 cur_aff->ah_first = aff_entry;
2736
2737 if (STRCMP(items[4], ".") != 0)
2738 {
2739 char_u buf[MAXLINELEN];
2740
2741 aff_entry->ae_cond = getroom_save(spin, items[4]);
2742 if (*items[0] == 'P')
2743 sprintf((char *)buf, "^%s", items[4]);
2744 else
2745 sprintf((char *)buf, "%s$", items[4]);
2746 aff_entry->ae_prog = vim_regcomp(buf,
2747 RE_MAGIC + RE_STRING + RE_STRICT);
2748 if (aff_entry->ae_prog == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002749 smsg(_("Broken condition in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002750 fname, lnum, items[4]);
2751 }
2752
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002753 // For postponed prefixes we need an entry in si_prefcond
2754 // for the condition. Use an existing one if possible.
2755 // Can't be done for an affix with flags, ignoring
2756 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002757 if (*items[0] == 'P' && aff->af_pfxpostpone
2758 && aff_entry->ae_flags == NULL)
2759 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002760 // When the chop string is one lower-case letter and
2761 // the add string ends in the upper-case letter we set
2762 // the "upper" flag, clear "ae_chop" and remove the
2763 // letters from "ae_add". The condition must either
2764 // be empty or start with the same letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002765 if (aff_entry->ae_chop != NULL
2766 && aff_entry->ae_add != NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002767 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002768 aff_entry->ae_chop)] == NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002769 {
2770 int c, c_up;
2771
2772 c = PTR2CHAR(aff_entry->ae_chop);
2773 c_up = SPELL_TOUPPER(c);
2774 if (c_up != c
2775 && (aff_entry->ae_cond == NULL
2776 || PTR2CHAR(aff_entry->ae_cond) == c))
2777 {
2778 p = aff_entry->ae_add
2779 + STRLEN(aff_entry->ae_add);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002780 MB_PTR_BACK(aff_entry->ae_add, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002781 if (PTR2CHAR(p) == c_up)
2782 {
2783 upper = TRUE;
2784 aff_entry->ae_chop = NULL;
2785 *p = NUL;
2786
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002787 // The condition is matched with the
2788 // actual word, thus must check for the
2789 // upper-case letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002790 if (aff_entry->ae_cond != NULL)
2791 {
2792 char_u buf[MAXLINELEN];
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002793
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002794 if (has_mbyte)
2795 {
2796 onecap_copy(items[4], buf, TRUE);
2797 aff_entry->ae_cond = getroom_save(
2798 spin, buf);
2799 }
2800 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002801 *aff_entry->ae_cond = c_up;
2802 if (aff_entry->ae_cond != NULL)
2803 {
2804 sprintf((char *)buf, "^%s",
2805 aff_entry->ae_cond);
2806 vim_regfree(aff_entry->ae_prog);
2807 aff_entry->ae_prog = vim_regcomp(
2808 buf, RE_MAGIC + RE_STRING);
2809 }
2810 }
2811 }
2812 }
2813 }
2814
2815 if (aff_entry->ae_chop == NULL
2816 && aff_entry->ae_flags == NULL)
2817 {
2818 int idx;
2819 char_u **pp;
2820 int n;
2821
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002822 // Find a previously used condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002823 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
2824 --idx)
2825 {
2826 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
2827 if (str_equal(p, aff_entry->ae_cond))
2828 break;
2829 }
2830 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
2831 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002832 // Not found, add a new condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002833 idx = spin->si_prefcond.ga_len++;
2834 pp = ((char_u **)spin->si_prefcond.ga_data)
2835 + idx;
2836 if (aff_entry->ae_cond == NULL)
2837 *pp = NULL;
2838 else
2839 *pp = getroom_save(spin,
2840 aff_entry->ae_cond);
2841 }
2842
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002843 // Add the prefix to the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002844 if (aff_entry->ae_add == NULL)
2845 p = (char_u *)"";
2846 else
2847 p = aff_entry->ae_add;
2848
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002849 // PFX_FLAGS is a negative number, so that
2850 // tree_add_word() knows this is the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002851 n = PFX_FLAGS;
2852 if (!cur_aff->ah_combine)
2853 n |= WFP_NC;
2854 if (upper)
2855 n |= WFP_UP;
2856 if (aff_entry->ae_comppermit)
2857 n |= WFP_COMPPERMIT;
2858 if (aff_entry->ae_compforbid)
2859 n |= WFP_COMPFORBID;
2860 tree_add_word(spin, p, spin->si_prefroot, n,
2861 idx, cur_aff->ah_newID);
2862 did_postpone_prefix = TRUE;
2863 }
2864
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002865 // Didn't actually use ah_newID, backup si_newprefID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002866 if (aff_todo == 0 && !did_postpone_prefix)
2867 {
2868 --spin->si_newprefID;
2869 cur_aff->ah_newID = 0;
2870 }
2871 }
2872 }
2873 }
2874 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
2875 {
2876 fol = vim_strsave(items[1]);
2877 }
2878 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
2879 {
2880 low = vim_strsave(items[1]);
2881 }
2882 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
2883 {
2884 upp = vim_strsave(items[1]);
2885 }
2886 else if (is_aff_rule(items, itemcnt, "REP", 2)
2887 || is_aff_rule(items, itemcnt, "REPSAL", 2))
2888 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002889 // Ignore REP/REPSAL count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002890 if (!isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002891 smsg(_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002892 fname, lnum);
2893 }
2894 else if ((STRCMP(items[0], "REP") == 0
2895 || STRCMP(items[0], "REPSAL") == 0)
2896 && itemcnt >= 3)
2897 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002898 // REP/REPSAL item
2899 // Myspell ignores extra arguments, we require it starts with
2900 // # to detect mistakes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002901 if (itemcnt > 3 && items[3][0] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002902 smsg(_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002903 if (items[0][3] == 'S' ? do_repsal : do_rep)
2904 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002905 // Replace underscore with space (can't include a space
2906 // directly).
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002907 for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002908 if (*p == '_')
2909 *p = ' ';
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002910 for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002911 if (*p == '_')
2912 *p = ' ';
2913 add_fromto(spin, items[0][3] == 'S'
2914 ? &spin->si_repsal
2915 : &spin->si_rep, items[1], items[2]);
2916 }
2917 }
2918 else if (is_aff_rule(items, itemcnt, "MAP", 2))
2919 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002920 // MAP item or count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002921 if (!found_map)
2922 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002923 // First line contains the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002924 found_map = TRUE;
2925 if (!isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002926 smsg(_("Expected MAP count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002927 fname, lnum);
2928 }
2929 else if (do_mapline)
2930 {
2931 int c;
2932
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002933 // Check that every character appears only once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002934 for (p = items[1]; *p != NUL; )
2935 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002936 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002937 if ((spin->si_map.ga_len > 0
2938 && vim_strchr(spin->si_map.ga_data, c)
2939 != NULL)
2940 || vim_strchr(p, c) != NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002941 smsg(_("Duplicate character in MAP in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002942 fname, lnum);
2943 }
2944
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002945 // We simply concatenate all the MAP strings, separated by
2946 // slashes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002947 ga_concat(&spin->si_map, items[1]);
2948 ga_append(&spin->si_map, '/');
2949 }
2950 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002951 // Accept "SAL from to" and "SAL from to #comment".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002952 else if (is_aff_rule(items, itemcnt, "SAL", 3))
2953 {
2954 if (do_sal)
2955 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002956 // SAL item (sounds-a-like)
2957 // Either one of the known keys or a from-to pair.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002958 if (STRCMP(items[1], "followup") == 0)
2959 spin->si_followup = sal_to_bool(items[2]);
2960 else if (STRCMP(items[1], "collapse_result") == 0)
2961 spin->si_collapse = sal_to_bool(items[2]);
2962 else if (STRCMP(items[1], "remove_accents") == 0)
2963 spin->si_rem_accents = sal_to_bool(items[2]);
2964 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002965 // when "to" is "_" it means empty
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002966 add_fromto(spin, &spin->si_sal, items[1],
2967 STRCMP(items[2], "_") == 0 ? (char_u *)""
2968 : items[2]);
2969 }
2970 }
2971 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
2972 && sofofrom == NULL)
2973 {
2974 sofofrom = getroom_save(spin, items[1]);
2975 }
2976 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
2977 && sofoto == NULL)
2978 {
2979 sofoto = getroom_save(spin, items[1]);
2980 }
2981 else if (STRCMP(items[0], "COMMON") == 0)
2982 {
2983 int i;
2984
2985 for (i = 1; i < itemcnt; ++i)
2986 {
2987 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
2988 items[i])))
2989 {
2990 p = vim_strsave(items[i]);
2991 if (p == NULL)
2992 break;
2993 hash_add(&spin->si_commonwords, p);
2994 }
2995 }
2996 }
2997 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002998 smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002999 fname, lnum, items[0]);
3000 }
3001 }
3002
3003 if (fol != NULL || low != NULL || upp != NULL)
3004 {
3005 if (spin->si_clear_chartab)
3006 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003007 // Clear the char type tables, don't want to use any of the
3008 // currently used spell properties.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003009 init_spell_chartab();
3010 spin->si_clear_chartab = FALSE;
3011 }
3012
3013 /*
3014 * Don't write a word table for an ASCII file, so that we don't check
3015 * for conflicts with a word table that matches 'encoding'.
3016 * Don't write one for utf-8 either, we use utf_*() and
3017 * mb_get_class(), the list of chars in the file will be incomplete.
3018 */
Bram Moolenaar264b74f2019-01-24 17:18:42 +01003019 if (!spin->si_ascii && !enc_utf8)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003020 {
3021 if (fol == NULL || low == NULL || upp == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003022 smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003023 else
3024 (void)set_spell_chartab(fol, low, upp);
3025 }
3026
3027 vim_free(fol);
3028 vim_free(low);
3029 vim_free(upp);
3030 }
3031
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003032 // Use compound specifications of the .aff file for the spell info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003033 if (compmax != 0)
3034 {
3035 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
3036 spin->si_compmax = compmax;
3037 }
3038
3039 if (compminlen != 0)
3040 {
3041 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
3042 spin->si_compminlen = compminlen;
3043 }
3044
3045 if (compsylmax != 0)
3046 {
3047 if (syllable == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003048 smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003049 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
3050 spin->si_compsylmax = compsylmax;
3051 }
3052
3053 if (compoptions != 0)
3054 {
3055 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
3056 spin->si_compoptions |= compoptions;
3057 }
3058
3059 if (compflags != NULL)
3060 process_compflags(spin, aff, compflags);
3061
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003062 // Check that we didn't use too many renumbered flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003063 if (spin->si_newcompID < spin->si_newprefID)
3064 {
3065 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003066 msg(_("Too many postponed prefixes"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003067 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003068 msg(_("Too many compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003069 else
Bram Moolenaar32526b32019-01-19 17:43:09 +01003070 msg(_("Too many postponed prefixes and/or compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003071 }
3072
3073 if (syllable != NULL)
3074 {
3075 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
3076 spin->si_syllable = syllable;
3077 }
3078
3079 if (sofofrom != NULL || sofoto != NULL)
3080 {
3081 if (sofofrom == NULL || sofoto == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003082 smsg(_("Missing SOFO%s line in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003083 sofofrom == NULL ? "FROM" : "TO", fname);
3084 else if (spin->si_sal.ga_len > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003085 smsg(_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003086 else
3087 {
3088 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
3089 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
3090 spin->si_sofofr = sofofrom;
3091 spin->si_sofoto = sofoto;
3092 }
3093 }
3094
3095 if (midword != NULL)
3096 {
3097 aff_check_string(spin->si_midword, midword, "MIDWORD");
3098 spin->si_midword = midword;
3099 }
3100
3101 vim_free(pc);
3102 fclose(fd);
3103 return aff;
3104}
3105
3106/*
3107 * Return TRUE when items[0] equals "rulename", there are "mincount" items or
3108 * a comment is following after item "mincount".
3109 */
3110 static int
3111is_aff_rule(
3112 char_u **items,
3113 int itemcnt,
3114 char *rulename,
3115 int mincount)
3116{
3117 return (STRCMP(items[0], rulename) == 0
3118 && (itemcnt == mincount
3119 || (itemcnt > mincount && items[mincount][0] == '#')));
3120}
3121
3122/*
3123 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
3124 * ae_flags to ae_comppermit and ae_compforbid.
3125 */
3126 static void
3127aff_process_flags(afffile_T *affile, affentry_T *entry)
3128{
3129 char_u *p;
3130 char_u *prevp;
3131 unsigned flag;
3132
3133 if (entry->ae_flags != NULL
3134 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
3135 {
3136 for (p = entry->ae_flags; *p != NUL; )
3137 {
3138 prevp = p;
3139 flag = get_affitem(affile->af_flagtype, &p);
3140 if (flag == affile->af_comppermit || flag == affile->af_compforbid)
3141 {
3142 STRMOVE(prevp, p);
3143 p = prevp;
3144 if (flag == affile->af_comppermit)
3145 entry->ae_comppermit = TRUE;
3146 else
3147 entry->ae_compforbid = TRUE;
3148 }
3149 if (affile->af_flagtype == AFT_NUM && *p == ',')
3150 ++p;
3151 }
3152 if (*entry->ae_flags == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003153 entry->ae_flags = NULL; // nothing left
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003154 }
3155}
3156
3157/*
3158 * Return TRUE if "s" is the name of an info item in the affix file.
3159 */
3160 static int
3161spell_info_item(char_u *s)
3162{
3163 return STRCMP(s, "NAME") == 0
3164 || STRCMP(s, "HOME") == 0
3165 || STRCMP(s, "VERSION") == 0
3166 || STRCMP(s, "AUTHOR") == 0
3167 || STRCMP(s, "EMAIL") == 0
3168 || STRCMP(s, "COPYRIGHT") == 0;
3169}
3170
3171/*
3172 * Turn an affix flag name into a number, according to the FLAG type.
3173 * returns zero for failure.
3174 */
3175 static unsigned
3176affitem2flag(
3177 int flagtype,
3178 char_u *item,
3179 char_u *fname,
3180 int lnum)
3181{
3182 unsigned res;
3183 char_u *p = item;
3184
3185 res = get_affitem(flagtype, &p);
3186 if (res == 0)
3187 {
3188 if (flagtype == AFT_NUM)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003189 smsg(_("Flag is not a number in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003190 fname, lnum, item);
3191 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003192 smsg(_("Illegal flag in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003193 fname, lnum, item);
3194 }
3195 if (*p != NUL)
3196 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003197 smsg(_(e_affname), fname, lnum, item);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003198 return 0;
3199 }
3200
3201 return res;
3202}
3203
3204/*
3205 * Get one affix name from "*pp" and advance the pointer.
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003206 * Returns ZERO_FLAG for "0".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003207 * Returns zero for an error, still advances the pointer then.
3208 */
3209 static unsigned
3210get_affitem(int flagtype, char_u **pp)
3211{
3212 int res;
3213
3214 if (flagtype == AFT_NUM)
3215 {
3216 if (!VIM_ISDIGIT(**pp))
3217 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003218 ++*pp; // always advance, avoid getting stuck
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003219 return 0;
3220 }
3221 res = getdigits(pp);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003222 if (res == 0)
3223 res = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003224 }
3225 else
3226 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003227 res = mb_ptr2char_adv(pp);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003228 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
3229 && res >= 'A' && res <= 'Z'))
3230 {
3231 if (**pp == NUL)
3232 return 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003233 res = mb_ptr2char_adv(pp) + (res << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003234 }
3235 }
3236 return res;
3237}
3238
3239/*
3240 * Process the "compflags" string used in an affix file and append it to
3241 * spin->si_compflags.
3242 * The processing involves changing the affix names to ID numbers, so that
3243 * they fit in one byte.
3244 */
3245 static void
3246process_compflags(
3247 spellinfo_T *spin,
3248 afffile_T *aff,
3249 char_u *compflags)
3250{
3251 char_u *p;
3252 char_u *prevp;
3253 unsigned flag;
3254 compitem_T *ci;
3255 int id;
3256 int len;
3257 char_u *tp;
3258 char_u key[AH_KEY_LEN];
3259 hashitem_T *hi;
3260
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003261 // Make room for the old and the new compflags, concatenated with a / in
3262 // between. Processing it makes it shorter, but we don't know by how
3263 // much, thus allocate the maximum.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003264 len = (int)STRLEN(compflags) + 1;
3265 if (spin->si_compflags != NULL)
3266 len += (int)STRLEN(spin->si_compflags) + 1;
3267 p = getroom(spin, len, FALSE);
3268 if (p == NULL)
3269 return;
3270 if (spin->si_compflags != NULL)
3271 {
3272 STRCPY(p, spin->si_compflags);
3273 STRCAT(p, "/");
3274 }
3275 spin->si_compflags = p;
3276 tp = p + STRLEN(p);
3277
3278 for (p = compflags; *p != NUL; )
3279 {
3280 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003281 // Copy non-flag characters directly.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003282 *tp++ = *p++;
3283 else
3284 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003285 // First get the flag number, also checks validity.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003286 prevp = p;
3287 flag = get_affitem(aff->af_flagtype, &p);
3288 if (flag != 0)
3289 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003290 // Find the flag in the hashtable. If it was used before, use
3291 // the existing ID. Otherwise add a new entry.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003292 vim_strncpy(key, prevp, p - prevp);
3293 hi = hash_find(&aff->af_comp, key);
3294 if (!HASHITEM_EMPTY(hi))
3295 id = HI2CI(hi)->ci_newID;
3296 else
3297 {
3298 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
3299 if (ci == NULL)
3300 break;
3301 STRCPY(ci->ci_key, key);
3302 ci->ci_flag = flag;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003303 // Avoid using a flag ID that has a special meaning in a
3304 // regexp (also inside []).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003305 do
3306 {
3307 check_renumber(spin);
3308 id = spin->si_newcompID--;
3309 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL);
3310 ci->ci_newID = id;
3311 hash_add(&aff->af_comp, ci->ci_key);
3312 }
3313 *tp++ = id;
3314 }
3315 if (aff->af_flagtype == AFT_NUM && *p == ',')
3316 ++p;
3317 }
3318 }
3319
3320 *tp = NUL;
3321}
3322
3323/*
3324 * Check that the new IDs for postponed affixes and compounding don't overrun
3325 * each other. We have almost 255 available, but start at 0-127 to avoid
3326 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
3327 * When that is used up an error message is given.
3328 */
3329 static void
3330check_renumber(spellinfo_T *spin)
3331{
3332 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
3333 {
3334 spin->si_newprefID = 127;
3335 spin->si_newcompID = 255;
3336 }
3337}
3338
3339/*
3340 * Return TRUE if flag "flag" appears in affix list "afflist".
3341 */
3342 static int
3343flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
3344{
3345 char_u *p;
3346 unsigned n;
3347
3348 switch (flagtype)
3349 {
3350 case AFT_CHAR:
3351 return vim_strchr(afflist, flag) != NULL;
3352
3353 case AFT_CAPLONG:
3354 case AFT_LONG:
3355 for (p = afflist; *p != NUL; )
3356 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003357 n = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003358 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
3359 && *p != NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003360 n = mb_ptr2char_adv(&p) + (n << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003361 if (n == flag)
3362 return TRUE;
3363 }
3364 break;
3365
3366 case AFT_NUM:
3367 for (p = afflist; *p != NUL; )
3368 {
3369 n = getdigits(&p);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003370 if (n == 0)
3371 n = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003372 if (n == flag)
3373 return TRUE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003374 if (*p != NUL) // skip over comma
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003375 ++p;
3376 }
3377 break;
3378 }
3379 return FALSE;
3380}
3381
/*
 * Warn about number item "name" when "spinval" is set (non-zero) and is not
 * equal to "affval".
 */
    static void
aff_check_number(int spinval, int affval, char *name)
{
    // An unset value or a matching value needs no warning.
    if (spinval == 0 || spinval == affval)
	return;

    smsg(_("%s value differs from what is used in another .aff file"), name);
}
3391
3392/*
3393 * Give a warning when "spinval" and "affval" strings are set and not the same.
3394 */
3395 static void
3396aff_check_string(char_u *spinval, char_u *affval, char *name)
3397{
3398 if (spinval != NULL && STRCMP(spinval, affval) != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003399 smsg(_("%s value differs from what is used in another .aff file"), name);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003400}
3401
3402/*
3403 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
3404 * NULL as equal.
3405 */
3406 static int
3407str_equal(char_u *s1, char_u *s2)
3408{
3409 if (s1 == NULL || s2 == NULL)
3410 return s1 == s2;
3411 return STRCMP(s1, s2) == 0;
3412}
3413
3414/*
3415 * Add a from-to item to "gap". Used for REP and SAL items.
3416 * They are stored case-folded.
3417 */
3418 static void
3419add_fromto(
3420 spellinfo_T *spin,
3421 garray_T *gap,
3422 char_u *from,
3423 char_u *to)
3424{
3425 fromto_T *ftp;
3426 char_u word[MAXWLEN];
3427
3428 if (ga_grow(gap, 1) == OK)
3429 {
3430 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
Bram Moolenaar4f135272021-06-11 19:07:40 +02003431 (void)spell_casefold(curwin, from, (int)STRLEN(from), word, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003432 ftp->ft_from = getroom_save(spin, word);
Bram Moolenaar4f135272021-06-11 19:07:40 +02003433 (void)spell_casefold(curwin, to, (int)STRLEN(to), word, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003434 ftp->ft_to = getroom_save(spin, word);
3435 ++gap->ga_len;
3436 }
3437}
3438
3439/*
3440 * Convert a boolean argument in a SAL line to TRUE or FALSE;
3441 */
3442 static int
3443sal_to_bool(char_u *s)
3444{
3445 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
3446}
3447
3448/*
3449 * Free the structure filled by spell_read_aff().
3450 */
3451 static void
3452spell_free_aff(afffile_T *aff)
3453{
3454 hashtab_T *ht;
3455 hashitem_T *hi;
3456 int todo;
3457 affheader_T *ah;
3458 affentry_T *ae;
3459
3460 vim_free(aff->af_enc);
3461
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003462 // All this trouble to free the "ae_prog" items...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003463 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
3464 {
3465 todo = (int)ht->ht_used;
3466 for (hi = ht->ht_array; todo > 0; ++hi)
3467 {
3468 if (!HASHITEM_EMPTY(hi))
3469 {
3470 --todo;
3471 ah = HI2AH(hi);
3472 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3473 vim_regfree(ae->ae_prog);
3474 }
3475 }
3476 if (ht == &aff->af_suff)
3477 break;
3478 }
3479
3480 hash_clear(&aff->af_pref);
3481 hash_clear(&aff->af_suff);
3482 hash_clear(&aff->af_comp);
3483}
3484
3485/*
3486 * Read dictionary file "fname".
3487 * Returns OK or FAIL;
3488 */
3489 static int
3490spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
3491{
3492 hashtab_T ht;
3493 char_u line[MAXLINELEN];
3494 char_u *p;
3495 char_u *afflist;
3496 char_u store_afflist[MAXWLEN];
3497 int pfxlen;
3498 int need_affix;
3499 char_u *dw;
3500 char_u *pc;
3501 char_u *w;
3502 int l;
3503 hash_T hash;
3504 hashitem_T *hi;
3505 FILE *fd;
3506 int lnum = 1;
3507 int non_ascii = 0;
3508 int retval = OK;
3509 char_u message[MAXLINELEN + MAXWLEN];
3510 int flags;
3511 int duplicate = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003512 time_T last_msg_time = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003513
3514 /*
3515 * Open the file.
3516 */
3517 fd = mch_fopen((char *)fname, "r");
3518 if (fd == NULL)
3519 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00003520 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003521 return FAIL;
3522 }
3523
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003524 // The hashtable is only used to detect duplicated words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003525 hash_init(&ht);
3526
3527 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02003528 _("Reading dictionary file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003529 spell_message(spin, IObuff);
3530
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003531 // start with a message for the first line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003532 spin->si_msg_count = 999999;
3533
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003534 // Read and ignore the first line: word count.
Bram Moolenaare90d63e2020-09-02 12:58:48 +02003535 if (vim_fgets(line, MAXLINELEN, fd) || !vim_isdigit(*skipwhite(line)))
Bram Moolenaar677658a2022-01-05 16:09:06 +00003536 semsg(_(e_no_word_count_in_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003537
3538 /*
3539 * Read all the lines in the file one by one.
3540 * The words are converted to 'encoding' here, before being added to
3541 * the hashtable.
3542 */
3543 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
3544 {
3545 line_breakcheck();
3546 ++lnum;
3547 if (line[0] == '#' || line[0] == '/')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003548 continue; // comment line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003549
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003550 // Remove CR, LF and white space from the end. White space halfway
3551 // the word is kept to allow e.g., "et al.".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003552 l = (int)STRLEN(line);
3553 while (l > 0 && line[l - 1] <= ' ')
3554 --l;
3555 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003556 continue; // empty line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003557 line[l] = NUL;
3558
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003559 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003560 if (spin->si_conv.vc_type != CONV_NONE)
3561 {
3562 pc = string_convert(&spin->si_conv, line, NULL);
3563 if (pc == NULL)
3564 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003565 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003566 fname, lnum, line);
3567 continue;
3568 }
3569 w = pc;
3570 }
3571 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003572 {
3573 pc = NULL;
3574 w = line;
3575 }
3576
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003577 // Truncate the word at the "/", set "afflist" to what follows.
3578 // Replace "\/" by "/" and "\\" by "\".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003579 afflist = NULL;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003580 for (p = w; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003581 {
3582 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
3583 STRMOVE(p, p + 1);
3584 else if (*p == '/')
3585 {
3586 *p = NUL;
3587 afflist = p + 1;
3588 break;
3589 }
3590 }
3591
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003592 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003593 if (spin->si_ascii && has_non_ascii(w))
3594 {
3595 ++non_ascii;
3596 vim_free(pc);
3597 continue;
3598 }
3599
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003600 // This takes time, print a message every 10000 words, but not more
3601 // often than once per second.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003602 if (spin->si_verbose && spin->si_msg_count > 10000)
3603 {
3604 spin->si_msg_count = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003605 if (vim_time() > last_msg_time)
3606 {
3607 last_msg_time = vim_time();
3608 vim_snprintf((char *)message, sizeof(message),
3609 _("line %6d, word %6ld - %s"),
3610 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3611 msg_start();
3612 msg_outtrans_long_attr(message, 0);
3613 msg_clr_eos();
3614 msg_didout = FALSE;
3615 msg_col = 0;
3616 out_flush();
3617 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003618 }
3619
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003620 // Store the word in the hashtable to be able to find duplicates.
=?UTF-8?q?Dundar=20G=C3=B6c?=420fabc2022-01-28 15:28:04 +00003621 dw = getroom_save(spin, w);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003622 if (dw == NULL)
3623 {
3624 retval = FAIL;
3625 vim_free(pc);
3626 break;
3627 }
3628
3629 hash = hash_hash(dw);
3630 hi = hash_lookup(&ht, dw, hash);
3631 if (!HASHITEM_EMPTY(hi))
3632 {
3633 if (p_verbose > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003634 smsg(_("Duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003635 fname, lnum, dw);
3636 else if (duplicate == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003637 smsg(_("First duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003638 fname, lnum, dw);
3639 ++duplicate;
3640 }
3641 else
3642 hash_add_item(&ht, hi, dw, hash);
3643
3644 flags = 0;
3645 store_afflist[0] = NUL;
3646 pfxlen = 0;
3647 need_affix = FALSE;
3648 if (afflist != NULL)
3649 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003650 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003651 flags |= get_affix_flags(affile, afflist);
3652
3653 if (affile->af_needaffix != 0 && flag_in_afflist(
3654 affile->af_flagtype, afflist, affile->af_needaffix))
3655 need_affix = TRUE;
3656
3657 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003658 // Need to store the list of prefix IDs with the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003659 pfxlen = get_pfxlist(affile, afflist, store_afflist);
3660
3661 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003662 // Need to store the list of compound flags with the word.
3663 // Concatenate them to the list of prefix IDs.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003664 get_compflags(affile, afflist, store_afflist + pfxlen);
3665 }
3666
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003667 // Add the word to the word tree(s).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003668 if (store_word(spin, dw, flags, spin->si_region,
3669 store_afflist, need_affix) == FAIL)
3670 retval = FAIL;
3671
3672 if (afflist != NULL)
3673 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003674 // Find all matching suffixes and add the resulting words.
3675 // Additionally do matching prefixes that combine.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003676 if (store_aff_word(spin, dw, afflist, affile,
3677 &affile->af_suff, &affile->af_pref,
3678 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3679 retval = FAIL;
3680
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003681 // Find all matching prefixes and add the resulting words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003682 if (store_aff_word(spin, dw, afflist, affile,
3683 &affile->af_pref, NULL,
3684 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3685 retval = FAIL;
3686 }
3687
3688 vim_free(pc);
3689 }
3690
3691 if (duplicate > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003692 smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003693 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003694 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003695 non_ascii, fname);
3696 hash_clear(&ht);
3697
3698 fclose(fd);
3699 return retval;
3700}
3701
3702/*
3703 * Check for affix flags in "afflist" that are turned into word flags.
3704 * Return WF_ flags.
3705 */
3706 static int
3707get_affix_flags(afffile_T *affile, char_u *afflist)
3708{
3709 int flags = 0;
3710
3711 if (affile->af_keepcase != 0 && flag_in_afflist(
3712 affile->af_flagtype, afflist, affile->af_keepcase))
3713 flags |= WF_KEEPCAP | WF_FIXCAP;
3714 if (affile->af_rare != 0 && flag_in_afflist(
3715 affile->af_flagtype, afflist, affile->af_rare))
3716 flags |= WF_RARE;
3717 if (affile->af_bad != 0 && flag_in_afflist(
3718 affile->af_flagtype, afflist, affile->af_bad))
3719 flags |= WF_BANNED;
3720 if (affile->af_needcomp != 0 && flag_in_afflist(
3721 affile->af_flagtype, afflist, affile->af_needcomp))
3722 flags |= WF_NEEDCOMP;
3723 if (affile->af_comproot != 0 && flag_in_afflist(
3724 affile->af_flagtype, afflist, affile->af_comproot))
3725 flags |= WF_COMPROOT;
3726 if (affile->af_nosuggest != 0 && flag_in_afflist(
3727 affile->af_flagtype, afflist, affile->af_nosuggest))
3728 flags |= WF_NOSUGGEST;
3729 return flags;
3730}
3731
3732/*
3733 * Get the list of prefix IDs from the affix list "afflist".
3734 * Used for PFXPOSTPONE.
3735 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
3736 * and return the number of affixes.
3737 */
3738 static int
3739get_pfxlist(
3740 afffile_T *affile,
3741 char_u *afflist,
3742 char_u *store_afflist)
3743{
3744 char_u *p;
3745 char_u *prevp;
3746 int cnt = 0;
3747 int id;
3748 char_u key[AH_KEY_LEN];
3749 hashitem_T *hi;
3750
3751 for (p = afflist; *p != NUL; )
3752 {
3753 prevp = p;
3754 if (get_affitem(affile->af_flagtype, &p) != 0)
3755 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003756 // A flag is a postponed prefix flag if it appears in "af_pref"
3757 // and its ID is not zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003758 vim_strncpy(key, prevp, p - prevp);
3759 hi = hash_find(&affile->af_pref, key);
3760 if (!HASHITEM_EMPTY(hi))
3761 {
3762 id = HI2AH(hi)->ah_newID;
3763 if (id != 0)
3764 store_afflist[cnt++] = id;
3765 }
3766 }
3767 if (affile->af_flagtype == AFT_NUM && *p == ',')
3768 ++p;
3769 }
3770
3771 store_afflist[cnt] = NUL;
3772 return cnt;
3773}
3774
3775/*
3776 * Get the list of compound IDs from the affix list "afflist" that are used
3777 * for compound words.
3778 * Puts the flags in "store_afflist[]".
3779 */
3780 static void
3781get_compflags(
3782 afffile_T *affile,
3783 char_u *afflist,
3784 char_u *store_afflist)
3785{
3786 char_u *p;
3787 char_u *prevp;
3788 int cnt = 0;
3789 char_u key[AH_KEY_LEN];
3790 hashitem_T *hi;
3791
3792 for (p = afflist; *p != NUL; )
3793 {
3794 prevp = p;
3795 if (get_affitem(affile->af_flagtype, &p) != 0)
3796 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003797 // A flag is a compound flag if it appears in "af_comp".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003798 vim_strncpy(key, prevp, p - prevp);
3799 hi = hash_find(&affile->af_comp, key);
3800 if (!HASHITEM_EMPTY(hi))
3801 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
3802 }
3803 if (affile->af_flagtype == AFT_NUM && *p == ',')
3804 ++p;
3805 }
3806
3807 store_afflist[cnt] = NUL;
3808}
3809
3810/*
3811 * Apply affixes to a word and store the resulting words.
3812 * "ht" is the hashtable with affentry_T that need to be applied, either
3813 * prefixes or suffixes.
3814 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
3815 * the resulting words for combining affixes.
3816 *
3817 * Returns FAIL when out of memory.
3818 */
3819 static int
3820store_aff_word(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003821 spellinfo_T *spin, // spell info
3822 char_u *word, // basic word start
3823 char_u *afflist, // list of names of supported affixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003824 afffile_T *affile,
3825 hashtab_T *ht,
3826 hashtab_T *xht,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003827 int condit, // CONDIT_SUF et al.
3828 int flags, // flags for the word
3829 char_u *pfxlist, // list of prefix IDs
3830 int pfxlen) // nr of flags in "pfxlist" for prefixes, rest
3831 // is compound flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003832{
3833 int todo;
3834 hashitem_T *hi;
3835 affheader_T *ah;
3836 affentry_T *ae;
3837 char_u newword[MAXWLEN];
3838 int retval = OK;
3839 int i, j;
3840 char_u *p;
3841 int use_flags;
3842 char_u *use_pfxlist;
3843 int use_pfxlen;
3844 int need_affix;
3845 char_u store_afflist[MAXWLEN];
3846 char_u pfx_pfxlist[MAXWLEN];
3847 size_t wordlen = STRLEN(word);
3848 int use_condit;
3849
3850 todo = (int)ht->ht_used;
3851 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
3852 {
3853 if (!HASHITEM_EMPTY(hi))
3854 {
3855 --todo;
3856 ah = HI2AH(hi);
3857
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003858 // Check that the affix combines, if required, and that the word
3859 // supports this affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003860 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
3861 && flag_in_afflist(affile->af_flagtype, afflist,
3862 ah->ah_flag))
3863 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003864 // Loop over all affix entries with this name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003865 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3866 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003867 // Check the condition. It's not logical to match case
3868 // here, but it is required for compatibility with
3869 // Myspell.
3870 // Another requirement from Myspell is that the chop
3871 // string is shorter than the word itself.
3872 // For prefixes, when "PFXPOSTPONE" was used, only do
3873 // prefixes with a chop string and/or flags.
3874 // When a previously added affix had CIRCUMFIX this one
3875 // must have it too, if it had not then this one must not
3876 // have one either.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003877 if ((xht != NULL || !affile->af_pfxpostpone
3878 || ae->ae_chop != NULL
3879 || ae->ae_flags != NULL)
3880 && (ae->ae_chop == NULL
3881 || STRLEN(ae->ae_chop) < wordlen)
3882 && (ae->ae_prog == NULL
3883 || vim_regexec_prog(&ae->ae_prog, FALSE,
3884 word, (colnr_T)0))
3885 && (((condit & CONDIT_CFIX) == 0)
3886 == ((condit & CONDIT_AFF) == 0
3887 || ae->ae_flags == NULL
3888 || !flag_in_afflist(affile->af_flagtype,
3889 ae->ae_flags, affile->af_circumfix))))
3890 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003891 // Match. Remove the chop and add the affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003892 if (xht == NULL)
3893 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003894 // prefix: chop/add at the start of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003895 if (ae->ae_add == NULL)
3896 *newword = NUL;
3897 else
3898 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
3899 p = word;
3900 if (ae->ae_chop != NULL)
3901 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003902 // Skip chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003903 if (has_mbyte)
3904 {
3905 i = mb_charlen(ae->ae_chop);
3906 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003907 MB_PTR_ADV(p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003908 }
3909 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003910 p += STRLEN(ae->ae_chop);
3911 }
3912 STRCAT(newword, p);
3913 }
3914 else
3915 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003916 // suffix: chop/add at the end of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003917 vim_strncpy(newword, word, MAXWLEN - 1);
3918 if (ae->ae_chop != NULL)
3919 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003920 // Remove chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003921 p = newword + STRLEN(newword);
3922 i = (int)MB_CHARLEN(ae->ae_chop);
3923 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003924 MB_PTR_BACK(newword, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003925 *p = NUL;
3926 }
3927 if (ae->ae_add != NULL)
3928 STRCAT(newword, ae->ae_add);
3929 }
3930
3931 use_flags = flags;
3932 use_pfxlist = pfxlist;
3933 use_pfxlen = pfxlen;
3934 need_affix = FALSE;
3935 use_condit = condit | CONDIT_COMB | CONDIT_AFF;
3936 if (ae->ae_flags != NULL)
3937 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003938 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003939 use_flags |= get_affix_flags(affile, ae->ae_flags);
3940
3941 if (affile->af_needaffix != 0 && flag_in_afflist(
3942 affile->af_flagtype, ae->ae_flags,
3943 affile->af_needaffix))
3944 need_affix = TRUE;
3945
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003946 // When there is a CIRCUMFIX flag the other affix
3947 // must also have it and we don't add the word
3948 // with one affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003949 if (affile->af_circumfix != 0 && flag_in_afflist(
3950 affile->af_flagtype, ae->ae_flags,
3951 affile->af_circumfix))
3952 {
3953 use_condit |= CONDIT_CFIX;
3954 if ((condit & CONDIT_CFIX) == 0)
3955 need_affix = TRUE;
3956 }
3957
3958 if (affile->af_pfxpostpone
3959 || spin->si_compflags != NULL)
3960 {
3961 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003962 // Get prefix IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003963 use_pfxlen = get_pfxlist(affile,
3964 ae->ae_flags, store_afflist);
3965 else
3966 use_pfxlen = 0;
3967 use_pfxlist = store_afflist;
3968
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003969 // Combine the prefix IDs. Avoid adding the
3970 // same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003971 for (i = 0; i < pfxlen; ++i)
3972 {
3973 for (j = 0; j < use_pfxlen; ++j)
3974 if (pfxlist[i] == use_pfxlist[j])
3975 break;
3976 if (j == use_pfxlen)
3977 use_pfxlist[use_pfxlen++] = pfxlist[i];
3978 }
3979
3980 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003981 // Get compound IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003982 get_compflags(affile, ae->ae_flags,
3983 use_pfxlist + use_pfxlen);
3984
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003985 // Combine the list of compound flags.
3986 // Concatenate them to the prefix IDs list.
3987 // Avoid adding the same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003988 for (i = pfxlen; pfxlist[i] != NUL; ++i)
3989 {
3990 for (j = use_pfxlen;
3991 use_pfxlist[j] != NUL; ++j)
3992 if (pfxlist[i] == use_pfxlist[j])
3993 break;
3994 if (use_pfxlist[j] == NUL)
3995 {
3996 use_pfxlist[j++] = pfxlist[i];
3997 use_pfxlist[j] = NUL;
3998 }
3999 }
4000 }
4001 }
4002
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004003 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
4004 // use the compound flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004005 if (use_pfxlist != NULL && ae->ae_compforbid)
4006 {
4007 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
4008 use_pfxlist = pfx_pfxlist;
4009 }
4010
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004011 // When there are postponed prefixes...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004012 if (spin->si_prefroot != NULL
4013 && spin->si_prefroot->wn_sibling != NULL)
4014 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004015 // ... add a flag to indicate an affix was used.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004016 use_flags |= WF_HAS_AFF;
4017
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004018 // ... don't use a prefix list if combining
4019 // affixes is not allowed. But do use the
4020 // compound flags after them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004021 if (!ah->ah_combine && use_pfxlist != NULL)
4022 use_pfxlist += use_pfxlen;
4023 }
4024
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004025 // When compounding is supported and there is no
4026 // "COMPOUNDPERMITFLAG" then forbid compounding on the
4027 // side where the affix is applied.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004028 if (spin->si_compflags != NULL && !ae->ae_comppermit)
4029 {
4030 if (xht != NULL)
4031 use_flags |= WF_NOCOMPAFT;
4032 else
4033 use_flags |= WF_NOCOMPBEF;
4034 }
4035
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004036 // Store the modified word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004037 if (store_word(spin, newword, use_flags,
4038 spin->si_region, use_pfxlist,
4039 need_affix) == FAIL)
4040 retval = FAIL;
4041
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004042 // When added a prefix or a first suffix and the affix
4043 // has flags may add a(nother) suffix. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004044 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
4045 if (store_aff_word(spin, newword, ae->ae_flags,
4046 affile, &affile->af_suff, xht,
4047 use_condit & (xht == NULL
4048 ? ~0 : ~CONDIT_SUF),
4049 use_flags, use_pfxlist, pfxlen) == FAIL)
4050 retval = FAIL;
4051
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004052 // When added a suffix and combining is allowed also
4053 // try adding a prefix additionally. Both for the
4054 // word flags and for the affix flags. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004055 if (xht != NULL && ah->ah_combine)
4056 {
4057 if (store_aff_word(spin, newword,
4058 afflist, affile,
4059 xht, NULL, use_condit,
4060 use_flags, use_pfxlist,
4061 pfxlen) == FAIL
4062 || (ae->ae_flags != NULL
4063 && store_aff_word(spin, newword,
4064 ae->ae_flags, affile,
4065 xht, NULL, use_condit,
4066 use_flags, use_pfxlist,
4067 pfxlen) == FAIL))
4068 retval = FAIL;
4069 }
4070 }
4071 }
4072 }
4073 }
4074 }
4075
4076 return retval;
4077}
4078
4079/*
4080 * Read a file with a list of words.
4081 */
4082 static int
4083spell_read_wordfile(spellinfo_T *spin, char_u *fname)
4084{
4085 FILE *fd;
4086 long lnum = 0;
4087 char_u rline[MAXLINELEN];
4088 char_u *line;
4089 char_u *pc = NULL;
4090 char_u *p;
4091 int l;
4092 int retval = OK;
4093 int did_word = FALSE;
4094 int non_ascii = 0;
4095 int flags;
4096 int regionmask;
4097
4098 /*
4099 * Open the file.
4100 */
4101 fd = mch_fopen((char *)fname, "r");
4102 if (fd == NULL)
4103 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004104 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004105 return FAIL;
4106 }
4107
Bram Moolenaarc1669272018-06-19 14:23:53 +02004108 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004109 spell_message(spin, IObuff);
4110
4111 /*
4112 * Read all the lines in the file one by one.
4113 */
4114 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
4115 {
4116 line_breakcheck();
4117 ++lnum;
4118
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004119 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004120 if (*rline == '#')
4121 continue;
4122
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004123 // Remove CR, LF and white space from the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004124 l = (int)STRLEN(rline);
4125 while (l > 0 && rline[l - 1] <= ' ')
4126 --l;
4127 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004128 continue; // empty or blank line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004129 rline[l] = NUL;
4130
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004131 // Convert from "/encoding={encoding}" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004132 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004133 if (spin->si_conv.vc_type != CONV_NONE)
4134 {
4135 pc = string_convert(&spin->si_conv, rline, NULL);
4136 if (pc == NULL)
4137 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004138 smsg(_("Conversion failure for word in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004139 fname, lnum, rline);
4140 continue;
4141 }
4142 line = pc;
4143 }
4144 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004145 {
4146 pc = NULL;
4147 line = rline;
4148 }
4149
4150 if (*line == '/')
4151 {
4152 ++line;
4153 if (STRNCMP(line, "encoding=", 9) == 0)
4154 {
4155 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004156 smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004157 fname, lnum, line - 1);
4158 else if (did_word)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004159 smsg(_("/encoding= line after word ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004160 fname, lnum, line - 1);
4161 else
4162 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004163 char_u *enc;
4164
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004165 // Setup for conversion to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004166 line += 9;
4167 enc = enc_canonize(line);
4168 if (enc != NULL && !spin->si_ascii
4169 && convert_setup(&spin->si_conv, enc,
4170 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004171 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004172 fname, line, p_enc);
4173 vim_free(enc);
4174 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004175 }
4176 continue;
4177 }
4178
4179 if (STRNCMP(line, "regions=", 8) == 0)
4180 {
4181 if (spin->si_region_count > 1)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004182 smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004183 fname, lnum, line);
4184 else
4185 {
4186 line += 8;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01004187 if (STRLEN(line) > MAXREGIONS * 2)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004188 smsg(_("Too many regions in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004189 fname, lnum, line);
4190 else
4191 {
4192 spin->si_region_count = (int)STRLEN(line) / 2;
4193 STRCPY(spin->si_region_name, line);
4194
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004195 // Adjust the mask for a word valid in all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004196 spin->si_region = (1 << spin->si_region_count) - 1;
4197 }
4198 }
4199 continue;
4200 }
4201
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004202 smsg(_("/ line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004203 fname, lnum, line - 1);
4204 continue;
4205 }
4206
4207 flags = 0;
4208 regionmask = spin->si_region;
4209
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004210 // Check for flags and region after a slash.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004211 p = vim_strchr(line, '/');
4212 if (p != NULL)
4213 {
4214 *p++ = NUL;
4215 while (*p != NUL)
4216 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004217 if (*p == '=') // keep-case word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004218 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004219 else if (*p == '!') // Bad, bad, wicked word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004220 flags |= WF_BANNED;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004221 else if (*p == '?') // Rare word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004222 flags |= WF_RARE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004223 else if (VIM_ISDIGIT(*p)) // region number(s)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004224 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004225 if ((flags & WF_REGION) == 0) // first one
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004226 regionmask = 0;
4227 flags |= WF_REGION;
4228
4229 l = *p - '0';
Bram Moolenaaree03b942017-10-27 00:57:05 +02004230 if (l == 0 || l > spin->si_region_count)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004231 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004232 smsg(_("Invalid region nr in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004233 fname, lnum, p);
4234 break;
4235 }
4236 regionmask |= 1 << (l - 1);
4237 }
4238 else
4239 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004240 smsg(_("Unrecognized flags in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004241 fname, lnum, p);
4242 break;
4243 }
4244 ++p;
4245 }
4246 }
4247
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004248 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004249 if (spin->si_ascii && has_non_ascii(line))
4250 {
4251 ++non_ascii;
4252 continue;
4253 }
4254
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004255 // Normal word: store it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004256 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
4257 {
4258 retval = FAIL;
4259 break;
4260 }
4261 did_word = TRUE;
4262 }
4263
4264 vim_free(pc);
4265 fclose(fd);
4266
4267 if (spin->si_ascii && non_ascii > 0)
4268 {
4269 vim_snprintf((char *)IObuff, IOSIZE,
4270 _("Ignored %d words with non-ASCII characters"), non_ascii);
4271 spell_message(spin, IObuff);
4272 }
4273
4274 return retval;
4275}
4276
4277/*
4278 * Get part of an sblock_T, "len" bytes long.
4279 * This avoids calling free() for every little struct we use (and keeping
4280 * track of them).
4281 * The memory is cleared to all zeros.
4282 * Returns NULL when out of memory.
4283 */
4284 static void *
4285getroom(
4286 spellinfo_T *spin,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004287 size_t len, // length needed
4288 int align) // align for pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004289{
4290 char_u *p;
4291 sblock_T *bl = spin->si_blocks;
4292
4293 if (align && bl != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004294 // Round size up for alignment. On some systems structures need to be
4295 // aligned to the size of a pointer (e.g., SPARC).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004296 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
4297 & ~(sizeof(char *) - 1);
4298
4299 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
4300 {
4301 if (len >= SBLOCKSIZE)
4302 bl = NULL;
4303 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004304 // Allocate a block of memory. It is not freed until much later.
Bram Moolenaarc799fe22019-05-28 23:08:19 +02004305 bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004306 if (bl == NULL)
4307 {
4308 if (!spin->si_did_emsg)
4309 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00004310 emsg(_(e_insufficient_memory_word_list_will_be_incomplete));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004311 spin->si_did_emsg = TRUE;
4312 }
4313 return NULL;
4314 }
4315 bl->sb_next = spin->si_blocks;
4316 spin->si_blocks = bl;
4317 bl->sb_used = 0;
4318 ++spin->si_blocks_cnt;
4319 }
4320
4321 p = bl->sb_data + bl->sb_used;
4322 bl->sb_used += (int)len;
4323
4324 return p;
4325}
4326
4327/*
4328 * Make a copy of a string into memory allocated with getroom().
4329 * Returns NULL when out of memory.
4330 */
4331 static char_u *
4332getroom_save(spellinfo_T *spin, char_u *s)
4333{
4334 char_u *sc;
4335
4336 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
4337 if (sc != NULL)
4338 STRCPY(sc, s);
4339 return sc;
4340}
4341
4342
4343/*
4344 * Free the list of allocated sblock_T.
4345 */
4346 static void
4347free_blocks(sblock_T *bl)
4348{
4349 sblock_T *next;
4350
4351 while (bl != NULL)
4352 {
4353 next = bl->sb_next;
4354 vim_free(bl);
4355 bl = next;
4356 }
4357}
4358
4359/*
4360 * Allocate the root of a word tree.
4361 * Returns NULL when out of memory.
4362 */
4363 static wordnode_T *
4364wordtree_alloc(spellinfo_T *spin)
4365{
4366 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4367}
4368
4369/*
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004370 * Return TRUE if "word" contains valid word characters.
4371 * Control characters and trailing '/' are invalid. Space is OK.
4372 */
4373 static int
4374valid_spell_word(char_u *word)
4375{
4376 char_u *p;
4377
4378 if (enc_utf8 && !utf_valid_string(word, NULL))
4379 return FALSE;
4380 for (p = word; *p != NUL; p += mb_ptr2len(p))
4381 if (*p < ' ' || (p[0] == '/' && p[1] == NUL))
4382 return FALSE;
4383 return TRUE;
4384}
4385
4386/*
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004387 * Store a word in the tree(s).
4388 * Always store it in the case-folded tree. For a keep-case word this is
4389 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
4390 * used to find suggestions.
4391 * For a keep-case word also store it in the keep-case tree.
4392 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
4393 * compound flag.
4394 */
4395 static int
4396store_word(
4397 spellinfo_T *spin,
4398 char_u *word,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004399 int flags, // extra flags, WF_BANNED
4400 int region, // supported region(s)
4401 char_u *pfxlist, // list of prefix IDs or NULL
4402 int need_affix) // only store word with affix ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004403{
4404 int len = (int)STRLEN(word);
4405 int ct = captype(word, word + len);
4406 char_u foldword[MAXWLEN];
4407 int res = OK;
4408 char_u *p;
4409
Bram Moolenaar7c824682022-05-08 22:32:58 +01004410 // Avoid adding illegal bytes to the word tree.
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004411 if (!valid_spell_word(word))
Bram Moolenaar7c824682022-05-08 22:32:58 +01004412 return FAIL;
4413
Bram Moolenaar4f135272021-06-11 19:07:40 +02004414 (void)spell_casefold(curwin, word, len, foldword, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004415 for (p = pfxlist; res == OK; ++p)
4416 {
4417 if (!need_affix || (p != NULL && *p != NUL))
4418 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
4419 region, p == NULL ? 0 : *p);
4420 if (p == NULL || *p == NUL)
4421 break;
4422 }
4423 ++spin->si_foldwcount;
4424
4425 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
4426 {
4427 for (p = pfxlist; res == OK; ++p)
4428 {
4429 if (!need_affix || (p != NULL && *p != NUL))
4430 res = tree_add_word(spin, word, spin->si_keeproot, flags,
4431 region, p == NULL ? 0 : *p);
4432 if (p == NULL || *p == NUL)
4433 break;
4434 }
4435 ++spin->si_keepwcount;
4436 }
4437 return res;
4438}
4439
4440/*
4441 * Add word "word" to a word tree at "root".
4442 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
4443 * "rare" and "region" is the condition nr.
4444 * Returns FAIL when out of memory.
4445 */
4446 static int
4447tree_add_word(
4448 spellinfo_T *spin,
4449 char_u *word,
4450 wordnode_T *root,
4451 int flags,
4452 int region,
4453 int affixID)
4454{
4455 wordnode_T *node = root;
4456 wordnode_T *np;
4457 wordnode_T *copyp, **copyprev;
4458 wordnode_T **prev = NULL;
4459 int i;
4460
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004461 // Add each byte of the word to the tree, including the NUL at the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004462 for (i = 0; ; ++i)
4463 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004464 // When there is more than one reference to this node we need to make
4465 // a copy, so that we can modify it. Copy the whole list of siblings
4466 // (we don't optimize for a partly shared list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004467 if (node != NULL && node->wn_refs > 1)
4468 {
4469 --node->wn_refs;
4470 copyprev = prev;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004471 FOR_ALL_NODE_SIBLINGS(node, copyp)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004472 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004473 // Allocate a new node and copy the info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004474 np = get_wordnode(spin);
4475 if (np == NULL)
4476 return FAIL;
4477 np->wn_child = copyp->wn_child;
4478 if (np->wn_child != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004479 ++np->wn_child->wn_refs; // child gets extra ref
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004480 np->wn_byte = copyp->wn_byte;
4481 if (np->wn_byte == NUL)
4482 {
4483 np->wn_flags = copyp->wn_flags;
4484 np->wn_region = copyp->wn_region;
4485 np->wn_affixID = copyp->wn_affixID;
4486 }
4487
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004488 // Link the new node in the list, there will be one ref.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004489 np->wn_refs = 1;
4490 if (copyprev != NULL)
4491 *copyprev = np;
4492 copyprev = &np->wn_sibling;
4493
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004494 // Let "node" point to the head of the copied list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004495 if (copyp == node)
4496 node = np;
4497 }
4498 }
4499
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004500 // Look for the sibling that has the same character. They are sorted
4501 // on byte value, thus stop searching when a sibling is found with a
4502 // higher byte value. For zero bytes (end of word) the sorting is
4503 // done on flags and then on affixID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004504 while (node != NULL
4505 && (node->wn_byte < word[i]
4506 || (node->wn_byte == NUL
4507 && (flags < 0
4508 ? node->wn_affixID < (unsigned)affixID
4509 : (node->wn_flags < (unsigned)(flags & WN_MASK)
4510 || (node->wn_flags == (flags & WN_MASK)
4511 && (spin->si_sugtree
4512 ? (node->wn_region & 0xffff) < region
4513 : node->wn_affixID
4514 < (unsigned)affixID)))))))
4515 {
4516 prev = &node->wn_sibling;
4517 node = *prev;
4518 }
4519 if (node == NULL
4520 || node->wn_byte != word[i]
4521 || (word[i] == NUL
4522 && (flags < 0
4523 || spin->si_sugtree
4524 || node->wn_flags != (flags & WN_MASK)
4525 || node->wn_affixID != affixID)))
4526 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004527 // Allocate a new node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004528 np = get_wordnode(spin);
4529 if (np == NULL)
4530 return FAIL;
4531 np->wn_byte = word[i];
4532
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004533 // If "node" is NULL this is a new child or the end of the sibling
4534 // list: ref count is one. Otherwise use ref count of sibling and
4535 // make ref count of sibling one (matters when inserting in front
4536 // of the list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004537 if (node == NULL)
4538 np->wn_refs = 1;
4539 else
4540 {
4541 np->wn_refs = node->wn_refs;
4542 node->wn_refs = 1;
4543 }
4544 if (prev != NULL)
4545 *prev = np;
4546 np->wn_sibling = node;
4547 node = np;
4548 }
4549
4550 if (word[i] == NUL)
4551 {
4552 node->wn_flags = flags;
4553 node->wn_region |= region;
4554 node->wn_affixID = affixID;
4555 break;
4556 }
4557 prev = &node->wn_child;
4558 node = *prev;
4559 }
4560#ifdef SPELL_PRINTTREE
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004561 smsg("Added \"%s\"", word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004562 spell_print_tree(root->wn_sibling);
4563#endif
4564
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004565 // count nr of words added since last message
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004566 ++spin->si_msg_count;
4567
4568 if (spin->si_compress_cnt > 1)
4569 {
4570 if (--spin->si_compress_cnt == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004571 // Did enough words to lower the block count limit.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004572 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004573 }
4574
4575 /*
4576 * When we have allocated lots of memory we need to compress the word tree
4577 * to free up some room. But compression is slow, and we might actually
4578 * need that room, thus only compress in the following situations:
4579 * 1. When not compressed before (si_compress_cnt == 0): when using
4580 * "compress_start" blocks.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004581 * 2. When compressed before and used "compress_inc" blocks before
4582 * adding "compress_added" words (si_compress_cnt > 1).
4583 * 3. When compressed before, added "compress_added" words
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004584 * (si_compress_cnt == 1) and the number of free nodes drops below the
4585 * maximum word length.
4586 */
4587#ifndef SPELL_COMPRESS_ALLWAYS
4588 if (spin->si_compress_cnt == 1
4589 ? spin->si_free_count < MAXWLEN
4590 : spin->si_blocks_cnt >= compress_start)
4591#endif
4592 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004593 // Decrement the block counter. The effect is that we compress again
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004594 // when the freed up room has been used and another "compress_inc"
4595 // blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004596 // been added, then the limit is put back again.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004597 spin->si_blocks_cnt -= compress_inc;
4598 spin->si_compress_cnt = compress_added;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004599
4600 if (spin->si_verbose)
4601 {
4602 msg_start();
Bram Moolenaar32526b32019-01-19 17:43:09 +01004603 msg_puts(_(msg_compressing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004604 msg_clr_eos();
4605 msg_didout = FALSE;
4606 msg_col = 0;
4607 out_flush();
4608 }
4609
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004610 // Compress both trees. Either they both have many nodes, which makes
4611 // compression useful, or one of them is small, which means
4612 // compression goes fast. But when filling the soundfold word tree
4613 // there is no keep-case tree.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004614 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004615 if (affixID >= 0)
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004616 wordtree_compress(spin, spin->si_keeproot, "keep-case");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004617 }
4618
4619 return OK;
4620}
4621
4622/*
4623 * Get a wordnode_T, either from the list of previously freed nodes or
4624 * allocate a new one.
4625 * Returns NULL when out of memory.
4626 */
4627 static wordnode_T *
4628get_wordnode(spellinfo_T *spin)
4629{
4630 wordnode_T *n;
4631
4632 if (spin->si_first_free == NULL)
4633 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4634 else
4635 {
4636 n = spin->si_first_free;
4637 spin->si_first_free = n->wn_child;
Bram Moolenaara80faa82020-04-12 19:37:17 +02004638 CLEAR_POINTER(n);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004639 --spin->si_free_count;
4640 }
4641#ifdef SPELL_PRINTTREE
4642 if (n != NULL)
4643 n->wn_nr = ++spin->si_wordnode_nr;
4644#endif
4645 return n;
4646}
4647
4648/*
4649 * Decrement the reference count on a node (which is the head of a list of
4650 * siblings). If the reference count becomes zero free the node and its
4651 * siblings.
4652 * Returns the number of nodes actually freed.
4653 */
4654 static int
4655deref_wordnode(spellinfo_T *spin, wordnode_T *node)
4656{
4657 wordnode_T *np;
4658 int cnt = 0;
4659
4660 if (--node->wn_refs == 0)
4661 {
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004662 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004663 {
4664 if (np->wn_child != NULL)
4665 cnt += deref_wordnode(spin, np->wn_child);
4666 free_wordnode(spin, np);
4667 ++cnt;
4668 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004669 ++cnt; // length field
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004670 }
4671 return cnt;
4672}
4673
4674/*
4675 * Free a wordnode_T for re-use later.
4676 * Only the "wn_child" field becomes invalid.
4677 */
4678 static void
4679free_wordnode(spellinfo_T *spin, wordnode_T *n)
4680{
4681 n->wn_child = spin->si_first_free;
4682 spin->si_first_free = n;
4683 ++spin->si_free_count;
4684}
4685
4686/*
4687 * Compress a tree: find tails that are identical and can be shared.
4688 */
4689 static void
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004690wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004691{
4692 hashtab_T ht;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004693 long n;
4694 long tot = 0;
4695 long perc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004696
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004697 // Skip the root itself, it's not actually used. The first sibling is the
4698 // start of the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004699 if (root->wn_sibling != NULL)
4700 {
4701 hash_init(&ht);
4702 n = node_compress(spin, root->wn_sibling, &ht, &tot);
4703
4704#ifndef SPELL_PRINTTREE
4705 if (spin->si_verbose || p_verbose > 2)
4706#endif
4707 {
4708 if (tot > 1000000)
4709 perc = (tot - n) / (tot / 100);
4710 else if (tot == 0)
4711 perc = 0;
4712 else
4713 perc = (tot - n) * 100 / tot;
4714 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004715 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"),
4716 name, n, tot, tot - n, perc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004717 spell_message(spin, IObuff);
4718 }
4719#ifdef SPELL_PRINTTREE
4720 spell_print_tree(root->wn_sibling);
4721#endif
4722 hash_clear(&ht);
4723 }
4724}
4725
4726/*
4727 * Compress a node, its siblings and its children, depth first.
4728 * Returns the number of compressed nodes.
4729 */
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004730 static long
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004731node_compress(
4732 spellinfo_T *spin,
4733 wordnode_T *node,
4734 hashtab_T *ht,
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004735 long *tot) // total count of nodes before compressing,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004736 // incremented while going through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004737{
4738 wordnode_T *np;
4739 wordnode_T *tp;
4740 wordnode_T *child;
4741 hash_T hash;
4742 hashitem_T *hi;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004743 long len = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004744 unsigned nr, n;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004745 long compressed = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004746
4747 /*
4748 * Go through the list of siblings. Compress each child and then try
4749 * finding an identical child to replace it.
4750 * Note that with "child" we mean not just the node that is pointed to,
4751 * but the whole list of siblings of which the child node is the first.
4752 */
4753 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
4754 {
4755 ++len;
4756 if ((child = np->wn_child) != NULL)
4757 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004758 // Compress the child first. This fills hashkey.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004759 compressed += node_compress(spin, child, ht, tot);
4760
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004761 // Try to find an identical child.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004762 hash = hash_hash(child->wn_u1.hashkey);
4763 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
4764 if (!HASHITEM_EMPTY(hi))
4765 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004766 // There are children we encountered before with a hash value
4767 // identical to the current child. Now check if there is one
4768 // that is really identical.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004769 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
4770 if (node_equal(child, tp))
4771 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004772 // Found one! Now use that child in place of the
4773 // current one. This means the current child and all
4774 // its siblings is unlinked from the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004775 ++tp->wn_refs;
4776 compressed += deref_wordnode(spin, child);
4777 np->wn_child = tp;
4778 break;
4779 }
4780 if (tp == NULL)
4781 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004782 // No other child with this hash value equals the child of
4783 // the node, add it to the linked list after the first
4784 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004785 tp = HI2WN(hi);
4786 child->wn_u2.next = tp->wn_u2.next;
4787 tp->wn_u2.next = child;
4788 }
4789 }
4790 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004791 // No other child has this hash value, add it to the
4792 // hashtable.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004793 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
4794 }
4795 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004796 *tot += len + 1; // add one for the node that stores the length
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004797
4798 /*
4799 * Make a hash key for the node and its siblings, so that we can quickly
4800 * find a lookalike node. This must be done after compressing the sibling
4801 * list, otherwise the hash key would become invalid by the compression.
4802 */
4803 node->wn_u1.hashkey[0] = len;
4804 nr = 0;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004805 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004806 {
4807 if (np->wn_byte == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004808 // end node: use wn_flags, wn_region and wn_affixID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004809 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
4810 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004811 // byte node: use the byte value and the child pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004812 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
4813 nr = nr * 101 + n;
4814 }
4815
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004816 // Avoid NUL bytes, it terminates the hash key.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004817 n = nr & 0xff;
4818 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
4819 n = (nr >> 8) & 0xff;
4820 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
4821 n = (nr >> 16) & 0xff;
4822 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
4823 n = (nr >> 24) & 0xff;
4824 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4825 node->wn_u1.hashkey[5] = NUL;
4826
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004827 // Check for CTRL-C pressed now and then.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004828 veryfast_breakcheck();
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004829
4830 return compressed;
4831}
4832
4833/*
4834 * Return TRUE when two nodes have identical siblings and children.
4835 */
4836 static int
4837node_equal(wordnode_T *n1, wordnode_T *n2)
4838{
4839 wordnode_T *p1;
4840 wordnode_T *p2;
4841
4842 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4843 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4844 if (p1->wn_byte != p2->wn_byte
4845 || (p1->wn_byte == NUL
4846 ? (p1->wn_flags != p2->wn_flags
4847 || p1->wn_region != p2->wn_region
4848 || p1->wn_affixID != p2->wn_affixID)
4849 : (p1->wn_child != p2->wn_child)))
4850 break;
4851
4852 return p1 == NULL && p2 == NULL;
4853}
4854
Bram Moolenaareae1b912019-05-09 15:12:55 +02004855static int rep_compare(const void *s1, const void *s2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004856
4857/*
4858 * Function given to qsort() to sort the REP items on "from" string.
4859 */
4860 static int
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004861rep_compare(const void *s1, const void *s2)
4862{
4863 fromto_T *p1 = (fromto_T *)s1;
4864 fromto_T *p2 = (fromto_T *)s2;
4865
4866 return STRCMP(p1->ft_from, p2->ft_from);
4867}
4868
4869/*
4870 * Write the Vim .spl file "fname".
4871 * Return FAIL or OK;
4872 */
4873 static int
4874write_vim_spell(spellinfo_T *spin, char_u *fname)
4875{
4876 FILE *fd;
4877 int regionmask;
4878 int round;
4879 wordnode_T *tree;
4880 int nodecount;
4881 int i;
4882 int l;
4883 garray_T *gap;
4884 fromto_T *ftp;
4885 char_u *p;
4886 int rr;
4887 int retval = OK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004888 size_t fwv = 1; // collect return value of fwrite() to avoid
4889 // warnings from picky compiler
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004890
4891 fd = mch_fopen((char *)fname, "w");
4892 if (fd == NULL)
4893 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004894 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004895 return FAIL;
4896 }
4897
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004898 // <HEADER>: <fileID> <versionnr>
4899 // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004900 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
4901 if (fwv != (size_t)1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004902 // Catch first write error, don't try writing more.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004903 goto theend;
4904
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004905 putc(VIMSPELLVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004906
4907 /*
4908 * <SECTIONS>: <section> ... <sectionend>
4909 */
4910
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004911 // SN_INFO: <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004912 if (spin->si_info != NULL)
4913 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004914 putc(SN_INFO, fd); // <sectionID>
4915 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004916
4917 i = (int)STRLEN(spin->si_info);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004918 put_bytes(fd, (long_u)i, 4); // <sectionlen>
4919 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004920 }
4921
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004922 // SN_REGION: <regionname> ...
4923 // Write the region names only if there is more than one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004924 if (spin->si_region_count > 1)
4925 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004926 putc(SN_REGION, fd); // <sectionID>
4927 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004928 l = spin->si_region_count * 2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004929 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004930 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004931 // <regionname> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004932 regionmask = (1 << spin->si_region_count) - 1;
4933 }
4934 else
4935 regionmask = 0;
4936
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004937 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
4938 //
4939 // The table with character flags and the table for case folding.
4940 // This makes sure the same characters are recognized as word characters
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00004941 // when generating and when using a spell file.
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004942 // Skip this for ASCII, the table may conflict with the one used for
4943 // 'encoding'.
4944 // Also skip this for an .add.spl file, the main spell file must contain
4945 // the table (avoids that it conflicts). File is shorter too.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004946 if (!spin->si_ascii && !spin->si_add)
4947 {
4948 char_u folchars[128 * 8];
4949 int flags;
4950
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004951 putc(SN_CHARFLAGS, fd); // <sectionID>
4952 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004953
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004954 // Form the <folchars> string first, we need to know its length.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004955 l = 0;
4956 for (i = 128; i < 256; ++i)
4957 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004958 if (has_mbyte)
4959 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
4960 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004961 folchars[l++] = spelltab.st_fold[i];
4962 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004963 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004964
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004965 fputc(128, fd); // <charflagslen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004966 for (i = 128; i < 256; ++i)
4967 {
4968 flags = 0;
4969 if (spelltab.st_isw[i])
4970 flags |= CF_WORD;
4971 if (spelltab.st_isu[i])
4972 flags |= CF_UPPER;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004973 fputc(flags, fd); // <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004974 }
4975
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004976 put_bytes(fd, (long_u)l, 2); // <folcharslen>
4977 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004978 }
4979
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004980 // SN_MIDWORD: <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004981 if (spin->si_midword != NULL)
4982 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004983 putc(SN_MIDWORD, fd); // <sectionID>
4984 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004985
4986 i = (int)STRLEN(spin->si_midword);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004987 put_bytes(fd, (long_u)i, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004988 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004989 // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004990 }
4991
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004992 // SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004993 if (spin->si_prefcond.ga_len > 0)
4994 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004995 putc(SN_PREFCOND, fd); // <sectionID>
4996 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004997
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00004998 l = write_spell_prefcond(NULL, &spin->si_prefcond, &fwv);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004999 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005000
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00005001 write_spell_prefcond(fd, &spin->si_prefcond, &fwv);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005002 }
5003
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005004 // SN_REP: <repcount> <rep> ...
5005 // SN_SAL: <salflags> <salcount> <sal> ...
5006 // SN_REPSAL: <repcount> <rep> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005007
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005008 // round 1: SN_REP section
5009 // round 2: SN_SAL section (unless SN_SOFO is used)
5010 // round 3: SN_REPSAL section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005011 for (round = 1; round <= 3; ++round)
5012 {
5013 if (round == 1)
5014 gap = &spin->si_rep;
5015 else if (round == 2)
5016 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005017 // Don't write SN_SAL when using a SN_SOFO section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005018 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5019 continue;
5020 gap = &spin->si_sal;
5021 }
5022 else
5023 gap = &spin->si_repsal;
5024
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005025 // Don't write the section if there are no items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005026 if (gap->ga_len == 0)
5027 continue;
5028
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005029 // Sort the REP/REPSAL items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005030 if (round != 2)
5031 qsort(gap->ga_data, (size_t)gap->ga_len,
5032 sizeof(fromto_T), rep_compare);
5033
5034 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005035 putc(i, fd); // <sectionID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005036
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005037 // This is for making suggestions, section is not required.
5038 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005039
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005040 // Compute the length of what follows.
5041 l = 2; // count <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005042 for (i = 0; i < gap->ga_len; ++i)
5043 {
5044 ftp = &((fromto_T *)gap->ga_data)[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005045 l += 1 + (int)STRLEN(ftp->ft_from); // count <*fromlen> and <*from>
5046 l += 1 + (int)STRLEN(ftp->ft_to); // count <*tolen> and <*to>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005047 }
5048 if (round == 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005049 ++l; // count <salflags>
5050 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005051
5052 if (round == 2)
5053 {
5054 i = 0;
5055 if (spin->si_followup)
5056 i |= SAL_F0LLOWUP;
5057 if (spin->si_collapse)
5058 i |= SAL_COLLAPSE;
5059 if (spin->si_rem_accents)
5060 i |= SAL_REM_ACCENTS;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005061 putc(i, fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005062 }
5063
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005064 put_bytes(fd, (long_u)gap->ga_len, 2); // <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005065 for (i = 0; i < gap->ga_len; ++i)
5066 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005067 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
5068 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005069 ftp = &((fromto_T *)gap->ga_data)[i];
5070 for (rr = 1; rr <= 2; ++rr)
5071 {
5072 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
5073 l = (int)STRLEN(p);
5074 putc(l, fd);
5075 if (l > 0)
5076 fwv &= fwrite(p, l, (size_t)1, fd);
5077 }
5078 }
5079
5080 }
5081
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005082 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
5083 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005084 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5085 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005086 putc(SN_SOFO, fd); // <sectionID>
5087 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005088
5089 l = (int)STRLEN(spin->si_sofofr);
5090 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005091 // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005092
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005093 put_bytes(fd, (long_u)l, 2); // <sofofromlen>
5094 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005095
5096 l = (int)STRLEN(spin->si_sofoto);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005097 put_bytes(fd, (long_u)l, 2); // <sofotolen>
5098 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005099 }
5100
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005101 // SN_WORDS: <word> ...
5102 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005103 if (spin->si_commonwords.ht_used > 0)
5104 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005105 putc(SN_WORDS, fd); // <sectionID>
5106 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005107
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005108 // round 1: count the bytes
5109 // round 2: write the bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005110 for (round = 1; round <= 2; ++round)
5111 {
5112 int todo;
5113 int len = 0;
5114 hashitem_T *hi;
5115
5116 todo = (int)spin->si_commonwords.ht_used;
5117 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
5118 if (!HASHITEM_EMPTY(hi))
5119 {
5120 l = (int)STRLEN(hi->hi_key) + 1;
5121 len += l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005122 if (round == 2) // <word>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005123 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
5124 --todo;
5125 }
5126 if (round == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005127 put_bytes(fd, (long_u)len, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005128 }
5129 }
5130
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005131 // SN_MAP: <mapstr>
5132 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005133 if (spin->si_map.ga_len > 0)
5134 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005135 putc(SN_MAP, fd); // <sectionID>
5136 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005137 l = spin->si_map.ga_len;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005138 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005139 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005140 // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005141 }
5142
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005143 // SN_SUGFILE: <timestamp>
5144 // This is used to notify that a .sug file may be available and at the
5145 // same time allows for checking that a .sug file that is found matches
5146 // with this .spl file. That's because the word numbers must be exactly
5147 // right.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005148 if (!spin->si_nosugfile
5149 && (spin->si_sal.ga_len > 0
5150 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
5151 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005152 putc(SN_SUGFILE, fd); // <sectionID>
5153 putc(0, fd); // <sectionflags>
5154 put_bytes(fd, (long_u)8, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005155
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005156 // Set si_sugtime and write it to the file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005157 spin->si_sugtime = time(NULL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005158 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005159 }
5160
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005161 // SN_NOSPLITSUGS: nothing
5162 // This is used to notify that no suggestions with word splits are to be
5163 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005164 if (spin->si_nosplitsugs)
5165 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005166 putc(SN_NOSPLITSUGS, fd); // <sectionID>
5167 putc(0, fd); // <sectionflags>
5168 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005169 }
5170
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005171 // SN_NOCOMPUNDSUGS: nothing
5172 // This is used to notify that no suggestions with compounds are to be
5173 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005174 if (spin->si_nocompoundsugs)
5175 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005176 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID>
5177 putc(0, fd); // <sectionflags>
5178 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005179 }
5180
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005181 // SN_COMPOUND: compound info.
5182 // We don't mark it required, when not supported all compound words will
5183 // be bad words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005184 if (spin->si_compflags != NULL)
5185 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005186 putc(SN_COMPOUND, fd); // <sectionID>
5187 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005188
5189 l = (int)STRLEN(spin->si_compflags);
5190 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5191 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005192 put_bytes(fd, (long_u)(l + 7), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005193
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005194 putc(spin->si_compmax, fd); // <compmax>
5195 putc(spin->si_compminlen, fd); // <compminlen>
5196 putc(spin->si_compsylmax, fd); // <compsylmax>
5197 putc(0, fd); // for Vim 7.0b compatibility
5198 putc(spin->si_compoptions, fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005199 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005200 // <comppatcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005201 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5202 {
5203 p = ((char_u **)(spin->si_comppat.ga_data))[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005204 putc((int)STRLEN(p), fd); // <comppatlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005205 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005206 // <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005207 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005208 // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005209 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
5210 (size_t)1, fd);
5211 }
5212
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005213 // SN_NOBREAK: NOBREAK flag
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005214 if (spin->si_nobreak)
5215 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005216 putc(SN_NOBREAK, fd); // <sectionID>
5217 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005218
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005219 // It's empty, the presence of the section flags the feature.
5220 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005221 }
5222
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005223 // SN_SYLLABLE: syllable info.
5224 // We don't mark it required, when not supported syllables will not be
5225 // counted.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005226 if (spin->si_syllable != NULL)
5227 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005228 putc(SN_SYLLABLE, fd); // <sectionID>
5229 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005230
5231 l = (int)STRLEN(spin->si_syllable);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005232 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005233 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005234 // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005235 }
5236
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005237 // end of <SECTIONS>
5238 putc(SN_END, fd); // <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005239
5240
5241 /*
5242 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
5243 */
5244 spin->si_memtot = 0;
5245 for (round = 1; round <= 3; ++round)
5246 {
5247 if (round == 1)
5248 tree = spin->si_foldroot->wn_sibling;
5249 else if (round == 2)
5250 tree = spin->si_keeproot->wn_sibling;
5251 else
5252 tree = spin->si_prefroot->wn_sibling;
5253
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005254 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005255 clear_node(tree);
5256
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005257 // Count the number of nodes. Needed to be able to allocate the
5258 // memory when reading the nodes. Also fills in index for shared
5259 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005260 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
5261
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005262 // number of nodes in 4 bytes
5263 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005264 spin->si_memtot += nodecount + nodecount * sizeof(int);
5265
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005266 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005267 (void)put_node(fd, tree, 0, regionmask, round == 3);
5268 }
5269
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005270 // Write another byte to check for errors (file system full).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005271 if (putc(0, fd) == EOF)
5272 retval = FAIL;
5273theend:
5274 if (fclose(fd) == EOF)
5275 retval = FAIL;
5276
5277 if (fwv != (size_t)1)
5278 retval = FAIL;
5279 if (retval == FAIL)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005280 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005281
5282 return retval;
5283}
5284
5285/*
5286 * Clear the index and wnode fields of "node", it siblings and its
5287 * children. This is needed because they are a union with other items to save
5288 * space.
5289 */
5290 static void
5291clear_node(wordnode_T *node)
5292{
5293 wordnode_T *np;
5294
5295 if (node != NULL)
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005296 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005297 {
5298 np->wn_u1.index = 0;
5299 np->wn_u2.wnode = NULL;
5300
5301 if (np->wn_byte != NUL)
5302 clear_node(np->wn_child);
5303 }
5304}
5305
5306
5307/*
5308 * Dump a word tree at node "node".
5309 *
5310 * This first writes the list of possible bytes (siblings). Then for each
5311 * byte recursively write the children.
5312 *
5313 * NOTE: The code here must match the code in read_tree_node(), since
5314 * assumptions are made about the indexes (so that we don't have to write them
5315 * in the file).
5316 *
5317 * Returns the number of nodes used.
5318 */
5319 static int
5320put_node(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005321 FILE *fd, // NULL when only counting
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005322 wordnode_T *node,
5323 int idx,
5324 int regionmask,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005325 int prefixtree) // TRUE for PREFIXTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005326{
5327 int newindex = idx;
5328 int siblingcount = 0;
5329 wordnode_T *np;
5330 int flags;
5331
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005332 // If "node" is zero the tree is empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005333 if (node == NULL)
5334 return 0;
5335
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005336 // Store the index where this node is written.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005337 node->wn_u1.index = idx;
5338
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005339 // Count the number of siblings.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005340 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005341 ++siblingcount;
5342
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005343 // Write the sibling count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005344 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005345 putc(siblingcount, fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005346
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005347 // Write each sibling byte and optionally extra info.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005348 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005349 {
5350 if (np->wn_byte == 0)
5351 {
5352 if (fd != NULL)
5353 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005354 // For a NUL byte (end of word) write the flags etc.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005355 if (prefixtree)
5356 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005357 // In PREFIXTREE write the required affixID and the
5358 // associated condition nr (stored in wn_region). The
5359 // byte value is misused to store the "rare" and "not
5360 // combining" flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005361 if (np->wn_flags == (short_u)PFX_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005362 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005363 else
5364 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005365 putc(BY_FLAGS, fd); // <byte>
5366 putc(np->wn_flags, fd); // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005367 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005368 putc(np->wn_affixID, fd); // <affixID>
5369 put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005370 }
5371 else
5372 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005373 // For word trees we write the flag/region items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005374 flags = np->wn_flags;
5375 if (regionmask != 0 && np->wn_region != regionmask)
5376 flags |= WF_REGION;
5377 if (np->wn_affixID != 0)
5378 flags |= WF_AFX;
5379 if (flags == 0)
5380 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005381 // word without flags or region
5382 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005383 }
5384 else
5385 {
5386 if (np->wn_flags >= 0x100)
5387 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005388 putc(BY_FLAGS2, fd); // <byte>
5389 putc(flags, fd); // <flags>
5390 putc((unsigned)flags >> 8, fd); // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005391 }
5392 else
5393 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005394 putc(BY_FLAGS, fd); // <byte>
5395 putc(flags, fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005396 }
5397 if (flags & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005398 putc(np->wn_region, fd); // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005399 if (flags & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005400 putc(np->wn_affixID, fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005401 }
5402 }
5403 }
5404 }
5405 else
5406 {
5407 if (np->wn_child->wn_u1.index != 0
5408 && np->wn_child->wn_u2.wnode != node)
5409 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005410 // The child is written elsewhere, write the reference.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005411 if (fd != NULL)
5412 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005413 putc(BY_INDEX, fd); // <byte>
5414 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005415 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
5416 }
5417 }
5418 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005419 // We will write the child below and give it an index.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005420 np->wn_child->wn_u2.wnode = node;
5421
5422 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005423 if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005424 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005425 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005426 return 0;
5427 }
5428 }
5429 }
5430
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005431 // Space used in the array when reading: one for each sibling and one for
5432 // the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005433 newindex += siblingcount + 1;
5434
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005435 // Recursively dump the children of each sibling.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005436 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005437 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
5438 newindex = put_node(fd, np->wn_child, newindex, regionmask,
5439 prefixtree);
5440
5441 return newindex;
5442}
5443
5444
5445/*
5446 * ":mkspell [-ascii] outfile infile ..."
5447 * ":mkspell [-ascii] addfile"
5448 */
5449 void
5450ex_mkspell(exarg_T *eap)
5451{
5452 int fcount;
5453 char_u **fnames;
5454 char_u *arg = eap->arg;
5455 int ascii = FALSE;
5456
5457 if (STRNCMP(arg, "-ascii", 6) == 0)
5458 {
5459 ascii = TRUE;
5460 arg = skipwhite(arg + 6);
5461 }
5462
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005463 // Expand all the remaining arguments (e.g., $VIMRUNTIME).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005464 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
5465 {
5466 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
5467 FreeWild(fcount, fnames);
5468 }
5469}
5470
5471/*
5472 * Create the .sug file.
5473 * Uses the soundfold info in "spin".
5474 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
5475 */
5476 static void
5477spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
5478{
5479 char_u *fname = NULL;
5480 int len;
5481 slang_T *slang;
5482 int free_slang = FALSE;
5483
5484 /*
5485 * Read back the .spl file that was written. This fills the required
5486 * info for soundfolding. This also uses less memory than the
5487 * pointer-linked version of the trie. And it avoids having two versions
5488 * of the code for the soundfolding stuff.
5489 * It might have been done already by spell_reload_one().
5490 */
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005491 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar99499b12019-05-23 21:35:48 +02005492 if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005493 break;
5494 if (slang == NULL)
5495 {
5496 spell_message(spin, (char_u *)_("Reading back spell file..."));
5497 slang = spell_load_file(wfname, NULL, NULL, FALSE);
5498 if (slang == NULL)
5499 return;
5500 free_slang = TRUE;
5501 }
5502
5503 /*
5504 * Clear the info in "spin" that is used.
5505 */
5506 spin->si_blocks = NULL;
5507 spin->si_blocks_cnt = 0;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005508 spin->si_compress_cnt = 0; // will stay at 0 all the time
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005509 spin->si_free_count = 0;
5510 spin->si_first_free = NULL;
5511 spin->si_foldwcount = 0;
5512
5513 /*
5514 * Go through the trie of good words, soundfold each word and add it to
5515 * the soundfold trie.
5516 */
5517 spell_message(spin, (char_u *)_("Performing soundfolding..."));
5518 if (sug_filltree(spin, slang) == FAIL)
5519 goto theend;
5520
5521 /*
5522 * Create the table which links each soundfold word with a list of the
5523 * good words it may come from. Creates buffer "spin->si_spellbuf".
5524 * This also removes the wordnr from the NUL byte entries to make
5525 * compression possible.
5526 */
5527 if (sug_maketable(spin) == FAIL)
5528 goto theend;
5529
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005530 smsg(_("Number of words after soundfolding: %ld"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005531 (long)spin->si_spellbuf->b_ml.ml_line_count);
5532
5533 /*
5534 * Compress the soundfold trie.
5535 */
5536 spell_message(spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02005537 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005538
5539 /*
5540 * Write the .sug file.
5541 * Make the file name by changing ".spl" to ".sug".
5542 */
5543 fname = alloc(MAXPATHL);
5544 if (fname == NULL)
5545 goto theend;
5546 vim_strncpy(fname, wfname, MAXPATHL - 1);
5547 len = (int)STRLEN(fname);
5548 fname[len - 2] = 'u';
5549 fname[len - 1] = 'g';
5550 sug_write(spin, fname);
5551
5552theend:
5553 vim_free(fname);
5554 if (free_slang)
5555 slang_free(slang);
5556 free_blocks(spin->si_blocks);
5557 close_spellbuf(spin->si_spellbuf);
5558}
5559
5560/*
5561 * Build the soundfold trie for language "slang".
5562 */
5563 static int
5564sug_filltree(spellinfo_T *spin, slang_T *slang)
5565{
5566 char_u *byts;
5567 idx_T *idxs;
5568 int depth;
5569 idx_T arridx[MAXWLEN];
5570 int curi[MAXWLEN];
5571 char_u tword[MAXWLEN];
5572 char_u tsalword[MAXWLEN];
5573 int c;
5574 idx_T n;
5575 unsigned words_done = 0;
5576 int wordcount[MAXWLEN];
5577
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005578 // We use si_foldroot for the soundfolded trie.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005579 spin->si_foldroot = wordtree_alloc(spin);
5580 if (spin->si_foldroot == NULL)
5581 return FAIL;
5582
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005583 // let tree_add_word() know we're adding to the soundfolded tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005584 spin->si_sugtree = TRUE;
5585
5586 /*
5587 * Go through the whole case-folded tree, soundfold each word and put it
5588 * in the trie.
5589 */
5590 byts = slang->sl_fbyts;
5591 idxs = slang->sl_fidxs;
5592
5593 arridx[0] = 0;
5594 curi[0] = 1;
5595 wordcount[0] = 0;
5596
5597 depth = 0;
5598 while (depth >= 0 && !got_int)
5599 {
5600 if (curi[depth] > byts[arridx[depth]])
5601 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005602 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005603 idxs[arridx[depth]] = wordcount[depth];
5604 if (depth > 0)
5605 wordcount[depth - 1] += wordcount[depth];
5606
5607 --depth;
5608 line_breakcheck();
5609 }
5610 else
5611 {
5612
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005613 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005614 n = arridx[depth] + curi[depth];
5615 ++curi[depth];
5616
5617 c = byts[n];
5618 if (c == 0)
5619 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005620 // Sound-fold the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005621 tword[depth] = NUL;
5622 spell_soundfold(slang, tword, TRUE, tsalword);
5623
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005624 // We use the "flags" field for the MSB of the wordnr,
5625 // "region" for the LSB of the wordnr.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005626 if (tree_add_word(spin, tsalword, spin->si_foldroot,
5627 words_done >> 16, words_done & 0xffff,
5628 0) == FAIL)
5629 return FAIL;
5630
5631 ++words_done;
5632 ++wordcount[depth];
5633
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005634 // Reset the block count each time to avoid compression
5635 // kicking in.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005636 spin->si_blocks_cnt = 0;
5637
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005638 // Skip over any other NUL bytes (same word with different
Bram Moolenaar07399e72020-08-24 20:05:50 +02005639 // flags). But don't go over the end.
5640 while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005641 {
5642 ++n;
5643 ++curi[depth];
5644 }
5645 }
5646 else
5647 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005648 // Normal char, go one level deeper.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005649 tword[depth++] = c;
5650 arridx[depth] = idxs[n];
5651 curi[depth] = 1;
5652 wordcount[depth] = 0;
5653 }
5654 }
5655 }
5656
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005657 smsg(_("Total number of words: %d"), words_done);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005658
5659 return OK;
5660}
5661
5662/*
5663 * Make the table that links each word in the soundfold trie to the words it
5664 * can be produced from.
5665 * This is not unlike lines in a file, thus use a memfile to be able to access
5666 * the table efficiently.
5667 * Returns FAIL when out of memory.
5668 */
5669 static int
5670sug_maketable(spellinfo_T *spin)
5671{
5672 garray_T ga;
5673 int res = OK;
5674
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005675 // Allocate a buffer, open a memline for it and create the swap file
5676 // (uses a temp file, not a .swp file).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005677 spin->si_spellbuf = open_spellbuf();
5678 if (spin->si_spellbuf == NULL)
5679 return FAIL;
5680
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005681 // Use a buffer to store the line info, avoids allocating many small
5682 // pieces of memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005683 ga_init2(&ga, 1, 100);
5684
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005685 // recursively go through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005686 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
5687 res = FAIL;
5688
5689 ga_clear(&ga);
5690 return res;
5691}
5692
5693/*
5694 * Fill the table for one node and its children.
5695 * Returns the wordnr at the start of the node.
5696 * Returns -1 when out of memory.
5697 */
5698 static int
5699sug_filltable(
5700 spellinfo_T *spin,
5701 wordnode_T *node,
5702 int startwordnr,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005703 garray_T *gap) // place to store line of numbers
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005704{
5705 wordnode_T *p, *np;
5706 int wordnr = startwordnr;
5707 int nr;
5708 int prev_nr;
5709
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005710 FOR_ALL_NODE_SIBLINGS(node, p)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005711 {
5712 if (p->wn_byte == NUL)
5713 {
5714 gap->ga_len = 0;
5715 prev_nr = 0;
5716 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
5717 {
5718 if (ga_grow(gap, 10) == FAIL)
5719 return -1;
5720
5721 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005722 // Compute the offset from the previous nr and store the
5723 // offset in a way that it takes a minimum number of bytes.
5724 // It's a bit like utf-8, but without the need to mark
5725 // following bytes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005726 nr -= prev_nr;
5727 prev_nr += nr;
5728 gap->ga_len += offset2bytes(nr,
5729 (char_u *)gap->ga_data + gap->ga_len);
5730 }
5731
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005732 // add the NUL byte
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005733 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
5734
5735 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
5736 gap->ga_data, gap->ga_len, TRUE) == FAIL)
5737 return -1;
5738 ++wordnr;
5739
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005740 // Remove extra NUL entries, we no longer need them. We don't
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00005741 // bother freeing the nodes, they won't be reused anyway.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005742 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
5743 p->wn_sibling = p->wn_sibling->wn_sibling;
5744
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005745 // Clear the flags on the remaining NUL node, so that compression
5746 // works a lot better.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005747 p->wn_flags = 0;
5748 p->wn_region = 0;
5749 }
5750 else
5751 {
5752 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
5753 if (wordnr == -1)
5754 return -1;
5755 }
5756 }
5757 return wordnr;
5758}
5759
/*
 * Convert the offset "nr" into a minimal number of bytes, stored in "buf".
 * Similar to utf_char2bytes, but use 8 bits in followup bytes and avoid NUL
 * bytes.
 *
 * "nr" is split in base 255 digits and one is added to each digit, so that
 * no byte is NUL.  The first byte tells the length: it is below 0x80 for one
 * byte, 0x80 plus a digit for two bytes, 0xc0 plus a digit for three bytes
 * and 0xe0 plus a digit for four bytes.  The other digits follow, the most
 * significant one first.
 * "buf" must have room for four bytes.
 * Returns the number of bytes stored, 1 to 4.
 */
    static int
offset2bytes(int nr, char_u *buf)
{
    int		lo, mid, hi, top;   // digits plus one, "lo" is least significant
    int		quot;
    int		count;

    // Split the number in parts of base 255.  We need to avoid NUL bytes.
    lo = nr % 255 + 1;
    quot = nr / 255;
    mid = quot % 255 + 1;
    quot /= 255;
    hi = quot % 255 + 1;
    top = quot / 255 + 1;

    // Use the shortest form in which the leading digit fits in the bits
    // that are left over in the first byte.
    if (top > 1 || hi > 0x1f)
	count = 4;
    else if (hi > 1 || mid > 0x3f)
	count = 3;
    else if (mid > 1 || lo > 0x7f)
	count = 2;
    else
	count = 1;

    switch (count)
    {
	case 4:
	    buf[0] = 0xe0 + top;
	    buf[1] = hi;
	    buf[2] = mid;
	    buf[3] = lo;
	    break;
	case 3:
	    buf[0] = 0xc0 + hi;
	    buf[1] = mid;
	    buf[2] = lo;
	    break;
	case 2:
	    buf[0] = 0x80 + mid;
	    buf[1] = lo;
	    break;
	default:
	    buf[0] = lo;
	    break;
    }
    return count;
}
5804
5805/*
5806 * Write the .sug file in "fname".
5807 */
5808 static void
5809sug_write(spellinfo_T *spin, char_u *fname)
5810{
5811 FILE *fd;
5812 wordnode_T *tree;
5813 int nodecount;
5814 int wcount;
5815 char_u *line;
5816 linenr_T lnum;
5817 int len;
5818
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005819 // Create the file. Note that an existing file is silently overwritten!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005820 fd = mch_fopen((char *)fname, "w");
5821 if (fd == NULL)
5822 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00005823 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005824 return;
5825 }
5826
5827 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02005828 _("Writing suggestion file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005829 spell_message(spin, IObuff);
5830
5831 /*
5832 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
5833 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005834 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005835 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005836 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005837 goto theend;
5838 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005839 putc(VIMSUGVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005840
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005841 // Write si_sugtime to the file.
5842 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005843
5844 /*
5845 * <SUGWORDTREE>
5846 */
5847 spin->si_memtot = 0;
5848 tree = spin->si_foldroot->wn_sibling;
5849
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005850 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005851 clear_node(tree);
5852
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005853 // Count the number of nodes. Needed to be able to allocate the
5854 // memory when reading the nodes. Also fills in index for shared
5855 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005856 nodecount = put_node(NULL, tree, 0, 0, FALSE);
5857
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005858 // number of nodes in 4 bytes
5859 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005860 spin->si_memtot += nodecount + nodecount * sizeof(int);
5861
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005862 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005863 (void)put_node(fd, tree, 0, 0, FALSE);
5864
5865 /*
5866 * <SUGTABLE>: <sugwcount> <sugline> ...
5867 */
5868 wcount = spin->si_spellbuf->b_ml.ml_line_count;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005869 put_bytes(fd, (long_u)wcount, 4); // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005870
5871 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
5872 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005873 // <sugline>: <sugnr> ... NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005874 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
5875 len = (int)STRLEN(line) + 1;
5876 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
5877 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005878 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005879 goto theend;
5880 }
5881 spin->si_memtot += len;
5882 }
5883
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005884 // Write another byte to check for errors.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005885 if (putc(0, fd) == EOF)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005886 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005887
5888 vim_snprintf((char *)IObuff, IOSIZE,
5889 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
5890 spell_message(spin, IObuff);
5891
5892theend:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005893 // close the file
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005894 fclose(fd);
5895}
5896
5897
5898/*
5899 * Create a Vim spell file from one or more word lists.
5900 * "fnames[0]" is the output file name.
5901 * "fnames[fcount - 1]" is the last input file name.
5902 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
5903 * and ".spl" is appended to make the output file name.
5904 */
5905 void
5906mkspell(
5907 int fcount,
5908 char_u **fnames,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005909 int ascii, // -ascii argument given
5910 int over_write, // overwrite existing output file
5911 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005912{
5913 char_u *fname = NULL;
5914 char_u *wfname;
5915 char_u **innames;
5916 int incount;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005917 afffile_T *(afile[MAXREGIONS]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005918 int i;
5919 int len;
5920 stat_T st;
5921 int error = FALSE;
5922 spellinfo_T spin;
5923
Bram Moolenaara80faa82020-04-12 19:37:17 +02005924 CLEAR_FIELD(spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005925 spin.si_verbose = !added_word;
5926 spin.si_ascii = ascii;
5927 spin.si_followup = TRUE;
5928 spin.si_rem_accents = TRUE;
Bram Moolenaar04935fb2022-01-08 16:19:22 +00005929 ga_init2(&spin.si_rep, sizeof(fromto_T), 20);
5930 ga_init2(&spin.si_repsal, sizeof(fromto_T), 20);
5931 ga_init2(&spin.si_sal, sizeof(fromto_T), 20);
5932 ga_init2(&spin.si_map, sizeof(char_u), 100);
5933 ga_init2(&spin.si_comppat, sizeof(char_u *), 20);
5934 ga_init2(&spin.si_prefcond, sizeof(char_u *), 50);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005935 hash_init(&spin.si_commonwords);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005936 spin.si_newcompID = 127; // start compound ID at first maximum
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005937
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005938 // default: fnames[0] is output file, following are input files
Bram Moolenaar927b7dd2020-06-29 22:24:56 +02005939 // When "fcount" is 1 there is only one file.
5940 innames = &fnames[fcount == 1 ? 0 : 1];
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005941 incount = fcount - 1;
5942
5943 wfname = alloc(MAXPATHL);
5944 if (wfname == NULL)
5945 return;
5946
5947 if (fcount >= 1)
5948 {
5949 len = (int)STRLEN(fnames[0]);
5950 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
5951 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005952 // For ":mkspell path/en.latin1.add" output file is
5953 // "path/en.latin1.add.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005954 incount = 1;
5955 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
5956 }
5957 else if (fcount == 1)
5958 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005959 // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005960 incount = 1;
5961 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5962 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5963 }
5964 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
5965 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005966 // Name ends in ".spl", use as the file name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005967 vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
5968 }
5969 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005970 // Name should be language, make the file name from it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005971 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5972 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5973
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005974 // Check for .ascii.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005975 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
5976 spin.si_ascii = TRUE;
5977
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005978 // Check for .add.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005979 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
5980 spin.si_add = TRUE;
5981 }
5982
5983 if (incount <= 0)
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00005984 emsg(_(e_invalid_argument)); // need at least output and input names
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005985 else if (vim_strchr(gettail(wfname), '_') != NULL)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005986 emsg(_(e_output_file_name_must_not_have_region_name));
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005987 else if (incount > MAXREGIONS)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005988 semsg(_(e_only_up_to_nr_regions_supported), MAXREGIONS);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005989 else
5990 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005991 // Check for overwriting before doing things that may take a lot of
5992 // time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005993 if (!over_write && mch_stat((char *)wfname, &st) >= 0)
5994 {
Bram Moolenaar108010a2021-06-27 22:03:33 +02005995 emsg(_(e_file_exists));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005996 goto theend;
5997 }
5998 if (mch_isdir(wfname))
5999 {
Bram Moolenaar4dea2d92022-03-31 11:37:57 +01006000 semsg(_(e_str_is_directory), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006001 goto theend;
6002 }
6003
6004 fname = alloc(MAXPATHL);
6005 if (fname == NULL)
6006 goto theend;
6007
6008 /*
6009 * Init the aff and dic pointers.
6010 * Get the region names if there are more than 2 arguments.
6011 */
6012 for (i = 0; i < incount; ++i)
6013 {
6014 afile[i] = NULL;
6015
6016 if (incount > 1)
6017 {
6018 len = (int)STRLEN(innames[i]);
6019 if (STRLEN(gettail(innames[i])) < 5
6020 || innames[i][len - 3] != '_')
6021 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006022 semsg(_(e_invalid_region_in_str), innames[i]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006023 goto theend;
6024 }
6025 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
6026 spin.si_region_name[i * 2 + 1] =
6027 TOLOWER_ASC(innames[i][len - 1]);
6028 }
6029 }
6030 spin.si_region_count = incount;
6031
6032 spin.si_foldroot = wordtree_alloc(&spin);
6033 spin.si_keeproot = wordtree_alloc(&spin);
6034 spin.si_prefroot = wordtree_alloc(&spin);
6035 if (spin.si_foldroot == NULL
6036 || spin.si_keeproot == NULL
6037 || spin.si_prefroot == NULL)
6038 {
6039 free_blocks(spin.si_blocks);
6040 goto theend;
6041 }
6042
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006043 // When not producing a .add.spl file clear the character table when
6044 // we encounter one in the .aff file. This means we dump the current
6045 // one in the .spl file if the .aff file doesn't define one. That's
6046 // better than guessing the contents, the table will match a
6047 // previously loaded spell file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006048 if (!spin.si_add)
6049 spin.si_clear_chartab = TRUE;
6050
6051 /*
6052 * Read all the .aff and .dic files.
6053 * Text is converted to 'encoding'.
6054 * Words are stored in the case-folded and keep-case trees.
6055 */
6056 for (i = 0; i < incount && !error; ++i)
6057 {
6058 spin.si_conv.vc_type = CONV_NONE;
6059 spin.si_region = 1 << i;
6060
6061 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
6062 if (mch_stat((char *)fname, &st) >= 0)
6063 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006064 // Read the .aff file. Will init "spin->si_conv" based on the
6065 // "SET" line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006066 afile[i] = spell_read_aff(&spin, fname);
6067 if (afile[i] == NULL)
6068 error = TRUE;
6069 else
6070 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006071 // Read the .dic file and store the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006072 vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
6073 innames[i]);
6074 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
6075 error = TRUE;
6076 }
6077 }
6078 else
6079 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006080 // No .aff file, try reading the file as a word list. Store
6081 // the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006082 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
6083 error = TRUE;
6084 }
6085
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006086 // Free any conversion stuff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006087 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006088 }
6089
6090 if (spin.si_compflags != NULL && spin.si_nobreak)
Bram Moolenaar32526b32019-01-19 17:43:09 +01006091 msg(_("Warning: both compounding and NOBREAK specified"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006092
6093 if (!error && !got_int)
6094 {
6095 /*
6096 * Combine tails in the tree.
6097 */
6098 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02006099 wordtree_compress(&spin, spin.si_foldroot, "case-folded");
6100 wordtree_compress(&spin, spin.si_keeproot, "keep-case");
6101 wordtree_compress(&spin, spin.si_prefroot, "prefixes");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006102 }
6103
6104 if (!error && !got_int)
6105 {
6106 /*
6107 * Write the info in the spell file.
6108 */
6109 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02006110 _("Writing spell file %s..."), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006111 spell_message(&spin, IObuff);
6112
6113 error = write_vim_spell(&spin, wfname) == FAIL;
6114
6115 spell_message(&spin, (char_u *)_("Done!"));
6116 vim_snprintf((char *)IObuff, IOSIZE,
6117 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
6118 spell_message(&spin, IObuff);
6119
6120 /*
6121 * If the file is loaded need to reload it.
6122 */
6123 if (!error)
6124 spell_reload_one(wfname, added_word);
6125 }
6126
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006127 // Free the allocated memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006128 ga_clear(&spin.si_rep);
6129 ga_clear(&spin.si_repsal);
6130 ga_clear(&spin.si_sal);
6131 ga_clear(&spin.si_map);
6132 ga_clear(&spin.si_comppat);
6133 ga_clear(&spin.si_prefcond);
6134 hash_clear_all(&spin.si_commonwords, 0);
6135
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006136 // Free the .aff file structures.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006137 for (i = 0; i < incount; ++i)
6138 if (afile[i] != NULL)
6139 spell_free_aff(afile[i]);
6140
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006141 // Free all the bits and pieces at once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006142 free_blocks(spin.si_blocks);
6143
6144 /*
6145 * If there is soundfolding info and no NOSUGFILE item create the
6146 * .sug file with the soundfolded word trie.
6147 */
6148 if (spin.si_sugtime != 0 && !error && !got_int)
6149 spell_make_sugfile(&spin, wfname);
6150
6151 }
6152
6153theend:
6154 vim_free(fname);
6155 vim_free(wfname);
6156}
6157
6158/*
6159 * Display a message for spell file processing when 'verbose' is set or using
6160 * ":mkspell". "str" can be IObuff.
6161 */
6162 static void
6163spell_message(spellinfo_T *spin, char_u *str)
6164{
6165 if (spin->si_verbose || p_verbose > 2)
6166 {
6167 if (!spin->si_verbose)
6168 verbose_enter();
Bram Moolenaar32526b32019-01-19 17:43:09 +01006169 msg((char *)str);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006170 out_flush();
6171 if (!spin->si_verbose)
6172 verbose_leave();
6173 }
6174}
6175
6176/*
6177 * ":[count]spellgood {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006178 * ":[count]spellwrong {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006179 * ":[count]spellundo {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006180 * ":[count]spellrare {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006181 */
6182 void
6183ex_spell(exarg_T *eap)
6184{
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006185 spell_add_word(eap->arg, (int)STRLEN(eap->arg),
6186 eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
6187 eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006188 eap->forceit ? 0 : (int)eap->line2,
6189 eap->cmdidx == CMD_spellundo);
6190}
6191
6192/*
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006193 * Add "word[len]" to 'spellfile' as a good, rare or bad word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006194 */
6195 void
6196spell_add_word(
6197 char_u *word,
6198 int len,
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006199 int what, // SPELL_ADD_ values
6200 int idx, // "zG" and "zW": zero, otherwise index in
6201 // 'spellfile'
6202 int undo) // TRUE for "zug", "zuG", "zuw" and "zuW"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006203{
6204 FILE *fd = NULL;
6205 buf_T *buf = NULL;
6206 int new_spf = FALSE;
6207 char_u *fname;
6208 char_u *fnamebuf = NULL;
6209 char_u line[MAXWLEN * 2];
6210 long fpos, fpos_next = 0;
6211 int i;
6212 char_u *spf;
6213
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01006214 if (!valid_spell_word(word))
Bram Moolenaar7c824682022-05-08 22:32:58 +01006215 {
6216 emsg(_(e_illegal_character_in_word));
6217 return;
6218 }
6219
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006220 if (idx == 0) // use internal wordlist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006221 {
6222 if (int_wordlist == NULL)
6223 {
6224 int_wordlist = vim_tempname('s', FALSE);
6225 if (int_wordlist == NULL)
6226 return;
6227 }
6228 fname = int_wordlist;
6229 }
6230 else
6231 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006232 // If 'spellfile' isn't set figure out a good default value.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006233 if (*curwin->w_s->b_p_spf == NUL)
6234 {
6235 init_spellfile();
6236 new_spf = TRUE;
6237 }
6238
6239 if (*curwin->w_s->b_p_spf == NUL)
6240 {
Bram Moolenaar74409f62022-01-01 15:58:22 +00006241 semsg(_(e_option_str_is_not_set), "spellfile");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006242 return;
6243 }
6244 fnamebuf = alloc(MAXPATHL);
6245 if (fnamebuf == NULL)
6246 return;
6247
6248 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
6249 {
6250 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
6251 if (i == idx)
6252 break;
6253 if (*spf == NUL)
6254 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006255 semsg(_(e_spellfile_does_not_have_nr_entries), idx);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006256 vim_free(fnamebuf);
6257 return;
6258 }
6259 }
6260
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006261 // Check that the user isn't editing the .add file somewhere.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006262 buf = buflist_findname_exp(fnamebuf);
6263 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
6264 buf = NULL;
6265 if (buf != NULL && bufIsChanged(buf))
6266 {
Bram Moolenaareb822a22021-12-31 15:09:27 +00006267 emsg(_(e_file_is_loaded_in_another_buffer));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006268 vim_free(fnamebuf);
6269 return;
6270 }
6271
6272 fname = fnamebuf;
6273 }
6274
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006275 if (what == SPELL_ADD_BAD || undo)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006276 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006277 // When the word appears as good word we need to remove that one,
6278 // since its flags sort before the one with WF_BANNED.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006279 fd = mch_fopen((char *)fname, "r");
6280 if (fd != NULL)
6281 {
6282 while (!vim_fgets(line, MAXWLEN * 2, fd))
6283 {
6284 fpos = fpos_next;
6285 fpos_next = ftell(fd);
Bram Moolenaar416b5f42022-02-25 21:47:48 +00006286 if (fpos_next < 0)
6287 break; // should never happen
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006288 if (STRNCMP(word, line, len) == 0
6289 && (line[len] == '/' || line[len] < ' '))
6290 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006291 // Found duplicate word. Remove it by writing a '#' at
6292 // the start of the line. Mixing reading and writing
6293 // doesn't work for all systems, close the file first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006294 fclose(fd);
6295 fd = mch_fopen((char *)fname, "r+");
6296 if (fd == NULL)
6297 break;
6298 if (fseek(fd, fpos, SEEK_SET) == 0)
6299 {
6300 fputc('#', fd);
6301 if (undo)
6302 {
6303 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006304 smsg(_("Word '%.*s' removed from %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006305 len, word, NameBuff);
6306 }
6307 }
Bram Moolenaar2c363a22021-02-03 20:14:23 +01006308 if (fseek(fd, fpos_next, SEEK_SET) != 0)
6309 {
6310 PERROR(_("Seek error in spellfile"));
6311 break;
6312 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006313 }
6314 }
6315 if (fd != NULL)
6316 fclose(fd);
6317 }
6318 }
6319
6320 if (!undo)
6321 {
6322 fd = mch_fopen((char *)fname, "a");
6323 if (fd == NULL && new_spf)
6324 {
6325 char_u *p;
6326
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006327 // We just initialized the 'spellfile' option and can't open the
6328 // file. We may need to create the "spell" directory first. We
6329 // already checked the runtime directory is writable in
6330 // init_spellfile().
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006331 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
6332 {
6333 int c = *p;
6334
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006335 // The directory doesn't exist. Try creating it and opening
6336 // the file again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006337 *p = NUL;
6338 vim_mkdir(fname, 0755);
6339 *p = c;
6340 fd = mch_fopen((char *)fname, "a");
6341 }
6342 }
6343
6344 if (fd == NULL)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00006345 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006346 else
6347 {
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006348 if (what == SPELL_ADD_BAD)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006349 fprintf(fd, "%.*s/!\n", len, word);
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006350 else if (what == SPELL_ADD_RARE)
6351 fprintf(fd, "%.*s/?\n", len, word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006352 else
6353 fprintf(fd, "%.*s\n", len, word);
6354 fclose(fd);
6355
6356 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006357 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006358 }
6359 }
6360
6361 if (fd != NULL)
6362 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006363 // Update the .add.spl file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006364 mkspell(1, &fname, FALSE, TRUE, TRUE);
6365
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006366 // If the .add file is edited somewhere, reload it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006367 if (buf != NULL)
Rob Pilling8196e942022-02-11 15:12:10 +00006368 buf_reload(buf, buf->b_orig_mode, FALSE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006369
6370 redraw_all_later(SOME_VALID);
6371 }
6372 vim_free(fnamebuf);
6373}
6374
6375/*
6376 * Initialize 'spellfile' for the current buffer.
6377 */
6378 static void
6379init_spellfile(void)
6380{
6381 char_u *buf;
6382 int l;
6383 char_u *fname;
6384 char_u *rtp;
6385 char_u *lend;
6386 int aspath = FALSE;
6387 char_u *lstart = curbuf->b_s.b_p_spl;
6388
6389 if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
6390 {
6391 buf = alloc(MAXPATHL);
6392 if (buf == NULL)
6393 return;
6394
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006395 // Find the end of the language name. Exclude the region. If there
6396 // is a path separator remember the start of the tail.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006397 for (lend = curwin->w_s->b_p_spl; *lend != NUL
6398 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
6399 if (vim_ispathsep(*lend))
6400 {
6401 aspath = TRUE;
6402 lstart = lend + 1;
6403 }
6404
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006405 // Loop over all entries in 'runtimepath'. Use the first one where we
6406 // are allowed to write.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006407 rtp = p_rtp;
6408 while (*rtp != NUL)
6409 {
6410 if (aspath)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006411 // Use directory of an entry with path, e.g., for
6412 // "/dir/lg.utf-8.spl" use "/dir".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006413 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6414 lstart - curbuf->b_s.b_p_spl - 1);
6415 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006416 // Copy the path from 'runtimepath' to buf[].
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006417 copy_option_part(&rtp, buf, MAXPATHL, ",");
6418 if (filewritable(buf) == 2)
6419 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006420 // Use the first language name from 'spelllang' and the
6421 // encoding used in the first loaded .spl file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006422 if (aspath)
6423 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6424 lend - curbuf->b_s.b_p_spl);
6425 else
6426 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006427 // Create the "spell" directory if it doesn't exist yet.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006428 l = (int)STRLEN(buf);
6429 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
6430 if (filewritable(buf) != 2)
6431 vim_mkdir(buf, 0755);
6432
6433 l = (int)STRLEN(buf);
6434 vim_snprintf((char *)buf + l, MAXPATHL - l,
6435 "/%.*s", (int)(lend - lstart), lstart);
6436 }
6437 l = (int)STRLEN(buf);
6438 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
6439 ->lp_slang->sl_fname;
6440 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
6441 fname != NULL
6442 && strstr((char *)gettail(fname), ".ascii.") != NULL
6443 ? (char_u *)"ascii" : spell_enc());
Bram Moolenaar31e5c602022-04-15 13:53:33 +01006444 set_option_value_give_err((char_u *)"spellfile",
6445 0L, buf, OPT_LOCAL);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006446 break;
6447 }
6448 aspath = FALSE;
6449 }
6450
6451 vim_free(buf);
6452 }
6453}
6454
6455
6456
6457/*
6458 * Set the spell character tables from strings in the affix file.
6459 */
6460 static int
6461set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
6462{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006463 // We build the new tables here first, so that we can compare with the
6464 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006465 spelltab_T new_st;
6466 char_u *pf = fol, *pl = low, *pu = upp;
6467 int f, l, u;
6468
6469 clear_spell_chartab(&new_st);
6470
6471 while (*pf != NUL)
6472 {
6473 if (*pl == NUL || *pu == NUL)
6474 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006475 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006476 return FAIL;
6477 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006478 f = mb_ptr2char_adv(&pf);
6479 l = mb_ptr2char_adv(&pl);
6480 u = mb_ptr2char_adv(&pu);
Bram Moolenaar264b74f2019-01-24 17:18:42 +01006481
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006482 // Every character that appears is a word character.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006483 if (f < 256)
6484 new_st.st_isw[f] = TRUE;
6485 if (l < 256)
6486 new_st.st_isw[l] = TRUE;
6487 if (u < 256)
6488 new_st.st_isw[u] = TRUE;
6489
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006490 // if "LOW" and "FOL" are not the same the "LOW" char needs
6491 // case-folding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006492 if (l < 256 && l != f)
6493 {
6494 if (f >= 256)
6495 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006496 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006497 return FAIL;
6498 }
6499 new_st.st_fold[l] = f;
6500 }
6501
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006502 // if "UPP" and "FOL" are not the same the "UPP" char needs
6503 // case-folding, it's upper case and the "UPP" is the upper case of
6504 // "FOL" .
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006505 if (u < 256 && u != f)
6506 {
6507 if (f >= 256)
6508 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006509 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006510 return FAIL;
6511 }
6512 new_st.st_fold[u] = f;
6513 new_st.st_isu[u] = TRUE;
6514 new_st.st_upper[f] = u;
6515 }
6516 }
6517
6518 if (*pl != NUL || *pu != NUL)
6519 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006520 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006521 return FAIL;
6522 }
6523
6524 return set_spell_finish(&new_st);
6525}
6526
6527/*
6528 * Set the spell character tables from strings in the .spl file.
6529 */
6530 static void
6531set_spell_charflags(
6532 char_u *flags,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006533 int cnt, // length of "flags"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006534 char_u *fol)
6535{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006536 // We build the new tables here first, so that we can compare with the
6537 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006538 spelltab_T new_st;
6539 int i;
6540 char_u *p = fol;
6541 int c;
6542
6543 clear_spell_chartab(&new_st);
6544
6545 for (i = 0; i < 128; ++i)
6546 {
6547 if (i < cnt)
6548 {
6549 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
6550 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
6551 }
6552
6553 if (*p != NUL)
6554 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006555 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006556 new_st.st_fold[i + 128] = c;
6557 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
6558 new_st.st_upper[c] = i + 128;
6559 }
6560 }
6561
6562 (void)set_spell_finish(&new_st);
6563}
6564
6565 static int
6566set_spell_finish(spelltab_T *new_st)
6567{
6568 int i;
6569
6570 if (did_set_spelltab)
6571 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006572 // check that it's the same table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006573 for (i = 0; i < 256; ++i)
6574 {
6575 if (spelltab.st_isw[i] != new_st->st_isw[i]
6576 || spelltab.st_isu[i] != new_st->st_isu[i]
6577 || spelltab.st_fold[i] != new_st->st_fold[i]
6578 || spelltab.st_upper[i] != new_st->st_upper[i])
6579 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006580 emsg(_(e_word_characters_differ_between_spell_files));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006581 return FAIL;
6582 }
6583 }
6584 }
6585 else
6586 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006587 // copy the new spelltab into the one being used
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006588 spelltab = *new_st;
6589 did_set_spelltab = TRUE;
6590 }
6591
6592 return OK;
6593}
6594
6595/*
6596 * Write the table with prefix conditions to the .spl file.
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006597 * When "fd" is NULL only count the length of what is written and return it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006598 */
6599 static int
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006600write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006601{
6602 int i;
6603 char_u *p;
6604 int len;
6605 int totlen;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006606
6607 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006608 put_bytes(fd, (long_u)gap->ga_len, 2); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006609
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006610 totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006611
6612 for (i = 0; i < gap->ga_len; ++i)
6613 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006614 // <prefcond> : <condlen> <condstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006615 p = ((char_u **)gap->ga_data)[i];
6616 if (p != NULL)
6617 {
6618 len = (int)STRLEN(p);
6619 if (fd != NULL)
6620 {
6621 fputc(len, fd);
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006622 *fwv &= fwrite(p, (size_t)len, (size_t)1, fd);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006623 }
6624 totlen += len;
6625 }
6626 else if (fd != NULL)
6627 fputc(0, fd);
6628 }
6629
6630 return totlen;
6631}
6632
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006633/*
6634 * Use map string "map" for languages "lp".
6635 */
6636 static void
6637set_map_str(slang_T *lp, char_u *map)
6638{
6639 char_u *p;
6640 int headc = 0;
6641 int c;
6642 int i;
6643
6644 if (*map == NUL)
6645 {
6646 lp->sl_has_map = FALSE;
6647 return;
6648 }
6649 lp->sl_has_map = TRUE;
6650
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006651 // Init the array and hash tables empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006652 for (i = 0; i < 256; ++i)
6653 lp->sl_map_array[i] = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006654 hash_init(&lp->sl_map_hash);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006655
6656 /*
6657 * The similar characters are stored separated with slashes:
6658 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
6659 * before the same slash. For characters above 255 sl_map_hash is used.
6660 */
6661 for (p = map; *p != NUL; )
6662 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006663 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006664 if (c == '/')
6665 headc = 0;
6666 else
6667 {
6668 if (headc == 0)
6669 headc = c;
6670
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006671 // Characters above 255 don't fit in sl_map_array[], put them in
6672 // the hash table. Each entry is the char, a NUL the headchar and
6673 // a NUL.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006674 if (c >= 256)
6675 {
6676 int cl = mb_char2len(c);
6677 int headcl = mb_char2len(headc);
6678 char_u *b;
6679 hash_T hash;
6680 hashitem_T *hi;
6681
Bram Moolenaar964b3742019-05-24 18:54:09 +02006682 b = alloc(cl + headcl + 2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006683 if (b == NULL)
6684 return;
6685 mb_char2bytes(c, b);
6686 b[cl] = NUL;
6687 mb_char2bytes(headc, b + cl + 1);
6688 b[cl + 1 + headcl] = NUL;
6689 hash = hash_hash(b);
6690 hi = hash_lookup(&lp->sl_map_hash, b, hash);
6691 if (HASHITEM_EMPTY(hi))
6692 hash_add_item(&lp->sl_map_hash, hi, b, hash);
6693 else
6694 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006695 // This should have been checked when generating the .spl
6696 // file.
Bram Moolenaar677658a2022-01-05 16:09:06 +00006697 emsg(_(e_duplicate_char_in_map_entry));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006698 vim_free(b);
6699 }
6700 }
6701 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006702 lp->sl_map_array[c] = headc;
6703 }
6704 }
6705}
6706
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006707#endif // FEAT_SPELL