blob: 0b9536dc16c9a28dbfe43b8176b07e8430f394f6 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spellfile.c: code for reading and writing spell files.
12 *
13 * See spell.c for information about spell checking.
14 */
15
16/*
17 * Vim spell file format: <HEADER>
18 * <SECTIONS>
19 * <LWORDTREE>
20 * <KWORDTREE>
21 * <PREFIXTREE>
22 *
23 * <HEADER>: <fileID> <versionnr>
24 *
25 * <fileID> 8 bytes "VIMspell"
26 * <versionnr> 1 byte VIMSPELLVERSION
27 *
28 *
29 * Sections make it possible to add information to the .spl file without
30 * making it incompatible with previous versions. There are two kinds of
31 * sections:
32 * 1. Not essential for correct spell checking. E.g. for making suggestions.
33 * These are skipped when not supported.
34 * 2. Optional information, but essential for spell checking when present.
35 * E.g. conditions for affixes. When this section is present but not
36 * supported an error message is given.
37 *
38 * <SECTIONS>: <section> ... <sectionend>
39 *
40 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
41 *
42 * <sectionID> 1 byte number from 0 to 254 identifying the section
43 *
44 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
45 * spell checking
46 *
47 * <sectionlen> 4 bytes length of section contents, MSB first
48 *
49 * <sectionend> 1 byte SN_END
50 *
51 *
52 * sectionID == SN_INFO: <infotext>
53 * <infotext> N bytes free format text with spell file info (version,
54 * website, etc)
55 *
56 * sectionID == SN_REGION: <regionname> ...
Bram Moolenaar2993ac52018-02-10 14:12:43 +010057 * <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower
58 * case. First <regionname> is region 1.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +020059 *
60 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
61 * <folcharslen> <folchars>
62 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
63 * <charflags> N bytes List of flags (first one is for character 128):
64 * 0x01 word character CF_WORD
65 * 0x02 upper-case character CF_UPPER
66 * <folcharslen> 2 bytes Number of bytes in <folchars>.
67 * <folchars> N bytes Folded characters, first one is for character 128.
68 *
69 * sectionID == SN_MIDWORD: <midword>
70 * <midword> N bytes Characters that are word characters only when used
71 * in the middle of a word.
72 *
73 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
74 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
75 * <prefcond> : <condlen> <condstr>
76 * <condlen> 1 byte Length of <condstr>.
77 * <condstr> N bytes Condition for the prefix.
78 *
79 * sectionID == SN_REP: <repcount> <rep> ...
80 * <repcount> 2 bytes number of <rep> items, MSB first.
81 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
82 * <repfromlen> 1 byte length of <repfrom>
83 * <repfrom> N bytes "from" part of replacement
84 * <reptolen> 1 byte length of <repto>
85 * <repto> N bytes "to" part of replacement
86 *
87 * sectionID == SN_REPSAL: <repcount> <rep> ...
88 * just like SN_REP but for soundfolded words
89 *
90 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
91 * <salflags> 1 byte flags for soundsalike conversion:
92 * SAL_F0LLOWUP
93 * SAL_COLLAPSE
94 * SAL_REM_ACCENTS
95 * <salcount> 2 bytes number of <sal> items following
96 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
97 * <salfromlen> 1 byte length of <salfrom>
98 * <salfrom> N bytes "from" part of soundsalike
99 * <saltolen> 1 byte length of <salto>
100 * <salto> N bytes "to" part of soundsalike
101 *
102 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
103 * <sofofromlen> 2 bytes length of <sofofrom>
104 * <sofofrom> N bytes "from" part of soundfold
105 * <sofotolen> 2 bytes length of <sofoto>
106 * <sofoto> N bytes "to" part of soundfold
107 *
108 * sectionID == SN_SUGFILE: <timestamp>
109 * <timestamp> 8 bytes time in seconds that must match with .sug file
110 *
111 * sectionID == SN_NOSPLITSUGS: nothing
112 *
113 * sectionID == SN_NOCOMPOUNDSUGS: nothing
114 *
115 * sectionID == SN_WORDS: <word> ...
116 * <word> N bytes NUL terminated common word
117 *
118 * sectionID == SN_MAP: <mapstr>
119 * <mapstr> N bytes String with sequences of similar characters,
120 * separated by slashes.
121 *
122 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
123 * <comppatcount> <comppattern> ... <compflags>
124 * <compmax> 1 byte Maximum nr of words in compound word.
125 * <compminlen> 1 byte Minimal word length for compounding.
126 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
127 * <compoptions> 2 bytes COMP_ flags.
128 * <comppatcount> 2 bytes number of <comppattern> following
129 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
130 * slashes.
131 *
132 * <comppattern>: <comppatlen> <comppattext>
133 * <comppatlen> 1 byte length of <comppattext>
134 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
135 *
136 * sectionID == SN_NOBREAK: (empty, its presence is what matters)
137 *
138 * sectionID == SN_SYLLABLE: <syllable>
139 * <syllable> N bytes String from SYLLABLE item.
140 *
141 * <LWORDTREE>: <wordtree>
142 *
143 * <KWORDTREE>: <wordtree>
144 *
145 * <PREFIXTREE>: <wordtree>
146 *
147 *
148 * <wordtree>: <nodecount> <nodedata> ...
149 *
150 * <nodecount> 4 bytes Number of nodes following. MSB first.
151 *
152 * <nodedata>: <siblingcount> <sibling> ...
153 *
154 * <siblingcount> 1 byte Number of siblings in this node. The siblings
155 * follow in sorted order.
156 *
157 * <sibling>: <byte> [ <nodeidx> <xbyte>
158 * | <flags> [<flags2>] [<region>] [<affixID>]
159 * | [<pflags>] <affixID> <prefcondnr> ]
160 *
161 * <byte> 1 byte Byte value of the sibling. Special cases:
162 * BY_NOFLAGS: End of word without flags and for all
163 * regions.
164 * For PREFIXTREE <affixID> and
165 * <prefcondnr> follow.
166 * BY_FLAGS: End of word, <flags> follow.
167 * For PREFIXTREE <pflags>, <affixID>
168 * and <prefcondnr> follow.
169 * BY_FLAGS2: End of word, <flags> and <flags2>
170 * follow. Not used in PREFIXTREE.
171 * BY_INDEX: Child of sibling is shared, <nodeidx>
172 * and <xbyte> follow.
173 *
174 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
175 *
176 * <xbyte> 1 byte byte value of the sibling.
177 *
178 * <flags> 1 byte bitmask of:
179 * WF_ALLCAP word must have only capitals
180 * WF_ONECAP first char of word must be capital
181 * WF_KEEPCAP keep-case word
182 * WF_FIXCAP keep-case word, all caps not allowed
183 * WF_RARE rare word
184 * WF_BANNED bad word
185 * WF_REGION <region> follows
186 * WF_AFX <affixID> follows
187 *
188 * <flags2> 1 byte Bitmask of:
189 * WF_HAS_AFF >> 8 word includes affix
190 * WF_NEEDCOMP >> 8 word only valid in compound
191 * WF_NOSUGGEST >> 8 word not used for suggestions
192 * WF_COMPROOT >> 8 word already a compound
193 * WF_NOCOMPBEF >> 8 no compounding before this word
194 * WF_NOCOMPAFT >> 8 no compounding after this word
195 *
196 * <pflags> 1 byte bitmask of:
197 * WFP_RARE rare prefix
198 * WFP_NC non-combining prefix
199 * WFP_UP letter after prefix made upper case
200 *
201 * <region> 1 byte Bitmask for regions in which word is valid. When
202 * omitted it's valid in all regions.
203 * Lowest bit is for region 1.
204 *
205 * <affixID> 1 byte ID of affix that can be used with this word. In
206 * PREFIXTREE used for the required prefix ID.
207 *
208 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
209 * from the SN_PREFCOND section.
210 *
211 * All text characters are in 'encoding', but stored as single bytes.
212 */
213
214/*
215 * Vim .sug file format: <SUGHEADER>
216 * <SUGWORDTREE>
217 * <SUGTABLE>
218 *
219 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
220 *
221 * <fileID> 6 bytes "VIMsug"
222 * <versionnr> 1 byte VIMSUGVERSION
223 * <timestamp> 8 bytes timestamp that must match with .spl file
224 *
225 *
226 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
227 *
228 *
229 * <SUGTABLE>: <sugwcount> <sugline> ...
230 *
231 * <sugwcount> 4 bytes number of <sugline> following
232 *
233 * <sugline>: <sugnr> ... NUL
234 *
235 * <sugnr>: X bytes word number that results in this soundfolded word,
236 * stored as an offset to the previous number in as
237 * few bytes as possible (see offset2bytes())
238 */
239
240#include "vim.h"
241
242#if defined(FEAT_SPELL) || defined(PROTO)
243
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100244#ifndef UNIX // it's in os_unix.h for Unix
245# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200246#endif
247
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100248#ifndef UNIX // it's in os_unix.h for Unix
249# include <time.h> // for time_t
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200250#endif
251
// Special values of <byte> in a word tree.  Not every value occurs in every
// tree: some are only used in the tree for postponed prefixes, some only in
// the other trees.
#define BY_NOFLAGS	0	// end of word without flags or region; for
				// postponed prefix: no <pflags>
#define BY_INDEX	1	// child is shared, index follows
#define BY_FLAGS	2	// end of word, <flags> byte follows; for
				// postponed prefix: <pflags> follows
#define BY_FLAGS2	3	// end of word, <flags> and <flags2> bytes
				// follow; never used in prefix tree
#define BY_SPECIAL  BY_FLAGS2	// highest special byte value

#define ZERO_FLAG	65009	// used when flag is zero: "0"

// Bits of <salflags> as stored in the .spl file (soundsalike conversion).
#define SAL_F0LLOWUP		1
#define SAL_COLLAPSE		2
#define SAL_REM_ACCENTS		4

// <fileID> and <versionnr> of the <HEADER> of a Vim spell file.
#define VIMSPELLMAGIC "VIMspell"  // string at start of Vim spell file
#define VIMSPELLMAGICL 8	  // number of bytes in VIMSPELLMAGIC
#define VIMSPELLVERSION 50

// Values of <sectionID>.  Only renumber them when VIMSPELLVERSION changes!
#define SN_REGION	0	// <regionname> section
#define SN_CHARFLAGS	1	// charflags section
#define SN_MIDWORD	2	// <midword> section
#define SN_PREFCOND	3	// <prefcond> section
#define SN_REP		4	// REP items section
#define SN_SAL		5	// SAL items section
#define SN_SOFO		6	// soundfolding section
#define SN_MAP		7	// MAP items section
#define SN_COMPOUND	8	// compound words section
#define SN_SYLLABLE	9	// syllable section
#define SN_NOBREAK	10	// NOBREAK section
#define SN_SUGFILE	11	// timestamp for .sug file
#define SN_REPSAL	12	// REPSAL items section
#define SN_WORDS	13	// common words
#define SN_NOSPLITSUGS	14	// don't split word for suggestions
#define SN_INFO		15	// info section
#define SN_NOCOMPOUNDSUGS 16	// don't compound for suggestions
#define SN_END		255	// end of sections

#define SNF_REQUIRED	1	// <sectionflags>: required section

// Bits of a <charflags> byte in the SN_CHARFLAGS section.
#define CF_WORD		0x01	// word character
#define CF_UPPER	0x02	// upper-case character
298
/*
 * Iterate "np" over "node" and every node reachable from it through the
 * "wn_sibling" links, stopping at the first NULL link.  "node" itself is
 * visited first.
 */
#define FOR_ALL_NODE_SIBLINGS(node, np) \
    for ((np) = (node); (np) != NULL; (np) = (np)->wn_sibling)
304
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200305static int set_spell_finish(spelltab_T *new_st);
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +0000306static int write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200307static int read_region_section(FILE *fd, slang_T *slang, int len);
308static int read_charflags_section(FILE *fd);
309static int read_prefcond_section(FILE *fd, slang_T *lp);
310static int read_rep_section(FILE *fd, garray_T *gap, short *first);
311static int read_sal_section(FILE *fd, slang_T *slang);
312static int read_words_section(FILE *fd, slang_T *lp, int len);
313static int read_sofo_section(FILE *fd, slang_T *slang);
314static int read_compound(FILE *fd, slang_T *slang, int len);
315static int set_sofo(slang_T *lp, char_u *from, char_u *to);
316static void set_sal_first(slang_T *lp);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200317static int *mb_str2wide(char_u *s);
Bram Moolenaar07399e72020-08-24 20:05:50 +0200318static int spell_read_tree(FILE *fd, char_u **bytsp, long *bytsp_len, idx_T **idxsp, int prefixtree, int prefixcnt);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200319static idx_T read_tree_node(FILE *fd, char_u *byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200320static void set_spell_charflags(char_u *flags, int cnt, char_u *upp);
321static int set_spell_chartab(char_u *fol, char_u *low, char_u *upp);
322static void set_map_str(slang_T *lp, char_u *map);
323
324
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200325static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
326static char *e_affname = N_("Affix name too long in %s line %d: %s");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200327static char *msg_compressing = N_("Compressing word tree...");
328
329/*
330 * Load one spell file and store the info into a slang_T.
331 *
332 * This is invoked in three ways:
333 * - From spell_load_cb() to load a spell file for the first time. "lang" is
334 * the language name, "old_lp" is NULL. Will allocate an slang_T.
335 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
336 * points to the existing slang_T.
337 * - Just after writing a .spl file; it's read back to produce the .sug file.
338 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
339 *
340 * Returns the slang_T the spell file was loaded into. NULL for error.
341 */
342 slang_T *
343spell_load_file(
344 char_u *fname,
345 char_u *lang,
346 slang_T *old_lp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100347 int silent) // no error if file doesn't exist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200348{
349 FILE *fd;
350 char_u buf[VIMSPELLMAGICL];
351 char_u *p;
352 int i;
353 int n;
354 int len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200355 slang_T *lp = NULL;
356 int c = 0;
357 int res;
Bram Moolenaarce6db022020-01-07 20:11:42 +0100358 int did_estack_push = FALSE;
ichizok7e5fe382023-04-15 13:17:50 +0100359 ESTACK_CHECK_DECLARATION;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200360
361 fd = mch_fopen((char *)fname, "r");
362 if (fd == NULL)
363 {
364 if (!silent)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000365 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200366 else if (p_verbose > 2)
367 {
368 verbose_enter();
Bram Moolenaar460ae5d2022-01-01 14:19:49 +0000369 smsg((const char *)e_cant_open_file_str, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200370 verbose_leave();
371 }
372 goto endFAIL;
373 }
374 if (p_verbose > 2)
375 {
376 verbose_enter();
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100377 smsg(_("Reading spell file \"%s\""), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200378 verbose_leave();
379 }
380
381 if (old_lp == NULL)
382 {
383 lp = slang_alloc(lang);
384 if (lp == NULL)
385 goto endFAIL;
386
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100387 // Remember the file name, used to reload the file when it's updated.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200388 lp->sl_fname = vim_strsave(fname);
389 if (lp->sl_fname == NULL)
390 goto endFAIL;
391
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100392 // Check for .add.spl (_add.spl for VMS).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200393 lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
394 }
395 else
396 lp = old_lp;
397
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100398 // Set sourcing_name, so that error messages mention the file name.
Bram Moolenaar1a47ae32019-12-29 23:04:25 +0100399 estack_push(ETYPE_SPELL, fname, 0);
ichizok7e5fe382023-04-15 13:17:50 +0100400 ESTACK_CHECK_SETUP;
Bram Moolenaarce6db022020-01-07 20:11:42 +0100401 did_estack_push = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200402
403 /*
404 * <HEADER>: <fileID>
405 */
406 for (i = 0; i < VIMSPELLMAGICL; ++i)
Bram Moolenaar963ab262022-09-05 10:55:27 +0100407 buf[i] = (c = getc(fd)) == EOF ? 0 : c; // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200408 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
409 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000410 emsg(_(e_this_does_not_look_like_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200411 goto endFAIL;
412 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100413 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200414 if (c < VIMSPELLVERSION)
415 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000416 emsg(_(e_old_spell_file_needs_to_be_updated));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200417 goto endFAIL;
418 }
419 else if (c > VIMSPELLVERSION)
420 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000421 emsg(_(e_spell_file_is_for_newer_version_of_vim));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200422 goto endFAIL;
423 }
424
425
426 /*
427 * <SECTIONS>: <section> ... <sectionend>
428 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
429 */
430 for (;;)
431 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100432 n = getc(fd); // <sectionID> or <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200433 if (n == SN_END)
434 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100435 c = getc(fd); // <sectionflags>
436 len = get4c(fd); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200437 if (len < 0)
438 goto truncerr;
439
440 res = 0;
441 switch (n)
442 {
443 case SN_INFO:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100444 lp->sl_info = read_string(fd, len); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200445 if (lp->sl_info == NULL)
446 goto endFAIL;
447 break;
448
449 case SN_REGION:
450 res = read_region_section(fd, lp, len);
451 break;
452
453 case SN_CHARFLAGS:
454 res = read_charflags_section(fd);
455 break;
456
457 case SN_MIDWORD:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100458 lp->sl_midword = read_string(fd, len); // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200459 if (lp->sl_midword == NULL)
460 goto endFAIL;
461 break;
462
463 case SN_PREFCOND:
464 res = read_prefcond_section(fd, lp);
465 break;
466
467 case SN_REP:
468 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
469 break;
470
471 case SN_REPSAL:
472 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
473 break;
474
475 case SN_SAL:
476 res = read_sal_section(fd, lp);
477 break;
478
479 case SN_SOFO:
480 res = read_sofo_section(fd, lp);
481 break;
482
483 case SN_MAP:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100484 p = read_string(fd, len); // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200485 if (p == NULL)
486 goto endFAIL;
487 set_map_str(lp, p);
488 vim_free(p);
489 break;
490
491 case SN_WORDS:
492 res = read_words_section(fd, lp, len);
493 break;
494
495 case SN_SUGFILE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100496 lp->sl_sugtime = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200497 break;
498
499 case SN_NOSPLITSUGS:
500 lp->sl_nosplitsugs = TRUE;
501 break;
502
503 case SN_NOCOMPOUNDSUGS:
504 lp->sl_nocompoundsugs = TRUE;
505 break;
506
507 case SN_COMPOUND:
508 res = read_compound(fd, lp, len);
509 break;
510
511 case SN_NOBREAK:
512 lp->sl_nobreak = TRUE;
513 break;
514
515 case SN_SYLLABLE:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100516 lp->sl_syllable = read_string(fd, len); // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200517 if (lp->sl_syllable == NULL)
518 goto endFAIL;
Bram Moolenaarfc2a47f2020-08-20 15:41:55 +0200519 if (init_syl_tab(lp) != OK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200520 goto endFAIL;
521 break;
522
523 default:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100524 // Unsupported section. When it's required give an error
525 // message. When it's not required skip the contents.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200526 if (c & SNF_REQUIRED)
527 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000528 emsg(_(e_unsupported_section_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200529 goto endFAIL;
530 }
531 while (--len >= 0)
532 if (getc(fd) < 0)
533 goto truncerr;
534 break;
535 }
536someerror:
537 if (res == SP_FORMERROR)
538 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000539 emsg(_(e_format_error_in_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200540 goto endFAIL;
541 }
542 if (res == SP_TRUNCERROR)
543 {
544truncerr:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000545 emsg(_(e_truncated_spell_file));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200546 goto endFAIL;
547 }
548 if (res == SP_OTHERERROR)
549 goto endFAIL;
550 }
551
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100552 // <LWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200553 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fbyts_len,
554 &lp->sl_fidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200555 if (res != 0)
556 goto someerror;
557
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100558 // <KWORDTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200559 res = spell_read_tree(fd, &lp->sl_kbyts, NULL, &lp->sl_kidxs, FALSE, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200560 if (res != 0)
561 goto someerror;
562
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100563 // <PREFIXTREE>
Bram Moolenaar07399e72020-08-24 20:05:50 +0200564 res = spell_read_tree(fd, &lp->sl_pbyts, NULL, &lp->sl_pidxs, TRUE,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200565 lp->sl_prefixcnt);
566 if (res != 0)
567 goto someerror;
568
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100569 // For a new file link it in the list of spell files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200570 if (old_lp == NULL && lang != NULL)
571 {
572 lp->sl_next = first_lang;
573 first_lang = lp;
574 }
575
576 goto endOK;
577
578endFAIL:
579 if (lang != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100580 // truncating the name signals the error to spell_load_lang()
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200581 *lang = NUL;
582 if (lp != NULL && old_lp == NULL)
583 slang_free(lp);
584 lp = NULL;
585
586endOK:
587 if (fd != NULL)
588 fclose(fd);
Bram Moolenaarce6db022020-01-07 20:11:42 +0100589 if (did_estack_push)
Bram Moolenaare31ee862020-01-07 20:59:34 +0100590 {
ichizok7e5fe382023-04-15 13:17:50 +0100591 ESTACK_CHECK_NOW;
Bram Moolenaarce6db022020-01-07 20:11:42 +0100592 estack_pop();
Bram Moolenaare31ee862020-01-07 20:59:34 +0100593 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200594
595 return lp;
596}
597
598/*
599 * Fill in the wordcount fields for a trie.
600 * Returns the total number of words.
601 */
602 static void
603tree_count_words(char_u *byts, idx_T *idxs)
604{
605 int depth;
606 idx_T arridx[MAXWLEN];
607 int curi[MAXWLEN];
608 int c;
609 idx_T n;
610 int wordcount[MAXWLEN];
611
612 arridx[0] = 0;
613 curi[0] = 1;
614 wordcount[0] = 0;
615 depth = 0;
616 while (depth >= 0 && !got_int)
617 {
618 if (curi[depth] > byts[arridx[depth]])
619 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100620 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200621 idxs[arridx[depth]] = wordcount[depth];
622 if (depth > 0)
623 wordcount[depth - 1] += wordcount[depth];
624
625 --depth;
626 fast_breakcheck();
627 }
628 else
629 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100630 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200631 n = arridx[depth] + curi[depth];
632 ++curi[depth];
633
634 c = byts[n];
635 if (c == 0)
636 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100637 // End of word, count it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200638 ++wordcount[depth];
639
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100640 // Skip over any other NUL bytes (same word with different
641 // flags).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200642 while (byts[n + 1] == 0)
643 {
644 ++n;
645 ++curi[depth];
646 }
647 }
648 else
649 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100650 // Normal char, go one level deeper to count the words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200651 ++depth;
652 arridx[depth] = idxs[n];
653 curi[depth] = 1;
654 wordcount[depth] = 0;
655 }
656 }
657 }
658}
659
660/*
661 * Load the .sug files for languages that have one and weren't loaded yet.
662 */
663 void
664suggest_load_files(void)
665{
666 langp_T *lp;
667 int lpi;
668 slang_T *slang;
669 char_u *dotp;
670 FILE *fd;
671 char_u buf[MAXWLEN];
672 int i;
673 time_t timestamp;
674 int wcount;
675 int wordnr;
676 garray_T ga;
677 int c;
678
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100679 // Do this for all languages that support sound folding.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200680 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
681 {
682 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
683 slang = lp->lp_slang;
684 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
685 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100686 // Change ".spl" to ".sug" and open the file. When the file isn't
687 // found silently skip it. Do set "sl_sugloaded" so that we
688 // don't try again and again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200689 slang->sl_sugloaded = TRUE;
690
691 dotp = vim_strrchr(slang->sl_fname, '.');
692 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
693 continue;
694 STRCPY(dotp, ".sug");
695 fd = mch_fopen((char *)slang->sl_fname, "r");
696 if (fd == NULL)
697 goto nextone;
698
699 /*
700 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
701 */
702 for (i = 0; i < VIMSUGMAGICL; ++i)
Bram Moolenaar963ab262022-09-05 10:55:27 +0100703 buf[i] = (c = getc(fd)) == EOF ? 0 : c; // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200704 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
705 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000706 semsg(_(e_this_does_not_look_like_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200707 slang->sl_fname);
708 goto nextone;
709 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100710 c = getc(fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200711 if (c < VIMSUGVERSION)
712 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000713 semsg(_(e_old_sug_file_needs_to_be_updated_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200714 slang->sl_fname);
715 goto nextone;
716 }
717 else if (c > VIMSUGVERSION)
718 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000719 semsg(_(e_sug_file_is_for_newer_version_of_vim_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200720 slang->sl_fname);
721 goto nextone;
722 }
723
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100724 // Check the timestamp, it must be exactly the same as the one in
725 // the .spl file. Otherwise the word numbers won't match.
726 timestamp = get8ctime(fd); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200727 if (timestamp != slang->sl_sugtime)
728 {
Bram Moolenaar677658a2022-01-05 16:09:06 +0000729 semsg(_(e_sug_file_doesnt_match_spl_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200730 slang->sl_fname);
731 goto nextone;
732 }
733
734 /*
735 * <SUGWORDTREE>: <wordtree>
736 * Read the trie with the soundfolded words.
737 */
Bram Moolenaar07399e72020-08-24 20:05:50 +0200738 if (spell_read_tree(fd, &slang->sl_sbyts, NULL, &slang->sl_sidxs,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200739 FALSE, 0) != 0)
740 {
741someerror:
Bram Moolenaar677658a2022-01-05 16:09:06 +0000742 semsg(_(e_error_while_reading_sug_file_str),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200743 slang->sl_fname);
744 slang_clear_sug(slang);
745 goto nextone;
746 }
747
748 /*
749 * <SUGTABLE>: <sugwcount> <sugline> ...
750 *
751 * Read the table with word numbers. We use a file buffer for
752 * this, because it's so much like a file with lines. Makes it
753 * possible to swap the info and save on memory use.
754 */
755 slang->sl_sugbuf = open_spellbuf();
756 if (slang->sl_sugbuf == NULL)
757 goto someerror;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100758 // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200759 wcount = get4c(fd);
760 if (wcount < 0)
761 goto someerror;
762
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100763 // Read all the wordnr lists into the buffer, one NUL terminated
764 // list per line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200765 ga_init2(&ga, 1, 100);
766 for (wordnr = 0; wordnr < wcount; ++wordnr)
767 {
768 ga.ga_len = 0;
769 for (;;)
770 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100771 c = getc(fd); // <sugline>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200772 if (c < 0 || ga_grow(&ga, 1) == FAIL)
773 goto someerror;
774 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
775 if (c == NUL)
776 break;
777 }
778 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
779 ga.ga_data, ga.ga_len, TRUE) == FAIL)
780 goto someerror;
781 }
782 ga_clear(&ga);
783
784 /*
785 * Need to put word counts in the word tries, so that we can find
786 * a word by its number.
787 */
788 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
789 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
790
791nextone:
792 if (fd != NULL)
793 fclose(fd);
794 STRCPY(dotp, ".spl");
795 }
796 }
797}
798
799
800/*
801 * Read a length field from "fd" in "cnt_bytes" bytes.
802 * Allocate memory, read the string into it and add a NUL at the end.
803 * Returns NULL when the count is zero.
804 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
805 * otherwise.
806 */
807 static char_u *
808read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
809{
810 int cnt = 0;
811 int i;
812 char_u *str;
813
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100814 // read the length bytes, MSB first
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200815 for (i = 0; i < cnt_bytes; ++i)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200816 {
Bram Moolenaar4ad739f2020-09-02 10:25:45 +0200817 int c = getc(fd);
818
819 if (c == EOF)
820 {
821 *cntp = SP_TRUNCERROR;
822 return NULL;
823 }
824 cnt = (cnt << 8) + (unsigned)c;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200825 }
826 *cntp = cnt;
827 if (cnt == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100828 return NULL; // nothing to read, return NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200829
830 str = read_string(fd, cnt);
831 if (str == NULL)
832 *cntp = SP_OTHERERROR;
833 return str;
834}
835
836/*
837 * Read SN_REGION: <regionname> ...
838 * Return SP_*ERROR flags.
839 */
840 static int
841read_region_section(FILE *fd, slang_T *lp, int len)
842{
843 int i;
Bram Moolenaarc1eb1312022-09-04 13:45:15 +0100844 int c = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200845
Bram Moolenaar2993ac52018-02-10 14:12:43 +0100846 if (len > MAXREGIONS * 2)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200847 return SP_FORMERROR;
848 for (i = 0; i < len; ++i)
Bram Moolenaarc7d2ff22022-09-05 11:04:14 +0100849 lp->sl_regions[i] = (c = getc(fd)) == EOF ? 0 : c; // <regionname>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200850 lp->sl_regions[len] = NUL;
Bram Moolenaar3c770762022-09-04 11:55:19 +0100851 return c == EOF ? SP_TRUNCERROR : 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200852}
853
854/*
855 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
856 * <folcharslen> <folchars>
857 * Return SP_*ERROR flags.
858 */
859 static int
860read_charflags_section(FILE *fd)
861{
862 char_u *flags;
863 char_u *fol;
864 int flagslen, follen;
865
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100866 // <charflagslen> <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200867 flags = read_cnt_string(fd, 1, &flagslen);
868 if (flagslen < 0)
869 return flagslen;
870
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100871 // <folcharslen> <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200872 fol = read_cnt_string(fd, 2, &follen);
873 if (follen < 0)
874 {
875 vim_free(flags);
876 return follen;
877 }
878
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100879 // Set the word-char flags and fill SPELL_ISUPPER() table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200880 if (flags != NULL && fol != NULL)
881 set_spell_charflags(flags, flagslen, fol);
882
883 vim_free(flags);
884 vim_free(fol);
885
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100886 // When <charflagslen> is zero then <fcharlen> must also be zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200887 if ((flags == NULL) != (fol == NULL))
888 return SP_FORMERROR;
889 return 0;
890}
891
892/*
893 * Read SN_PREFCOND section.
894 * Return SP_*ERROR flags.
895 */
896 static int
897read_prefcond_section(FILE *fd, slang_T *lp)
898{
899 int cnt;
900 int i;
901 int n;
Bram Moolenaar3c770762022-09-04 11:55:19 +0100902 int c;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200903 char_u *p;
904 char_u buf[MAXWLEN + 1];
905
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100906 // <prefcondcnt> <prefcond> ...
907 cnt = get2c(fd); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200908 if (cnt <= 0)
909 return SP_FORMERROR;
910
Bram Moolenaarc799fe22019-05-28 23:08:19 +0200911 lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200912 if (lp->sl_prefprog == NULL)
913 return SP_OTHERERROR;
914 lp->sl_prefixcnt = cnt;
915
916 for (i = 0; i < cnt; ++i)
917 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100918 // <prefcond> : <condlen> <condstr>
919 n = getc(fd); // <condlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200920 if (n < 0 || n >= MAXWLEN)
921 return SP_FORMERROR;
922
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100923 // When <condlen> is zero we have an empty condition. Otherwise
924 // compile the regexp program used to check for the condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200925 if (n > 0)
926 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100927 buf[0] = '^'; // always match at one position only
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200928 p = buf + 1;
929 while (n-- > 0)
Bram Moolenaarc7d2ff22022-09-05 11:04:14 +0100930 *p++ = (c = getc(fd)) == EOF ? 0 : c; // <condstr>
Bram Moolenaar3c770762022-09-04 11:55:19 +0100931 if (c == EOF)
932 break;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200933 *p = NUL;
934 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
935 }
936 }
937 return 0;
938}
939
940/*
941 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
942 * Return SP_*ERROR flags.
943 */
944 static int
945read_rep_section(FILE *fd, garray_T *gap, short *first)
946{
947 int cnt;
948 fromto_T *ftp;
949 int i;
950
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100951 cnt = get2c(fd); // <repcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200952 if (cnt < 0)
953 return SP_TRUNCERROR;
954
955 if (ga_grow(gap, cnt) == FAIL)
956 return SP_OTHERERROR;
957
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100958 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200959 for (; gap->ga_len < cnt; ++gap->ga_len)
960 {
961 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
962 ftp->ft_from = read_cnt_string(fd, 1, &i);
963 if (i < 0)
964 return i;
965 if (i == 0)
966 return SP_FORMERROR;
967 ftp->ft_to = read_cnt_string(fd, 1, &i);
968 if (i <= 0)
969 {
970 vim_free(ftp->ft_from);
971 if (i < 0)
972 return i;
973 return SP_FORMERROR;
974 }
975 }
976
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +0100977 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +0200978 for (i = 0; i < 256; ++i)
979 first[i] = -1;
980 for (i = 0; i < gap->ga_len; ++i)
981 {
982 ftp = &((fromto_T *)gap->ga_data)[i];
983 if (first[*ftp->ft_from] == -1)
984 first[*ftp->ft_from] = i;
985 }
986 return 0;
987}
988
989/*
990 * Read SN_SAL section: <salflags> <salcount> <sal> ...
991 * Return SP_*ERROR flags.
992 */
993 static int
994read_sal_section(FILE *fd, slang_T *slang)
995{
996 int i;
997 int cnt;
998 garray_T *gap;
999 salitem_T *smp;
1000 int ccnt;
1001 char_u *p;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001002
1003 slang->sl_sofo = FALSE;
1004
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001005 i = getc(fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001006 if (i & SAL_F0LLOWUP)
1007 slang->sl_followup = TRUE;
1008 if (i & SAL_COLLAPSE)
1009 slang->sl_collapse = TRUE;
1010 if (i & SAL_REM_ACCENTS)
1011 slang->sl_rem_accents = TRUE;
1012
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001013 cnt = get2c(fd); // <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001014 if (cnt < 0)
1015 return SP_TRUNCERROR;
1016
1017 gap = &slang->sl_sal;
1018 ga_init2(gap, sizeof(salitem_T), 10);
1019 if (ga_grow(gap, cnt + 1) == FAIL)
1020 return SP_OTHERERROR;
1021
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001022 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001023 for (; gap->ga_len < cnt; ++gap->ga_len)
1024 {
Bram Moolenaar97d2f342020-07-10 20:03:03 +02001025 int c = NUL;
1026
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001027 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001028 ccnt = getc(fd); // <salfromlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001029 if (ccnt < 0)
1030 return SP_TRUNCERROR;
1031 if ((p = alloc(ccnt + 2)) == NULL)
1032 return SP_OTHERERROR;
1033 smp->sm_lead = p;
1034
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001035 // Read up to the first special char into sm_lead.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001036 for (i = 0; i < ccnt; ++i)
1037 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001038 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001039 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
1040 break;
1041 *p++ = c;
1042 }
1043 smp->sm_leadlen = (int)(p - smp->sm_lead);
1044 *p++ = NUL;
1045
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001046 // Put (abc) chars in sm_oneof, if any.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001047 if (c == '(')
1048 {
1049 smp->sm_oneof = p;
1050 for (++i; i < ccnt; ++i)
1051 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001052 c = getc(fd); // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001053 if (c == ')')
1054 break;
1055 *p++ = c;
1056 }
1057 *p++ = NUL;
1058 if (++i < ccnt)
1059 c = getc(fd);
1060 }
1061 else
1062 smp->sm_oneof = NULL;
1063
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001064 // Any following chars go in sm_rules.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001065 smp->sm_rules = p;
1066 if (i < ccnt)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001067 // store the char we got while checking for end of sm_lead
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001068 *p++ = c;
1069 for (++i; i < ccnt; ++i)
Bram Moolenaarc7d2ff22022-09-05 11:04:14 +01001070 *p++ = (c = getc(fd)) == EOF ? 0 : c; // <salfrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001071 *p++ = NUL;
1072
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001073 // <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001074 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
1075 if (ccnt < 0)
1076 {
1077 vim_free(smp->sm_lead);
1078 return ccnt;
1079 }
1080
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001081 if (has_mbyte)
1082 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001083 // convert the multi-byte strings to wide char strings
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001084 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1085 smp->sm_leadlen = mb_charlen(smp->sm_lead);
1086 if (smp->sm_oneof == NULL)
1087 smp->sm_oneof_w = NULL;
1088 else
1089 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
1090 if (smp->sm_to == NULL)
1091 smp->sm_to_w = NULL;
1092 else
1093 smp->sm_to_w = mb_str2wide(smp->sm_to);
1094 if (smp->sm_lead_w == NULL
1095 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
1096 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
1097 {
1098 vim_free(smp->sm_lead);
1099 vim_free(smp->sm_to);
1100 vim_free(smp->sm_lead_w);
1101 vim_free(smp->sm_oneof_w);
1102 vim_free(smp->sm_to_w);
1103 return SP_OTHERERROR;
1104 }
1105 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001106 }
1107
1108 if (gap->ga_len > 0)
1109 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001110 // Add one extra entry to mark the end with an empty sm_lead. Avoids
1111 // that we need to check the index every time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001112 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
1113 if ((p = alloc(1)) == NULL)
1114 return SP_OTHERERROR;
1115 p[0] = NUL;
1116 smp->sm_lead = p;
1117 smp->sm_leadlen = 0;
1118 smp->sm_oneof = NULL;
1119 smp->sm_rules = p;
1120 smp->sm_to = NULL;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001121 if (has_mbyte)
1122 {
1123 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
1124 smp->sm_leadlen = 0;
1125 smp->sm_oneof_w = NULL;
1126 smp->sm_to_w = NULL;
1127 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001128 ++gap->ga_len;
1129 }
1130
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001131 // Fill the first-index table.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001132 set_sal_first(slang);
1133
1134 return 0;
1135}
1136
1137/*
1138 * Read SN_WORDS: <word> ...
1139 * Return SP_*ERROR flags.
1140 */
1141 static int
1142read_words_section(FILE *fd, slang_T *lp, int len)
1143{
1144 int done = 0;
1145 int i;
1146 int c;
1147 char_u word[MAXWLEN];
1148
1149 while (done < len)
1150 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001151 // Read one word at a time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001152 for (i = 0; ; ++i)
1153 {
1154 c = getc(fd);
1155 if (c == EOF)
1156 return SP_TRUNCERROR;
1157 word[i] = c;
1158 if (word[i] == NUL)
1159 break;
1160 if (i == MAXWLEN - 1)
1161 return SP_FORMERROR;
1162 }
1163
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001164 // Init the count to 10.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001165 count_common_word(lp, word, -1, 10);
1166 done += i + 1;
1167 }
1168 return 0;
1169}
1170
1171/*
1172 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
1173 * Return SP_*ERROR flags.
1174 */
1175 static int
1176read_sofo_section(FILE *fd, slang_T *slang)
1177{
1178 int cnt;
1179 char_u *from, *to;
1180 int res;
1181
1182 slang->sl_sofo = TRUE;
1183
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001184 // <sofofromlen> <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001185 from = read_cnt_string(fd, 2, &cnt);
1186 if (cnt < 0)
1187 return cnt;
1188
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001189 // <sofotolen> <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001190 to = read_cnt_string(fd, 2, &cnt);
1191 if (cnt < 0)
1192 {
1193 vim_free(from);
1194 return cnt;
1195 }
1196
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001197 // Store the info in slang->sl_sal and/or slang->sl_sal_first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001198 if (from != NULL && to != NULL)
1199 res = set_sofo(slang, from, to);
1200 else if (from != NULL || to != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001201 res = SP_FORMERROR; // only one of two strings is an error
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001202 else
1203 res = 0;
1204
1205 vim_free(from);
1206 vim_free(to);
1207 return res;
1208}
1209
1210/*
1211 * Read the compound section from the .spl file:
1212 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
1213 * Returns SP_*ERROR flags.
1214 */
1215 static int
1216read_compound(FILE *fd, slang_T *slang, int len)
1217{
1218 int todo = len;
1219 int c;
1220 int atstart;
1221 char_u *pat;
1222 char_u *pp;
1223 char_u *cp;
1224 char_u *ap;
1225 char_u *crp;
1226 int cnt;
1227 garray_T *gap;
1228
1229 if (todo < 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001230 return SP_FORMERROR; // need at least two bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001231
1232 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001233 c = getc(fd); // <compmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001234 if (c < 2)
1235 c = MAXWLEN;
1236 slang->sl_compmax = c;
1237
1238 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001239 c = getc(fd); // <compminlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001240 if (c < 1)
1241 c = 0;
1242 slang->sl_compminlen = c;
1243
1244 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001245 c = getc(fd); // <compsylmax>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001246 if (c < 1)
1247 c = MAXWLEN;
1248 slang->sl_compsylmax = c;
1249
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001250 c = getc(fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001251 if (c != 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001252 ungetc(c, fd); // be backwards compatible with Vim 7.0b
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001253 else
1254 {
1255 --todo;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001256 c = getc(fd); // only use the lower byte for now
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001257 --todo;
1258 slang->sl_compoptions = c;
1259
1260 gap = &slang->sl_comppat;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001261 c = get2c(fd); // <comppatcount>
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001262 if (c < 0)
1263 return SP_TRUNCERROR;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001264 todo -= 2;
1265 ga_init2(gap, sizeof(char_u *), c);
1266 if (ga_grow(gap, c) == OK)
1267 while (--c >= 0)
1268 {
1269 ((char_u **)(gap->ga_data))[gap->ga_len++] =
Bram Moolenaarb85d3622021-08-11 15:54:59 +02001270 read_cnt_string(fd, 1, &cnt);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001271 // <comppatlen> <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001272 if (cnt < 0)
1273 return cnt;
1274 todo -= cnt + 1;
1275 }
1276 }
1277 if (todo < 0)
1278 return SP_FORMERROR;
1279
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001280 // Turn the COMPOUNDRULE items into a regexp pattern:
1281 // "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
1282 // Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
1283 // Conversion to utf-8 may double the size.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001284 c = todo * 2 + 7;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001285 if (enc_utf8)
1286 c += todo * 2;
Bram Moolenaar964b3742019-05-24 18:54:09 +02001287 pat = alloc(c);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001288 if (pat == NULL)
1289 return SP_OTHERERROR;
1290
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001291 // We also need a list of all flags that can appear at the start and one
1292 // for all flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001293 cp = alloc(todo + 1);
1294 if (cp == NULL)
1295 {
1296 vim_free(pat);
1297 return SP_OTHERERROR;
1298 }
1299 slang->sl_compstartflags = cp;
1300 *cp = NUL;
1301
1302 ap = alloc(todo + 1);
1303 if (ap == NULL)
1304 {
1305 vim_free(pat);
1306 return SP_OTHERERROR;
1307 }
1308 slang->sl_compallflags = ap;
1309 *ap = NUL;
1310
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001311 // And a list of all patterns in their original form, for checking whether
1312 // compounding may work in match_compoundrule(). This is freed when we
1313 // encounter a wildcard, the check doesn't work then.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001314 crp = alloc(todo + 1);
1315 slang->sl_comprules = crp;
1316
1317 pp = pat;
1318 *pp++ = '^';
1319 *pp++ = '\\';
1320 *pp++ = '(';
1321
1322 atstart = 1;
1323 while (todo-- > 0)
1324 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001325 c = getc(fd); // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001326 if (c == EOF)
1327 {
1328 vim_free(pat);
1329 return SP_TRUNCERROR;
1330 }
1331
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001332 // Add all flags to "sl_compallflags".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001333 if (vim_strchr((char_u *)"?*+[]/", c) == NULL
1334 && !byte_in_str(slang->sl_compallflags, c))
1335 {
1336 *ap++ = c;
1337 *ap = NUL;
1338 }
1339
1340 if (atstart != 0)
1341 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001342 // At start of item: copy flags to "sl_compstartflags". For a
1343 // [abc] item set "atstart" to 2 and copy up to the ']'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001344 if (c == '[')
1345 atstart = 2;
1346 else if (c == ']')
1347 atstart = 0;
1348 else
1349 {
1350 if (!byte_in_str(slang->sl_compstartflags, c))
1351 {
1352 *cp++ = c;
1353 *cp = NUL;
1354 }
1355 if (atstart == 1)
1356 atstart = 0;
1357 }
1358 }
1359
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001360 // Copy flag to "sl_comprules", unless we run into a wildcard.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001361 if (crp != NULL)
1362 {
1363 if (c == '?' || c == '+' || c == '*')
1364 {
Bram Moolenaard23a8232018-02-10 18:45:26 +01001365 VIM_CLEAR(slang->sl_comprules);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001366 crp = NULL;
1367 }
1368 else
1369 *crp++ = c;
1370 }
1371
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001372 if (c == '/') // slash separates two items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001373 {
1374 *pp++ = '\\';
1375 *pp++ = '|';
1376 atstart = 1;
1377 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001378 else // normal char, "[abc]" and '*' are copied as-is
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001379 {
1380 if (c == '?' || c == '+' || c == '~')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001381 *pp++ = '\\'; // "a?" becomes "a\?", "a+" becomes "a\+"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001382 if (enc_utf8)
1383 pp += mb_char2bytes(c, pp);
1384 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001385 *pp++ = c;
1386 }
1387 }
1388
1389 *pp++ = '\\';
1390 *pp++ = ')';
1391 *pp++ = '$';
1392 *pp = NUL;
1393
1394 if (crp != NULL)
1395 *crp = NUL;
1396
1397 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
1398 vim_free(pat);
1399 if (slang->sl_compprog == NULL)
1400 return SP_FORMERROR;
1401
1402 return 0;
1403}
1404
1405/*
1406 * Set the SOFOFROM and SOFOTO items in language "lp".
1407 * Returns SP_*ERROR flags when there is something wrong.
1408 */
1409 static int
1410set_sofo(slang_T *lp, char_u *from, char_u *to)
1411{
1412 int i;
1413
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001414 garray_T *gap;
1415 char_u *s;
1416 char_u *p;
1417 int c;
1418 int *inp;
1419
1420 if (has_mbyte)
1421 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001422 // Use "sl_sal" as an array with 256 pointers to a list of wide
1423 // characters. The index is the low byte of the character.
1424 // The list contains from-to pairs with a terminating NUL.
1425 // sl_sal_first[] is used for latin1 "from" characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001426 gap = &lp->sl_sal;
1427 ga_init2(gap, sizeof(int *), 1);
1428 if (ga_grow(gap, 256) == FAIL)
1429 return SP_OTHERERROR;
1430 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
1431 gap->ga_len = 256;
1432
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001433 // First count the number of items for each list. Temporarily use
1434 // sl_sal_first[] for this.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001435 for (p = from, s = to; *p != NUL && *s != NUL; )
1436 {
1437 c = mb_cptr2char_adv(&p);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001438 MB_CPTR_ADV(s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001439 if (c >= 256)
1440 ++lp->sl_sal_first[c & 0xff];
1441 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001442 if (*p != NUL || *s != NUL) // lengths differ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001443 return SP_FORMERROR;
1444
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001445 // Allocate the lists.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001446 for (i = 0; i < 256; ++i)
1447 if (lp->sl_sal_first[i] > 0)
1448 {
1449 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
1450 if (p == NULL)
1451 return SP_OTHERERROR;
1452 ((int **)gap->ga_data)[i] = (int *)p;
1453 *(int *)p = 0;
1454 }
1455
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001456 // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
1457 // list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001458 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
1459 for (p = from, s = to; *p != NUL && *s != NUL; )
1460 {
1461 c = mb_cptr2char_adv(&p);
1462 i = mb_cptr2char_adv(&s);
1463 if (c >= 256)
1464 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001465 // Append the from-to chars at the end of the list with
1466 // the low byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001467 inp = ((int **)gap->ga_data)[c & 0xff];
1468 while (*inp != 0)
1469 ++inp;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001470 *inp++ = c; // from char
1471 *inp++ = i; // to char
1472 *inp++ = NUL; // NUL at the end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001473 }
1474 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001475 // mapping byte to char is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001476 lp->sl_sal_first[c] = i;
1477 }
1478 }
1479 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001480 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001481 // mapping bytes to bytes is done in sl_sal_first[]
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001482 if (STRLEN(from) != STRLEN(to))
1483 return SP_FORMERROR;
1484
1485 for (i = 0; to[i] != NUL; ++i)
1486 lp->sl_sal_first[from[i]] = to[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001487 lp->sl_sal.ga_len = 1; // indicates we have soundfolding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001488 }
1489
1490 return 0;
1491}
1492
1493/*
1494 * Fill the first-index table for "lp".
1495 */
1496 static void
1497set_sal_first(slang_T *lp)
1498{
1499 salfirst_T *sfirst;
1500 int i;
1501 salitem_T *smp;
1502 int c;
1503 garray_T *gap = &lp->sl_sal;
1504
1505 sfirst = lp->sl_sal_first;
1506 for (i = 0; i < 256; ++i)
1507 sfirst[i] = -1;
1508 smp = (salitem_T *)gap->ga_data;
1509 for (i = 0; i < gap->ga_len; ++i)
1510 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001511 if (has_mbyte)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001512 // Use the lowest byte of the first character. For latin1 it's
1513 // the character, for other encodings it should differ for most
1514 // characters.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001515 c = *smp[i].sm_lead_w & 0xff;
1516 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001517 c = *smp[i].sm_lead;
1518 if (sfirst[c] == -1)
1519 {
1520 sfirst[c] = i;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001521 if (has_mbyte)
1522 {
1523 int n;
1524
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001525 // Make sure all entries with this byte are following each
1526 // other. Move the ones that are in the wrong position. Do
1527 // keep the same ordering!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001528 while (i + 1 < gap->ga_len
1529 && (*smp[i + 1].sm_lead_w & 0xff) == c)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001530 // Skip over entry with same index byte.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001531 ++i;
1532
1533 for (n = 1; i + n < gap->ga_len; ++n)
1534 if ((*smp[i + n].sm_lead_w & 0xff) == c)
1535 {
1536 salitem_T tsal;
1537
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001538 // Move entry with same index byte after the entries
1539 // we already found.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001540 ++i;
1541 --n;
1542 tsal = smp[i + n];
1543 mch_memmove(smp + i + 1, smp + i,
1544 sizeof(salitem_T) * n);
1545 smp[i] = tsal;
1546 }
1547 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001548 }
1549 }
1550}
1551
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001552/*
1553 * Turn a multi-byte string into a wide character string.
1554 * Return it in allocated memory (NULL for out-of-memory)
1555 */
1556 static int *
1557mb_str2wide(char_u *s)
1558{
1559 int *res;
1560 char_u *p;
1561 int i = 0;
1562
Bram Moolenaarc799fe22019-05-28 23:08:19 +02001563 res = ALLOC_MULT(int, mb_charlen(s) + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00001564 if (res == NULL)
1565 return NULL;
1566
1567 for (p = s; *p != NUL; )
1568 res[i++] = mb_ptr2char_adv(&p);
1569 res[i] = NUL;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001570 return res;
1571}
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001572
1573/*
1574 * Read a tree from the .spl or .sug file.
1575 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
1576 * This is skipped when the tree has zero length.
1577 * Returns zero when OK, SP_ value for an error.
1578 */
1579 static int
1580spell_read_tree(
1581 FILE *fd,
1582 char_u **bytsp,
Bram Moolenaar07399e72020-08-24 20:05:50 +02001583 long *bytsp_len,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001584 idx_T **idxsp,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001585 int prefixtree, // TRUE for the prefix tree
1586 int prefixcnt) // when "prefixtree" is TRUE: prefix count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001587{
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001588 long len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001589 int idx;
1590 char_u *bp;
1591 idx_T *ip;
1592
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001593 // The tree size was computed when writing the file, so that we can
1594 // allocate it as one long block. <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001595 len = get4c(fd);
1596 if (len < 0)
1597 return SP_TRUNCERROR;
Bram Moolenaar6d3c8582017-02-26 15:27:23 +01001598 if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001599 // Invalid length, multiply with sizeof(int) would overflow.
Bram Moolenaar399c2972017-02-09 21:07:12 +01001600 return SP_FORMERROR;
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00001601 if (len <= 0)
1602 return 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001603
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00001604 // Allocate the byte array.
1605 bp = alloc(len);
1606 if (bp == NULL)
1607 return SP_OTHERERROR;
1608 *bytsp = bp;
1609 if (bytsp_len != NULL)
1610 *bytsp_len = len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001611
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00001612 // Allocate the index array.
1613 ip = lalloc_clear(len * sizeof(int), TRUE);
1614 if (ip == NULL)
1615 return SP_OTHERERROR;
1616 *idxsp = ip;
1617
1618 // Recursively read the tree and store it in the array.
1619 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
1620 if (idx < 0)
1621 return idx;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001622 return 0;
1623}
1624
1625/*
1626 * Read one row of siblings from the spell file and store it in the byte array
1627 * "byts" and index array "idxs". Recursively read the children.
1628 *
1629 * NOTE: The code here must match put_node()!
1630 *
1631 * Returns the index (>= 0) following the siblings.
1632 * Returns SP_TRUNCERROR if the file is shorter than expected.
1633 * Returns SP_FORMERROR if there is a format error.
1634 */
1635 static idx_T
1636read_tree_node(
1637 FILE *fd,
1638 char_u *byts,
1639 idx_T *idxs,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001640 int maxidx, // size of arrays
1641 idx_T startidx, // current index in "byts" and "idxs"
1642 int prefixtree, // TRUE for reading PREFIXTREE
1643 int maxprefcondnr) // maximum for <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001644{
1645 int len;
1646 int i;
1647 int n;
1648 idx_T idx = startidx;
1649 int c;
1650 int c2;
1651#define SHARED_MASK 0x8000000
1652
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001653 len = getc(fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001654 if (len <= 0)
1655 return SP_TRUNCERROR;
1656
1657 if (startidx + len >= maxidx)
1658 return SP_FORMERROR;
1659 byts[idx++] = len;
1660
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001661 // Read the byte values, flag/region bytes and shared indexes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001662 for (i = 1; i <= len; ++i)
1663 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001664 c = getc(fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001665 if (c < 0)
1666 return SP_TRUNCERROR;
1667 if (c <= BY_SPECIAL)
1668 {
1669 if (c == BY_NOFLAGS && !prefixtree)
1670 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001671 // No flags, all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001672 idxs[idx] = 0;
1673 c = 0;
1674 }
1675 else if (c != BY_INDEX)
1676 {
1677 if (prefixtree)
1678 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001679 // Read the optional pflags byte, the prefix ID and the
1680 // condition nr. In idxs[] store the prefix ID in the low
1681 // byte, the condition index shifted up 8 bits, the flags
1682 // shifted up 24 bits.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001683 if (c == BY_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001684 c = getc(fd) << 24; // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001685 else
1686 c = 0;
1687
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001688 c |= getc(fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001689
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001690 n = get2c(fd); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001691 if (n >= maxprefcondnr)
1692 return SP_FORMERROR;
1693 c |= (n << 8);
1694 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001695 else // c must be BY_FLAGS or BY_FLAGS2
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001696 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001697 // Read flags and optional region and prefix ID. In
1698 // idxs[] the flags go in the low two bytes, region above
1699 // that and prefix ID above the region.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001700 c2 = c;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001701 c = getc(fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001702 if (c2 == BY_FLAGS2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001703 c = (getc(fd) << 8) + c; // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001704 if (c & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001705 c = (getc(fd) << 16) + c; // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001706 if (c & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001707 c = (getc(fd) << 24) + c; // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001708 }
1709
1710 idxs[idx] = c;
1711 c = 0;
1712 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001713 else // c == BY_INDEX
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001714 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001715 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001716 n = get3c(fd);
1717 if (n < 0 || n >= maxidx)
1718 return SP_FORMERROR;
1719 idxs[idx] = n + SHARED_MASK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001720 c = getc(fd); // <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001721 }
1722 }
1723 byts[idx++] = c;
1724 }
1725
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001726 // Recursively read the children for non-shared siblings.
1727 // Skip the end-of-word ones (zero byte value) and the shared ones (and
1728 // remove SHARED_MASK)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001729 for (i = 1; i <= len; ++i)
1730 if (byts[startidx + i] != 0)
1731 {
1732 if (idxs[startidx + i] & SHARED_MASK)
1733 idxs[startidx + i] &= ~SHARED_MASK;
1734 else
1735 {
1736 idxs[startidx + i] = idx;
1737 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
1738 prefixtree, maxprefcondnr);
1739 if (idx < 0)
1740 break;
1741 }
1742 }
1743
1744 return idx;
1745}
1746
1747/*
1748 * Reload the spell file "fname" if it's loaded.
1749 */
1750 static void
1751spell_reload_one(
1752 char_u *fname,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001753 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001754{
1755 slang_T *slang;
1756 int didit = FALSE;
1757
Bram Moolenaaraeea7212020-04-02 18:50:46 +02001758 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001759 {
Bram Moolenaar99499b12019-05-23 21:35:48 +02001760 if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001761 {
1762 slang_clear(slang);
1763 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001764 // reloading failed, clear the language
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001765 slang_clear(slang);
Bram Moolenaara4d158b2022-08-14 14:17:45 +01001766 redraw_all_later(UPD_SOME_VALID);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001767 didit = TRUE;
1768 }
1769 }
1770
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001771 // When "zg" was used and the file wasn't loaded yet, should redo
1772 // 'spelllang' to load it now.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001773 if (added_word && !didit)
Yegappan Lakshmananaf936912023-02-20 12:16:39 +00001774 parse_spelllang(curwin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001775}
1776
1777
1778/*
1779 * Functions for ":mkspell".
1780 */
1781
#define MAXLINELEN  500		// Maximum length in bytes of a line in a .aff
				// and .dic file.  Also the size of the line
				// buffer used when reading such a file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001784/*
1785 * Main structure to store the contents of a ".aff" file.
1786 */
1787typedef struct afffile_S
1788{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001789 char_u *af_enc; // "SET", normalized, alloc'ed string or NULL
1790 int af_flagtype; // AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG
1791 unsigned af_rare; // RARE ID for rare word
1792 unsigned af_keepcase; // KEEPCASE ID for keep-case word
1793 unsigned af_bad; // BAD ID for banned word
1794 unsigned af_needaffix; // NEEDAFFIX ID
1795 unsigned af_circumfix; // CIRCUMFIX ID
1796 unsigned af_needcomp; // NEEDCOMPOUND ID
1797 unsigned af_comproot; // COMPOUNDROOT ID
1798 unsigned af_compforbid; // COMPOUNDFORBIDFLAG ID
1799 unsigned af_comppermit; // COMPOUNDPERMITFLAG ID
1800 unsigned af_nosuggest; // NOSUGGEST ID
1801 int af_pfxpostpone; // postpone prefixes without chop string and
1802 // without flags
1803 int af_ignoreextra; // IGNOREEXTRA present
1804 hashtab_T af_pref; // hashtable for prefixes, affheader_T
1805 hashtab_T af_suff; // hashtable for suffixes, affheader_T
1806 hashtab_T af_comp; // hashtable for compound flags, compitem_T
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001807} afffile_T;
1808
// Values for "af_flagtype".  AFT_CHAR is the default; the other values are
// selected with a FLAG line in the .aff file ("long", "caplong" or "num").
#define AFT_CHAR	0	// flags are one character
#define AFT_LONG	1	// flags are two characters
#define AFT_CAPLONG	2	// flags are one or two characters
#define AFT_NUM		3	// flags are numbers, comma separated
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001813
1814typedef struct affentry_S affentry_T;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001815// Affix entry from ".aff" file. Used for prefixes and suffixes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001816struct affentry_S
1817{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001818 affentry_T *ae_next; // next affix with same name/number
1819 char_u *ae_chop; // text to chop off basic word (can be NULL)
1820 char_u *ae_add; // text to add to basic word (can be NULL)
1821 char_u *ae_flags; // flags on the affix (can be NULL)
1822 char_u *ae_cond; // condition (NULL for ".")
1823 regprog_T *ae_prog; // regexp program for ae_cond or NULL
1824 char ae_compforbid; // COMPOUNDFORBIDFLAG found
1825 char ae_comppermit; // COMPOUNDPERMITFLAG found
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001826};
1827
// Size of the hashtable key stored in affheader_T and compitem_T.
#define AH_KEY_LEN 17		// 2 x 8 bytes + NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001829
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001830// Affix header from ".aff" file. Used for af_pref and af_suff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001831typedef struct affheader_S
1832{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001833 char_u ah_key[AH_KEY_LEN]; // key for hashtab == name of affix
1834 unsigned ah_flag; // affix name as number, uses "af_flagtype"
1835 int ah_newID; // prefix ID after renumbering; 0 if not used
1836 int ah_combine; // suffix may combine with prefix
1837 int ah_follows; // another affix block should be following
1838 affentry_T *ah_first; // first affix entry
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001839} affheader_T;
1840
1841#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1842
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001843// Flag used in compound items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001844typedef struct compitem_S
1845{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001846 char_u ci_key[AH_KEY_LEN]; // key for hashtab == name of compound
1847 unsigned ci_flag; // affix name as number, uses "af_flagtype"
1848 int ci_newID; // affix ID after renumbering.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001849} compitem_T;
1850
1851#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
1852
1853/*
1854 * Structure that is used to store the items in the word tree. This avoids
1855 * the need to keep track of each allocated thing, everything is freed all at
1856 * once after ":mkspell" is done.
1857 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
1858 * "sb_data" is correct for systems where pointers must be aligned on
1859 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
1860 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001861#define SBLOCKSIZE 16000 // size of sb_data
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001862typedef struct sblock_S sblock_T;
1863struct sblock_S
1864{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001865 int sb_used; // nr of bytes already in use
1866 sblock_T *sb_next; // next block in list
1867 char_u sb_data[1]; // data, actually longer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001868};
1869
1870/*
1871 * A node in the tree.
1872 */
1873typedef struct wordnode_S wordnode_T;
1874struct wordnode_S
1875{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001876 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001877 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001878 char_u hashkey[6]; // the hash key, only used while compressing
1879 int index; // index in written nodes (valid after first
1880 // round)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001881 } wn_u1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001882 union // shared to save space
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001883 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001884 wordnode_T *next; // next node with same hash key
1885 wordnode_T *wnode; // parent node that will write this node
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001886 } wn_u2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001887 wordnode_T *wn_child; // child (next byte in word)
1888 wordnode_T *wn_sibling; // next sibling (alternate byte in word,
1889 // always sorted)
1890 int wn_refs; // Nr. of references to this node. Only
1891 // relevant for first node in a list of
1892 // siblings, in following siblings it is
1893 // always one.
1894 char_u wn_byte; // Byte for this node. NUL for word end
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001895
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001896 // Info for when "wn_byte" is NUL.
1897 // In PREFIXTREE "wn_region" is used for the prefcondnr.
1898 // In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
1899 // "wn_region" the LSW of the wordnr.
1900 char_u wn_affixID; // supported/required prefix ID or 0
1901 short_u wn_flags; // WF_ flags
1902 short wn_region; // region mask
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001903
1904#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001905 int wn_nr; // sequence nr for printing
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001906#endif
1907};
1908
#define WN_MASK	 0xffff		// mask relevant bits of "wn_flags"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001910
// Get the wordnode_T from hashtable item "hi".
// The whole expansion is parenthesized, like HI2AH() and HI2CI(), so that
// "HI2WN(hi)->member" applies "->" to the result of the cast.
#define HI2WN(hi)    ((wordnode_T *)(hi)->hi_key)
1912
1913/*
1914 * Info used while reading the spell files.
1915 */
1916typedef struct spellinfo_S
1917{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001918 wordnode_T *si_foldroot; // tree with case-folded words
1919 long si_foldwcount; // nr of words in si_foldroot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001920
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001921 wordnode_T *si_keeproot; // tree with keep-case words
1922 long si_keepwcount; // nr of words in si_keeproot
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001923
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001924 wordnode_T *si_prefroot; // tree with postponed prefixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001925
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001926 long si_sugtree; // creating the soundfolding trie
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001927
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001928 sblock_T *si_blocks; // memory blocks used
1929 long si_blocks_cnt; // memory blocks allocated
1930 int si_did_emsg; // TRUE when ran out of memory
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001931
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001932 long si_compress_cnt; // words to add before lowering
1933 // compression limit
1934 wordnode_T *si_first_free; // List of nodes that have been freed during
1935 // compression, linked by "wn_child" field.
1936 long si_free_count; // number of nodes in si_first_free
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001937#ifdef SPELL_PRINTTREE
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001938 int si_wordnode_nr; // sequence nr for nodes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001939#endif
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001940 buf_T *si_spellbuf; // buffer used to store soundfold word table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001941
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001942 int si_ascii; // handling only ASCII words
1943 int si_add; // addition file
1944 int si_clear_chartab; // when TRUE clear char tables
1945 int si_region; // region mask
1946 vimconv_T si_conv; // for conversion to 'encoding'
1947 int si_memtot; // runtime memory used
1948 int si_verbose; // verbose messages
1949 int si_msg_count; // number of words added since last message
1950 char_u *si_info; // info text chars or NULL
1951 int si_region_count; // number of regions supported (1 when there
1952 // are no regions)
Bram Moolenaar2993ac52018-02-10 14:12:43 +01001953 char_u si_region_name[MAXREGIONS * 2 + 1];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001954 // region names; used only if
1955 // si_region_count > 1)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001956
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01001957 garray_T si_rep; // list of fromto_T entries from REP lines
1958 garray_T si_repsal; // list of fromto_T entries from REPSAL lines
1959 garray_T si_sal; // list of fromto_T entries from SAL lines
1960 char_u *si_sofofr; // SOFOFROM text
1961 char_u *si_sofoto; // SOFOTO text
1962 int si_nosugfile; // NOSUGFILE item found
1963 int si_nosplitsugs; // NOSPLITSUGS item found
1964 int si_nocompoundsugs; // NOCOMPOUNDSUGS item found
1965 int si_followup; // soundsalike: ?
1966 int si_collapse; // soundsalike: ?
1967 hashtab_T si_commonwords; // hashtable for common words
1968 time_t si_sugtime; // timestamp for .sug file
1969 int si_rem_accents; // soundsalike: remove accents
1970 garray_T si_map; // MAP info concatenated
1971 char_u *si_midword; // MIDWORD chars or NULL
1972 int si_compmax; // max nr of words for compounding
1973 int si_compminlen; // minimal length for compounding
1974 int si_compsylmax; // max nr of syllables for compounding
1975 int si_compoptions; // COMP_ flags
1976 garray_T si_comppat; // CHECKCOMPOUNDPATTERN items, each stored as
1977 // a string
1978 char_u *si_compflags; // flags used for compounding
1979 char_u si_nobreak; // NOBREAK
1980 char_u *si_syllable; // syllable string
1981 garray_T si_prefcond; // table with conditions for postponed
1982 // prefixes, each stored as a string
1983 int si_newprefID; // current value for ah_newID
1984 int si_newcompID; // current value for compound ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001985} spellinfo_T;
1986
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001987static int is_aff_rule(char_u **items, int itemcnt, char *rulename, int mincount);
1988static void aff_process_flags(afffile_T *affile, affentry_T *entry);
1989static int spell_info_item(char_u *s);
1990static unsigned affitem2flag(int flagtype, char_u *item, char_u *fname, int lnum);
1991static unsigned get_affitem(int flagtype, char_u **pp);
1992static void process_compflags(spellinfo_T *spin, afffile_T *aff, char_u *compflags);
1993static void check_renumber(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001994static void aff_check_number(int spinval, int affval, char *name);
1995static void aff_check_string(char_u *spinval, char_u *affval, char *name);
1996static int str_equal(char_u *s1, char_u *s2);
1997static void add_fromto(spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to);
1998static int sal_to_bool(char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001999static int get_affix_flags(afffile_T *affile, char_u *afflist);
2000static int get_pfxlist(afffile_T *affile, char_u *afflist, char_u *store_afflist);
2001static void get_compflags(afffile_T *affile, char_u *afflist, char_u *store_afflist);
2002static int store_aff_word(spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002003static void *getroom(spellinfo_T *spin, size_t len, int align);
2004static char_u *getroom_save(spellinfo_T *spin, char_u *s);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002005static int store_word(spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix);
2006static int tree_add_word(spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID);
2007static wordnode_T *get_wordnode(spellinfo_T *spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002008static void free_wordnode(spellinfo_T *spin, wordnode_T *n);
Bram Moolenaar408c23b2020-06-03 22:15:45 +02002009static void wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name);
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02002010static long node_compress(spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, long *tot);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002011static int node_equal(wordnode_T *n1, wordnode_T *n2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002012static void clear_node(wordnode_T *node);
2013static int put_node(FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002014static int sug_filltree(spellinfo_T *spin, slang_T *slang);
2015static int sug_maketable(spellinfo_T *spin);
2016static int sug_filltable(spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap);
2017static int offset2bytes(int nr, char_u *buf);
2018static void sug_write(spellinfo_T *spin, char_u *fname);
2019static void spell_message(spellinfo_T *spin, char_u *str);
2020static void init_spellfile(void);
2021
// In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
// but it must be negative to indicate the prefix tree to tree_add_word().
// Use a negative number with the lower 8 bits zero.
#define PFX_FLAGS	(-256)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002026
// flags for "condit" argument of store_aff_word()
// Each value is a separate bit, so they can be combined.
#define CONDIT_COMB	1	// affix must combine
#define CONDIT_CFIX	2	// affix must have CIRCUMFIX flag
#define CONDIT_SUF	4	// add a suffix for matching flags
#define CONDIT_AFF	8	// word already has an affix
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002032
/*
 * Tunable parameters for when the tree is compressed.  Filled from the
 * 'mkspellmem' option by spell_check_msm(); the initializers below are used
 * until that function has accepted an option value.
 */
static long compress_start = 30000;	// memory / SBLOCKSIZE
static long compress_inc = 100;		// memory / SBLOCKSIZE
static long compress_added = 500000;	// word count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002040
2041/*
2042 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
2043 * Sets "sps_flags".
2044 */
2045 int
2046spell_check_msm(void)
2047{
2048 char_u *p = p_msm;
2049 long start = 0;
2050 long incr = 0;
2051 long added = 0;
2052
2053 if (!VIM_ISDIGIT(*p))
2054 return FAIL;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002055 // block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002056 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
2057 if (*p != ',')
2058 return FAIL;
2059 ++p;
2060 if (!VIM_ISDIGIT(*p))
2061 return FAIL;
2062 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
2063 if (*p != ',')
2064 return FAIL;
2065 ++p;
2066 if (!VIM_ISDIGIT(*p))
2067 return FAIL;
2068 added = getdigits(&p) * 1024;
2069 if (*p != NUL)
2070 return FAIL;
2071
2072 if (start == 0 || incr == 0 || added == 0 || incr > start)
2073 return FAIL;
2074
2075 compress_start = start;
2076 compress_inc = incr;
2077 compress_added = added;
2078 return OK;
2079}
2080
2081#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE	1000
#define PRINTWIDTH	6

// Format "a1" and "a2" with "fmt" into line buffer "l", starting at the
// column that belongs to tree depth "depth".  The macro arguments are
// parenthesized so that an expression can be passed for "l" and "depth".
#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf((l) + (depth) * PRINTWIDTH, \
	    PRINTLINESIZE - PRINTWIDTH * (depth), fmt, a1, a2)

// The three text lines that together display one row of nodes.
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
2097
2098 static void
2099spell_clear_flags(wordnode_T *node)
2100{
2101 wordnode_T *np;
2102
Bram Moolenaaraeea7212020-04-02 18:50:46 +02002103 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002104 {
2105 np->wn_u1.index = FALSE;
2106 spell_clear_flags(np->wn_child);
2107 }
2108}
2109
2110 static void
2111spell_print_node(wordnode_T *node, int depth)
2112{
2113 if (node->wn_u1.index)
2114 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002115 // Done this node before, print the reference.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002116 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
2117 PRINTSOME(line2, depth, " ", 0, 0);
2118 PRINTSOME(line3, depth, " ", 0, 0);
Bram Moolenaar32526b32019-01-19 17:43:09 +01002119 msg(line1);
2120 msg(line2);
2121 msg(line3);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002122 }
2123 else
2124 {
2125 node->wn_u1.index = TRUE;
2126
2127 if (node->wn_byte != NUL)
2128 {
2129 if (node->wn_child != NULL)
2130 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
2131 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002132 // Cannot happen?
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002133 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
2134 }
2135 else
2136 PRINTSOME(line1, depth, " $ ", 0, 0);
2137
2138 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
2139
2140 if (node->wn_sibling != NULL)
2141 PRINTSOME(line3, depth, " | ", 0, 0);
2142 else
2143 PRINTSOME(line3, depth, " ", 0, 0);
2144
2145 if (node->wn_byte == NUL)
2146 {
Bram Moolenaar32526b32019-01-19 17:43:09 +01002147 msg(line1);
2148 msg(line2);
2149 msg(line3);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002150 }
2151
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002152 // do the children
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002153 if (node->wn_byte != NUL && node->wn_child != NULL)
2154 spell_print_node(node->wn_child, depth + 1);
2155
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002156 // do the siblings
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002157 if (node->wn_sibling != NULL)
2158 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002159 // get rid of all parent details except |
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002160 STRCPY(line1, line3);
2161 STRCPY(line2, line3);
2162 spell_print_node(node->wn_sibling, depth);
2163 }
2164 }
2165}
2166
2167 static void
2168spell_print_tree(wordnode_T *root)
2169{
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00002170 if (root == NULL)
2171 return;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002172
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00002173 // Clear the "wn_u1.index" fields, used to remember what has been done.
2174 spell_clear_flags(root);
2175
2176 // Recursively print the tree.
2177 spell_print_node(root, 0);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002178}
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002179#endif // SPELL_PRINTTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002180
2181/*
2182 * Read the affix file "fname".
2183 * Returns an afffile_T, NULL for complete failure.
2184 */
2185 static afffile_T *
2186spell_read_aff(spellinfo_T *spin, char_u *fname)
2187{
2188 FILE *fd;
2189 afffile_T *aff;
2190 char_u rline[MAXLINELEN];
2191 char_u *line;
2192 char_u *pc = NULL;
2193#define MAXITEMCNT 30
2194 char_u *(items[MAXITEMCNT]);
2195 int itemcnt;
2196 char_u *p;
2197 int lnum = 0;
2198 affheader_T *cur_aff = NULL;
2199 int did_postpone_prefix = FALSE;
2200 int aff_todo = 0;
2201 hashtab_T *tp;
2202 char_u *low = NULL;
2203 char_u *fol = NULL;
2204 char_u *upp = NULL;
2205 int do_rep;
2206 int do_repsal;
2207 int do_sal;
2208 int do_mapline;
2209 int found_map = FALSE;
2210 hashitem_T *hi;
2211 int l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002212 int compminlen = 0; // COMPOUNDMIN value
2213 int compsylmax = 0; // COMPOUNDSYLMAX value
2214 int compoptions = 0; // COMP_ flags
2215 int compmax = 0; // COMPOUNDWORDMAX value
2216 char_u *compflags = NULL; // COMPOUNDFLAG and COMPOUNDRULE
2217 // concatenated
2218 char_u *midword = NULL; // MIDWORD value
2219 char_u *syllable = NULL; // SYLLABLE value
2220 char_u *sofofrom = NULL; // SOFOFROM value
2221 char_u *sofoto = NULL; // SOFOTO value
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002222
2223 /*
2224 * Open the file.
2225 */
2226 fd = mch_fopen((char *)fname, "r");
2227 if (fd == NULL)
2228 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00002229 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002230 return NULL;
2231 }
2232
Bram Moolenaarc1669272018-06-19 14:23:53 +02002233 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002234 spell_message(spin, IObuff);
2235
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002236 // Only do REP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002237 do_rep = spin->si_rep.ga_len == 0;
2238
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002239 // Only do REPSAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002240 do_repsal = spin->si_repsal.ga_len == 0;
2241
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002242 // Only do SAL lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002243 do_sal = spin->si_sal.ga_len == 0;
2244
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002245 // Only do MAP lines when not done in another .aff file already.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002246 do_mapline = spin->si_map.ga_len == 0;
2247
2248 /*
2249 * Allocate and init the afffile_T structure.
2250 */
2251 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
2252 if (aff == NULL)
2253 {
2254 fclose(fd);
2255 return NULL;
2256 }
2257 hash_init(&aff->af_pref);
2258 hash_init(&aff->af_suff);
2259 hash_init(&aff->af_comp);
2260
2261 /*
2262 * Read all the lines in the file one by one.
2263 */
2264 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2265 {
2266 line_breakcheck();
2267 ++lnum;
2268
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002269 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002270 if (*rline == '#')
2271 continue;
2272
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002273 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002274 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002275 if (spin->si_conv.vc_type != CONV_NONE)
2276 {
2277 pc = string_convert(&spin->si_conv, rline, NULL);
2278 if (pc == NULL)
2279 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002280 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002281 fname, lnum, rline);
2282 continue;
2283 }
2284 line = pc;
2285 }
2286 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002287 {
2288 pc = NULL;
2289 line = rline;
2290 }
2291
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002292 // Split the line up in white separated items. Put a NUL after each
2293 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002294 itemcnt = 0;
2295 for (p = line; ; )
2296 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002297 while (*p != NUL && *p <= ' ') // skip white space and CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002298 ++p;
2299 if (*p == NUL)
2300 break;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002301 if (itemcnt == MAXITEMCNT) // too many items
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002302 break;
2303 items[itemcnt++] = p;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002304 // A few items have arbitrary text argument, don't split them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002305 if (itemcnt == 2 && spell_info_item(items[0]))
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002306 while (*p >= ' ' || *p == TAB) // skip until CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002307 ++p;
2308 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002309 while (*p > ' ') // skip until white space or CR/NL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002310 ++p;
2311 if (*p == NUL)
2312 break;
2313 *p++ = NUL;
2314 }
2315
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002316 // Handle non-empty lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002317 if (itemcnt > 0)
2318 {
2319 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
2320 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002321 // Setup for conversion from "ENC" to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002322 aff->af_enc = enc_canonize(items[1]);
2323 if (aff->af_enc != NULL && !spin->si_ascii
2324 && convert_setup(&spin->si_conv, aff->af_enc,
2325 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002326 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002327 fname, aff->af_enc, p_enc);
2328 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002329 }
2330 else if (is_aff_rule(items, itemcnt, "FLAG", 2)
2331 && aff->af_flagtype == AFT_CHAR)
2332 {
2333 if (STRCMP(items[1], "long") == 0)
2334 aff->af_flagtype = AFT_LONG;
2335 else if (STRCMP(items[1], "num") == 0)
2336 aff->af_flagtype = AFT_NUM;
2337 else if (STRCMP(items[1], "caplong") == 0)
2338 aff->af_flagtype = AFT_CAPLONG;
2339 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002340 smsg(_("Invalid value for FLAG in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002341 fname, lnum, items[1]);
2342 if (aff->af_rare != 0
2343 || aff->af_keepcase != 0
2344 || aff->af_bad != 0
2345 || aff->af_needaffix != 0
2346 || aff->af_circumfix != 0
2347 || aff->af_needcomp != 0
2348 || aff->af_comproot != 0
2349 || aff->af_nosuggest != 0
2350 || compflags != NULL
2351 || aff->af_suff.ht_used > 0
2352 || aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002353 smsg(_("FLAG after using flags in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002354 fname, lnum, items[1]);
2355 }
2356 else if (spell_info_item(items[0]))
2357 {
2358 p = (char_u *)getroom(spin,
2359 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
2360 + STRLEN(items[0])
2361 + STRLEN(items[1]) + 3, FALSE);
2362 if (p != NULL)
2363 {
2364 if (spin->si_info != NULL)
2365 {
2366 STRCPY(p, spin->si_info);
2367 STRCAT(p, "\n");
2368 }
2369 STRCAT(p, items[0]);
2370 STRCAT(p, " ");
2371 STRCAT(p, items[1]);
2372 spin->si_info = p;
2373 }
2374 }
2375 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
2376 && midword == NULL)
2377 {
2378 midword = getroom_save(spin, items[1]);
2379 }
2380 else if (is_aff_rule(items, itemcnt, "TRY", 2))
2381 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002382 // ignored, we look in the tree for what chars may appear
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002383 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002384 // TODO: remove "RAR" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002385 else if ((is_aff_rule(items, itemcnt, "RAR", 2)
2386 || is_aff_rule(items, itemcnt, "RARE", 2))
2387 && aff->af_rare == 0)
2388 {
2389 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
2390 fname, lnum);
2391 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002392 // TODO: remove "KEP" later
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002393 else if ((is_aff_rule(items, itemcnt, "KEP", 2)
2394 || is_aff_rule(items, itemcnt, "KEEPCASE", 2))
2395 && aff->af_keepcase == 0)
2396 {
2397 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
2398 fname, lnum);
2399 }
2400 else if ((is_aff_rule(items, itemcnt, "BAD", 2)
2401 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
2402 && aff->af_bad == 0)
2403 {
2404 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
2405 fname, lnum);
2406 }
2407 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
2408 && aff->af_needaffix == 0)
2409 {
2410 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
2411 fname, lnum);
2412 }
2413 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
2414 && aff->af_circumfix == 0)
2415 {
2416 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
2417 fname, lnum);
2418 }
2419 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
2420 && aff->af_nosuggest == 0)
2421 {
2422 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
2423 fname, lnum);
2424 }
2425 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
2426 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
2427 && aff->af_needcomp == 0)
2428 {
2429 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
2430 fname, lnum);
2431 }
2432 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
2433 && aff->af_comproot == 0)
2434 {
2435 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
2436 fname, lnum);
2437 }
2438 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
2439 && aff->af_compforbid == 0)
2440 {
2441 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
2442 fname, lnum);
2443 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002444 smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002445 fname, lnum);
2446 }
2447 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
2448 && aff->af_comppermit == 0)
2449 {
2450 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
2451 fname, lnum);
2452 if (aff->af_pref.ht_used > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002453 smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002454 fname, lnum);
2455 }
2456 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
2457 && compflags == NULL)
2458 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002459 // Turn flag "c" into COMPOUNDRULE compatible string "c+",
2460 // "Na" into "Na+", "1234" into "1234+".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002461 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
2462 if (p != NULL)
2463 {
2464 STRCPY(p, items[1]);
2465 STRCAT(p, "+");
2466 compflags = p;
2467 }
2468 }
2469 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
2470 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002471 // We don't use the count, but do check that it's a number and
2472 // not COMPOUNDRULE mistyped.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002473 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002474 smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002475 fname, lnum, items[1]);
2476 }
2477 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
2478 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002479 // Don't use the first rule if it is a number.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002480 if (compflags != NULL || *skipdigits(items[1]) != NUL)
2481 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002482 // Concatenate this string to previously defined ones,
2483 // using a slash to separate them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002484 l = (int)STRLEN(items[1]) + 1;
2485 if (compflags != NULL)
2486 l += (int)STRLEN(compflags) + 1;
2487 p = getroom(spin, l, FALSE);
2488 if (p != NULL)
2489 {
2490 if (compflags != NULL)
2491 {
2492 STRCPY(p, compflags);
2493 STRCAT(p, "/");
2494 }
2495 STRCAT(p, items[1]);
2496 compflags = p;
2497 }
2498 }
2499 }
2500 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
2501 && compmax == 0)
2502 {
2503 compmax = atoi((char *)items[1]);
2504 if (compmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002505 smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002506 fname, lnum, items[1]);
2507 }
2508 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
2509 && compminlen == 0)
2510 {
2511 compminlen = atoi((char *)items[1]);
2512 if (compminlen == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002513 smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002514 fname, lnum, items[1]);
2515 }
2516 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
2517 && compsylmax == 0)
2518 {
2519 compsylmax = atoi((char *)items[1]);
2520 if (compsylmax == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002521 smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002522 fname, lnum, items[1]);
2523 }
2524 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
2525 {
2526 compoptions |= COMP_CHECKDUP;
2527 }
2528 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
2529 {
2530 compoptions |= COMP_CHECKREP;
2531 }
2532 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
2533 {
2534 compoptions |= COMP_CHECKCASE;
2535 }
2536 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
2537 {
2538 compoptions |= COMP_CHECKTRIPLE;
2539 }
2540 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
2541 {
2542 if (atoi((char *)items[1]) == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002543 smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002544 fname, lnum, items[1]);
2545 }
2546 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
2547 {
2548 garray_T *gap = &spin->si_comppat;
2549 int i;
2550
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002551 // Only add the couple if it isn't already there.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002552 for (i = 0; i < gap->ga_len - 1; i += 2)
2553 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
2554 && STRCMP(((char_u **)(gap->ga_data))[i + 1],
2555 items[2]) == 0)
2556 break;
2557 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
2558 {
2559 ((char_u **)(gap->ga_data))[gap->ga_len++]
2560 = getroom_save(spin, items[1]);
2561 ((char_u **)(gap->ga_data))[gap->ga_len++]
2562 = getroom_save(spin, items[2]);
2563 }
2564 }
2565 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
2566 && syllable == NULL)
2567 {
2568 syllable = getroom_save(spin, items[1]);
2569 }
2570 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
2571 {
2572 spin->si_nobreak = TRUE;
2573 }
2574 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
2575 {
2576 spin->si_nosplitsugs = TRUE;
2577 }
2578 else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
2579 {
2580 spin->si_nocompoundsugs = TRUE;
2581 }
2582 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
2583 {
2584 spin->si_nosugfile = TRUE;
2585 }
2586 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
2587 {
2588 aff->af_pfxpostpone = TRUE;
2589 }
2590 else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
2591 {
2592 aff->af_ignoreextra = TRUE;
2593 }
2594 else if ((STRCMP(items[0], "PFX") == 0
2595 || STRCMP(items[0], "SFX") == 0)
2596 && aff_todo == 0
2597 && itemcnt >= 4)
2598 {
2599 int lasti = 4;
2600 char_u key[AH_KEY_LEN];
2601
2602 if (*items[0] == 'P')
2603 tp = &aff->af_pref;
2604 else
2605 tp = &aff->af_suff;
2606
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002607 // Myspell allows the same affix name to be used multiple
2608 // times. The affix files that do this have an undocumented
2609 // "S" flag on all but the last block, thus we check for that
2610 // and store it in ah_follows.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002611 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
2612 hi = hash_find(tp, key);
2613 if (!HASHITEM_EMPTY(hi))
2614 {
2615 cur_aff = HI2AH(hi);
2616 if (cur_aff->ah_combine != (*items[2] == 'Y'))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002617 smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002618 fname, lnum, items[1]);
2619 if (!cur_aff->ah_follows)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002620 smsg(_("Duplicate affix in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002621 fname, lnum, items[1]);
2622 }
2623 else
2624 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002625 // New affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002626 cur_aff = (affheader_T *)getroom(spin,
2627 sizeof(affheader_T), TRUE);
2628 if (cur_aff == NULL)
2629 break;
2630 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
2631 fname, lnum);
2632 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
2633 break;
2634 if (cur_aff->ah_flag == aff->af_bad
2635 || cur_aff->ah_flag == aff->af_rare
2636 || cur_aff->ah_flag == aff->af_keepcase
2637 || cur_aff->ah_flag == aff->af_needaffix
2638 || cur_aff->ah_flag == aff->af_circumfix
2639 || cur_aff->ah_flag == aff->af_nosuggest
2640 || cur_aff->ah_flag == aff->af_needcomp
2641 || cur_aff->ah_flag == aff->af_comproot)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002642 smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002643 fname, lnum, items[1]);
2644 STRCPY(cur_aff->ah_key, items[1]);
Bram Moolenaaref2c3252022-11-25 16:31:51 +00002645 hash_add(tp, cur_aff->ah_key, "spelling");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002646
2647 cur_aff->ah_combine = (*items[2] == 'Y');
2648 }
2649
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002650 // Check for the "S" flag, which apparently means that another
2651 // block with the same affix name is following.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002652 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
2653 {
2654 ++lasti;
2655 cur_aff->ah_follows = TRUE;
2656 }
2657 else
2658 cur_aff->ah_follows = FALSE;
2659
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002660 // Myspell allows extra text after the item, but that might
2661 // mean mistakes go unnoticed. Require a comment-starter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002662 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002663 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002664
2665 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002666 smsg(_("Expected Y or N in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002667 fname, lnum, items[2]);
2668
2669 if (*items[0] == 'P' && aff->af_pfxpostpone)
2670 {
2671 if (cur_aff->ah_newID == 0)
2672 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002673 // Use a new number in the .spl file later, to be able
2674 // to handle multiple .aff files.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002675 check_renumber(spin);
2676 cur_aff->ah_newID = ++spin->si_newprefID;
2677
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002678 // We only really use ah_newID if the prefix is
2679 // postponed. We know that only after handling all
2680 // the items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002681 did_postpone_prefix = FALSE;
2682 }
2683 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002684 // Did use the ID in a previous block.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002685 did_postpone_prefix = TRUE;
2686 }
2687
2688 aff_todo = atoi((char *)items[3]);
2689 }
2690 else if ((STRCMP(items[0], "PFX") == 0
2691 || STRCMP(items[0], "SFX") == 0)
2692 && aff_todo > 0
2693 && STRCMP(cur_aff->ah_key, items[1]) == 0
2694 && itemcnt >= 5)
2695 {
2696 affentry_T *aff_entry;
2697 int upper = FALSE;
2698 int lasti = 5;
2699
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002700 // Myspell allows extra text after the item, but that might
2701 // mean mistakes go unnoticed. Require a comment-starter,
2702 // unless IGNOREEXTRA is used. Hunspell uses a "-" item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002703 if (itemcnt > lasti
2704 && !aff->af_ignoreextra
2705 && *items[lasti] != '#'
2706 && (STRCMP(items[lasti], "-") != 0
2707 || itemcnt != lasti + 1))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002708 smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002709
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002710 // New item for an affix letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002711 --aff_todo;
2712 aff_entry = (affentry_T *)getroom(spin,
2713 sizeof(affentry_T), TRUE);
2714 if (aff_entry == NULL)
2715 break;
2716
2717 if (STRCMP(items[2], "0") != 0)
2718 aff_entry->ae_chop = getroom_save(spin, items[2]);
2719 if (STRCMP(items[3], "0") != 0)
2720 {
2721 aff_entry->ae_add = getroom_save(spin, items[3]);
2722
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002723 // Recognize flags on the affix: abcd/XYZ
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002724 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
2725 if (aff_entry->ae_flags != NULL)
2726 {
2727 *aff_entry->ae_flags++ = NUL;
2728 aff_process_flags(aff, aff_entry);
2729 }
2730 }
2731
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002732 // Don't use an affix entry with non-ASCII characters when
2733 // "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002734 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
2735 || has_non_ascii(aff_entry->ae_add)))
2736 {
2737 aff_entry->ae_next = cur_aff->ah_first;
2738 cur_aff->ah_first = aff_entry;
2739
2740 if (STRCMP(items[4], ".") != 0)
2741 {
2742 char_u buf[MAXLINELEN];
2743
2744 aff_entry->ae_cond = getroom_save(spin, items[4]);
2745 if (*items[0] == 'P')
2746 sprintf((char *)buf, "^%s", items[4]);
2747 else
2748 sprintf((char *)buf, "%s$", items[4]);
2749 aff_entry->ae_prog = vim_regcomp(buf,
2750 RE_MAGIC + RE_STRING + RE_STRICT);
2751 if (aff_entry->ae_prog == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002752 smsg(_("Broken condition in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002753 fname, lnum, items[4]);
2754 }
2755
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002756 // For postponed prefixes we need an entry in si_prefcond
2757 // for the condition. Use an existing one if possible.
2758 // Can't be done for an affix with flags, ignoring
2759 // COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002760 if (*items[0] == 'P' && aff->af_pfxpostpone
2761 && aff_entry->ae_flags == NULL)
2762 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002763 // When the chop string is one lower-case letter and
2764 // the add string ends in the upper-case letter we set
2765 // the "upper" flag, clear "ae_chop" and remove the
2766 // letters from "ae_add". The condition must either
2767 // be empty or start with the same letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002768 if (aff_entry->ae_chop != NULL
2769 && aff_entry->ae_add != NULL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002770 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002771 aff_entry->ae_chop)] == NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002772 {
2773 int c, c_up;
2774
2775 c = PTR2CHAR(aff_entry->ae_chop);
2776 c_up = SPELL_TOUPPER(c);
2777 if (c_up != c
2778 && (aff_entry->ae_cond == NULL
2779 || PTR2CHAR(aff_entry->ae_cond) == c))
2780 {
2781 p = aff_entry->ae_add
2782 + STRLEN(aff_entry->ae_add);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002783 MB_PTR_BACK(aff_entry->ae_add, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002784 if (PTR2CHAR(p) == c_up)
2785 {
2786 upper = TRUE;
2787 aff_entry->ae_chop = NULL;
2788 *p = NUL;
2789
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002790 // The condition is matched with the
2791 // actual word, thus must check for the
2792 // upper-case letter.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002793 if (aff_entry->ae_cond != NULL)
2794 {
2795 char_u buf[MAXLINELEN];
Bram Moolenaar264b74f2019-01-24 17:18:42 +01002796
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002797 if (has_mbyte)
2798 {
2799 onecap_copy(items[4], buf, TRUE);
2800 aff_entry->ae_cond = getroom_save(
2801 spin, buf);
2802 }
2803 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002804 *aff_entry->ae_cond = c_up;
2805 if (aff_entry->ae_cond != NULL)
2806 {
2807 sprintf((char *)buf, "^%s",
2808 aff_entry->ae_cond);
2809 vim_regfree(aff_entry->ae_prog);
2810 aff_entry->ae_prog = vim_regcomp(
2811 buf, RE_MAGIC + RE_STRING);
2812 }
2813 }
2814 }
2815 }
2816 }
2817
2818 if (aff_entry->ae_chop == NULL
2819 && aff_entry->ae_flags == NULL)
2820 {
2821 int idx;
2822 char_u **pp;
2823 int n;
2824
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002825 // Find a previously used condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002826 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
2827 --idx)
2828 {
2829 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
2830 if (str_equal(p, aff_entry->ae_cond))
2831 break;
2832 }
2833 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
2834 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002835 // Not found, add a new condition.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002836 idx = spin->si_prefcond.ga_len++;
2837 pp = ((char_u **)spin->si_prefcond.ga_data)
2838 + idx;
2839 if (aff_entry->ae_cond == NULL)
2840 *pp = NULL;
2841 else
2842 *pp = getroom_save(spin,
2843 aff_entry->ae_cond);
2844 }
2845
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002846 // Add the prefix to the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002847 if (aff_entry->ae_add == NULL)
2848 p = (char_u *)"";
2849 else
2850 p = aff_entry->ae_add;
2851
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002852 // PFX_FLAGS is a negative number, so that
2853 // tree_add_word() knows this is the prefix tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002854 n = PFX_FLAGS;
2855 if (!cur_aff->ah_combine)
2856 n |= WFP_NC;
2857 if (upper)
2858 n |= WFP_UP;
2859 if (aff_entry->ae_comppermit)
2860 n |= WFP_COMPPERMIT;
2861 if (aff_entry->ae_compforbid)
2862 n |= WFP_COMPFORBID;
2863 tree_add_word(spin, p, spin->si_prefroot, n,
2864 idx, cur_aff->ah_newID);
2865 did_postpone_prefix = TRUE;
2866 }
2867
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002868 // Didn't actually use ah_newID, backup si_newprefID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002869 if (aff_todo == 0 && !did_postpone_prefix)
2870 {
2871 --spin->si_newprefID;
2872 cur_aff->ah_newID = 0;
2873 }
2874 }
2875 }
2876 }
2877 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
2878 {
2879 fol = vim_strsave(items[1]);
2880 }
2881 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
2882 {
2883 low = vim_strsave(items[1]);
2884 }
2885 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
2886 {
2887 upp = vim_strsave(items[1]);
2888 }
2889 else if (is_aff_rule(items, itemcnt, "REP", 2)
2890 || is_aff_rule(items, itemcnt, "REPSAL", 2))
2891 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002892 // Ignore REP/REPSAL count
Keith Thompson184f71c2024-01-04 21:19:04 +01002893 if (!SAFE_isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002894 smsg(_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002895 fname, lnum);
2896 }
2897 else if ((STRCMP(items[0], "REP") == 0
2898 || STRCMP(items[0], "REPSAL") == 0)
2899 && itemcnt >= 3)
2900 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002901 // REP/REPSAL item
2902 // Myspell ignores extra arguments, we require it starts with
2903 // # to detect mistakes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002904 if (itemcnt > 3 && items[3][0] != '#')
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002905 smsg(_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002906 if (items[0][3] == 'S' ? do_repsal : do_rep)
2907 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002908 // Replace underscore with space (can't include a space
2909 // directly).
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002910 for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002911 if (*p == '_')
2912 *p = ' ';
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002913 for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002914 if (*p == '_')
2915 *p = ' ';
2916 add_fromto(spin, items[0][3] == 'S'
2917 ? &spin->si_repsal
2918 : &spin->si_rep, items[1], items[2]);
2919 }
2920 }
2921 else if (is_aff_rule(items, itemcnt, "MAP", 2))
2922 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002923 // MAP item or count
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002924 if (!found_map)
2925 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002926 // First line contains the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002927 found_map = TRUE;
Keith Thompson184f71c2024-01-04 21:19:04 +01002928 if (!SAFE_isdigit(*items[1]))
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002929 smsg(_("Expected MAP count in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002930 fname, lnum);
2931 }
2932 else if (do_mapline)
2933 {
2934 int c;
2935
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002936 // Check that every character appears only once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002937 for (p = items[1]; *p != NUL; )
2938 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002939 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002940 if ((spin->si_map.ga_len > 0
2941 && vim_strchr(spin->si_map.ga_data, c)
2942 != NULL)
2943 || vim_strchr(p, c) != NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01002944 smsg(_("Duplicate character in MAP in %s line %d"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002945 fname, lnum);
2946 }
2947
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002948 // We simply concatenate all the MAP strings, separated by
2949 // slashes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002950 ga_concat(&spin->si_map, items[1]);
2951 ga_append(&spin->si_map, '/');
2952 }
2953 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002954 // Accept "SAL from to" and "SAL from to #comment".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002955 else if (is_aff_rule(items, itemcnt, "SAL", 3))
2956 {
2957 if (do_sal)
2958 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002959 // SAL item (sounds-a-like)
2960 // Either one of the known keys or a from-to pair.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002961 if (STRCMP(items[1], "followup") == 0)
2962 spin->si_followup = sal_to_bool(items[2]);
2963 else if (STRCMP(items[1], "collapse_result") == 0)
2964 spin->si_collapse = sal_to_bool(items[2]);
2965 else if (STRCMP(items[1], "remove_accents") == 0)
2966 spin->si_rem_accents = sal_to_bool(items[2]);
2967 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01002968 // when "to" is "_" it means empty
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002969 add_fromto(spin, &spin->si_sal, items[1],
2970 STRCMP(items[2], "_") == 0 ? (char_u *)""
2971 : items[2]);
2972 }
2973 }
2974 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
2975 && sofofrom == NULL)
2976 {
2977 sofofrom = getroom_save(spin, items[1]);
2978 }
2979 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
2980 && sofoto == NULL)
2981 {
2982 sofoto = getroom_save(spin, items[1]);
2983 }
2984 else if (STRCMP(items[0], "COMMON") == 0)
2985 {
2986 int i;
2987
2988 for (i = 1; i < itemcnt; ++i)
2989 {
2990 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
2991 items[i])))
2992 {
2993 p = vim_strsave(items[i]);
2994 if (p == NULL)
2995 break;
Bram Moolenaaref2c3252022-11-25 16:31:51 +00002996 hash_add(&spin->si_commonwords, p, "spelling");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002997 }
2998 }
2999 }
3000 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003001 smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003002 fname, lnum, items[0]);
3003 }
3004 }
3005
3006 if (fol != NULL || low != NULL || upp != NULL)
3007 {
3008 if (spin->si_clear_chartab)
3009 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003010 // Clear the char type tables, don't want to use any of the
3011 // currently used spell properties.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003012 init_spell_chartab();
3013 spin->si_clear_chartab = FALSE;
3014 }
3015
3016 /*
3017 * Don't write a word table for an ASCII file, so that we don't check
3018 * for conflicts with a word table that matches 'encoding'.
3019 * Don't write one for utf-8 either, we use utf_*() and
3020 * mb_get_class(), the list of chars in the file will be incomplete.
3021 */
Bram Moolenaar264b74f2019-01-24 17:18:42 +01003022 if (!spin->si_ascii && !enc_utf8)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003023 {
3024 if (fol == NULL || low == NULL || upp == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003025 smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003026 else
3027 (void)set_spell_chartab(fol, low, upp);
3028 }
3029
3030 vim_free(fol);
3031 vim_free(low);
3032 vim_free(upp);
3033 }
3034
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003035 // Use compound specifications of the .aff file for the spell info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003036 if (compmax != 0)
3037 {
3038 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
3039 spin->si_compmax = compmax;
3040 }
3041
3042 if (compminlen != 0)
3043 {
3044 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
3045 spin->si_compminlen = compminlen;
3046 }
3047
3048 if (compsylmax != 0)
3049 {
3050 if (syllable == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003051 smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003052 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
3053 spin->si_compsylmax = compsylmax;
3054 }
3055
3056 if (compoptions != 0)
3057 {
3058 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
3059 spin->si_compoptions |= compoptions;
3060 }
3061
3062 if (compflags != NULL)
3063 process_compflags(spin, aff, compflags);
3064
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003065 // Check that we didn't use too many renumbered flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003066 if (spin->si_newcompID < spin->si_newprefID)
3067 {
3068 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003069 msg(_("Too many postponed prefixes"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003070 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar32526b32019-01-19 17:43:09 +01003071 msg(_("Too many compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003072 else
Bram Moolenaar32526b32019-01-19 17:43:09 +01003073 msg(_("Too many postponed prefixes and/or compound flags"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003074 }
3075
3076 if (syllable != NULL)
3077 {
3078 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
3079 spin->si_syllable = syllable;
3080 }
3081
3082 if (sofofrom != NULL || sofoto != NULL)
3083 {
3084 if (sofofrom == NULL || sofoto == NULL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003085 smsg(_("Missing SOFO%s line in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003086 sofofrom == NULL ? "FROM" : "TO", fname);
3087 else if (spin->si_sal.ga_len > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003088 smsg(_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003089 else
3090 {
3091 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
3092 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
3093 spin->si_sofofr = sofofrom;
3094 spin->si_sofoto = sofoto;
3095 }
3096 }
3097
3098 if (midword != NULL)
3099 {
3100 aff_check_string(spin->si_midword, midword, "MIDWORD");
3101 spin->si_midword = midword;
3102 }
3103
3104 vim_free(pc);
3105 fclose(fd);
3106 return aff;
3107}
3108
3109/*
3110 * Return TRUE when items[0] equals "rulename", there are "mincount" items or
3111 * a comment is following after item "mincount".
3112 */
3113 static int
3114is_aff_rule(
3115 char_u **items,
3116 int itemcnt,
3117 char *rulename,
3118 int mincount)
3119{
3120 return (STRCMP(items[0], rulename) == 0
3121 && (itemcnt == mincount
3122 || (itemcnt > mincount && items[mincount][0] == '#')));
3123}
3124
3125/*
3126 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
3127 * ae_flags to ae_comppermit and ae_compforbid.
3128 */
3129 static void
3130aff_process_flags(afffile_T *affile, affentry_T *entry)
3131{
3132 char_u *p;
3133 char_u *prevp;
3134 unsigned flag;
3135
3136 if (entry->ae_flags != NULL
3137 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
3138 {
3139 for (p = entry->ae_flags; *p != NUL; )
3140 {
3141 prevp = p;
3142 flag = get_affitem(affile->af_flagtype, &p);
3143 if (flag == affile->af_comppermit || flag == affile->af_compforbid)
3144 {
3145 STRMOVE(prevp, p);
3146 p = prevp;
3147 if (flag == affile->af_comppermit)
3148 entry->ae_comppermit = TRUE;
3149 else
3150 entry->ae_compforbid = TRUE;
3151 }
3152 if (affile->af_flagtype == AFT_NUM && *p == ',')
3153 ++p;
3154 }
3155 if (*entry->ae_flags == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003156 entry->ae_flags = NULL; // nothing left
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003157 }
3158}
3159
3160/*
3161 * Return TRUE if "s" is the name of an info item in the affix file.
3162 */
3163 static int
3164spell_info_item(char_u *s)
3165{
3166 return STRCMP(s, "NAME") == 0
3167 || STRCMP(s, "HOME") == 0
3168 || STRCMP(s, "VERSION") == 0
3169 || STRCMP(s, "AUTHOR") == 0
3170 || STRCMP(s, "EMAIL") == 0
3171 || STRCMP(s, "COPYRIGHT") == 0;
3172}
3173
3174/*
3175 * Turn an affix flag name into a number, according to the FLAG type.
3176 * returns zero for failure.
3177 */
3178 static unsigned
3179affitem2flag(
3180 int flagtype,
3181 char_u *item,
3182 char_u *fname,
3183 int lnum)
3184{
3185 unsigned res;
3186 char_u *p = item;
3187
3188 res = get_affitem(flagtype, &p);
3189 if (res == 0)
3190 {
3191 if (flagtype == AFT_NUM)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003192 smsg(_("Flag is not a number in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003193 fname, lnum, item);
3194 else
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003195 smsg(_("Illegal flag in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003196 fname, lnum, item);
3197 }
3198 if (*p != NUL)
3199 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003200 smsg(_(e_affname), fname, lnum, item);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003201 return 0;
3202 }
3203
3204 return res;
3205}
3206
3207/*
3208 * Get one affix name from "*pp" and advance the pointer.
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003209 * Returns ZERO_FLAG for "0".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003210 * Returns zero for an error, still advances the pointer then.
3211 */
3212 static unsigned
3213get_affitem(int flagtype, char_u **pp)
3214{
3215 int res;
3216
3217 if (flagtype == AFT_NUM)
3218 {
3219 if (!VIM_ISDIGIT(**pp))
3220 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003221 ++*pp; // always advance, avoid getting stuck
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003222 return 0;
3223 }
3224 res = getdigits(pp);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003225 if (res == 0)
3226 res = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003227 }
3228 else
3229 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003230 res = mb_ptr2char_adv(pp);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003231 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
3232 && res >= 'A' && res <= 'Z'))
3233 {
3234 if (**pp == NUL)
3235 return 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003236 res = mb_ptr2char_adv(pp) + (res << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003237 }
3238 }
3239 return res;
3240}
3241
3242/*
3243 * Process the "compflags" string used in an affix file and append it to
3244 * spin->si_compflags.
3245 * The processing involves changing the affix names to ID numbers, so that
3246 * they fit in one byte.
3247 */
3248 static void
3249process_compflags(
3250 spellinfo_T *spin,
3251 afffile_T *aff,
3252 char_u *compflags)
3253{
3254 char_u *p;
3255 char_u *prevp;
3256 unsigned flag;
3257 compitem_T *ci;
3258 int id;
3259 int len;
3260 char_u *tp;
3261 char_u key[AH_KEY_LEN];
3262 hashitem_T *hi;
3263
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003264 // Make room for the old and the new compflags, concatenated with a / in
3265 // between. Processing it makes it shorter, but we don't know by how
3266 // much, thus allocate the maximum.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003267 len = (int)STRLEN(compflags) + 1;
3268 if (spin->si_compflags != NULL)
3269 len += (int)STRLEN(spin->si_compflags) + 1;
3270 p = getroom(spin, len, FALSE);
3271 if (p == NULL)
3272 return;
3273 if (spin->si_compflags != NULL)
3274 {
3275 STRCPY(p, spin->si_compflags);
3276 STRCAT(p, "/");
3277 }
3278 spin->si_compflags = p;
3279 tp = p + STRLEN(p);
3280
3281 for (p = compflags; *p != NUL; )
3282 {
3283 if (vim_strchr((char_u *)"/?*+[]", *p) != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003284 // Copy non-flag characters directly.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003285 *tp++ = *p++;
3286 else
3287 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003288 // First get the flag number, also checks validity.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003289 prevp = p;
3290 flag = get_affitem(aff->af_flagtype, &p);
3291 if (flag != 0)
3292 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003293 // Find the flag in the hashtable. If it was used before, use
3294 // the existing ID. Otherwise add a new entry.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003295 vim_strncpy(key, prevp, p - prevp);
3296 hi = hash_find(&aff->af_comp, key);
3297 if (!HASHITEM_EMPTY(hi))
3298 id = HI2CI(hi)->ci_newID;
3299 else
3300 {
3301 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
3302 if (ci == NULL)
3303 break;
3304 STRCPY(ci->ci_key, key);
3305 ci->ci_flag = flag;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003306 // Avoid using a flag ID that has a special meaning in a
3307 // regexp (also inside []).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003308 do
3309 {
3310 check_renumber(spin);
3311 id = spin->si_newcompID--;
3312 } while (vim_strchr((char_u *)"/?*+[]\\-^", id) != NULL);
3313 ci->ci_newID = id;
Bram Moolenaaref2c3252022-11-25 16:31:51 +00003314 hash_add(&aff->af_comp, ci->ci_key, "spelling");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003315 }
3316 *tp++ = id;
3317 }
3318 if (aff->af_flagtype == AFT_NUM && *p == ',')
3319 ++p;
3320 }
3321 }
3322
3323 *tp = NUL;
3324}
3325
3326/*
3327 * Check that the new IDs for postponed affixes and compounding don't overrun
3328 * each other. We have almost 255 available, but start at 0-127 to avoid
3329 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
3330 * When that is used up an error message is given.
3331 */
3332 static void
3333check_renumber(spellinfo_T *spin)
3334{
3335 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
3336 {
3337 spin->si_newprefID = 127;
3338 spin->si_newcompID = 255;
3339 }
3340}
3341
3342/*
3343 * Return TRUE if flag "flag" appears in affix list "afflist".
3344 */
3345 static int
3346flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
3347{
3348 char_u *p;
3349 unsigned n;
3350
3351 switch (flagtype)
3352 {
3353 case AFT_CHAR:
3354 return vim_strchr(afflist, flag) != NULL;
3355
3356 case AFT_CAPLONG:
3357 case AFT_LONG:
3358 for (p = afflist; *p != NUL; )
3359 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003360 n = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003361 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
3362 && *p != NUL)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003363 n = mb_ptr2char_adv(&p) + (n << 16);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003364 if (n == flag)
3365 return TRUE;
3366 }
3367 break;
3368
3369 case AFT_NUM:
3370 for (p = afflist; *p != NUL; )
3371 {
3372 n = getdigits(&p);
Bram Moolenaar3d2a47c2019-11-07 20:48:42 +01003373 if (n == 0)
3374 n = ZERO_FLAG;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003375 if (n == flag)
3376 return TRUE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003377 if (*p != NUL) // skip over comma
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003378 ++p;
3379 }
3380 break;
3381 }
3382 return FALSE;
3383}
3384
/*
 * Warn when the number "spinval" was set before (is not zero) and differs
 * from "affval".  "name" is the item name used in the message.
 */
    static void
aff_check_number(int spinval, int affval, char *name)
{
    // Not set yet, or the same value: nothing to report.
    if (spinval == 0 || spinval == affval)
        return;

    smsg(_("%s value differs from what is used in another .aff file"), name);
}
3394
3395/*
3396 * Give a warning when "spinval" and "affval" strings are set and not the same.
3397 */
3398 static void
3399aff_check_string(char_u *spinval, char_u *affval, char *name)
3400{
3401 if (spinval != NULL && STRCMP(spinval, affval) != 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003402 smsg(_("%s value differs from what is used in another .aff file"), name);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003403}
3404
3405/*
3406 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
3407 * NULL as equal.
3408 */
3409 static int
3410str_equal(char_u *s1, char_u *s2)
3411{
3412 if (s1 == NULL || s2 == NULL)
3413 return s1 == s2;
3414 return STRCMP(s1, s2) == 0;
3415}
3416
3417/*
3418 * Add a from-to item to "gap". Used for REP and SAL items.
3419 * They are stored case-folded.
3420 */
3421 static void
3422add_fromto(
3423 spellinfo_T *spin,
3424 garray_T *gap,
3425 char_u *from,
3426 char_u *to)
3427{
3428 fromto_T *ftp;
3429 char_u word[MAXWLEN];
3430
Yegappan Lakshmananfadc02a2023-01-27 21:03:12 +00003431 if (ga_grow(gap, 1) == FAIL)
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00003432 return;
3433
3434 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
3435 (void)spell_casefold(curwin, from, (int)STRLEN(from), word, MAXWLEN);
3436 ftp->ft_from = getroom_save(spin, word);
3437 (void)spell_casefold(curwin, to, (int)STRLEN(to), word, MAXWLEN);
3438 ftp->ft_to = getroom_save(spin, word);
3439 ++gap->ga_len;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003440}
3441
3442/*
3443 * Convert a boolean argument in a SAL line to TRUE or FALSE;
3444 */
3445 static int
3446sal_to_bool(char_u *s)
3447{
3448 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
3449}
3450
3451/*
3452 * Free the structure filled by spell_read_aff().
3453 */
3454 static void
3455spell_free_aff(afffile_T *aff)
3456{
3457 hashtab_T *ht;
3458 hashitem_T *hi;
3459 int todo;
3460 affheader_T *ah;
3461 affentry_T *ae;
3462
3463 vim_free(aff->af_enc);
3464
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003465 // All this trouble to free the "ae_prog" items...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003466 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
3467 {
3468 todo = (int)ht->ht_used;
Yegappan Lakshmanan14113fd2023-03-07 17:13:51 +00003469 FOR_ALL_HASHTAB_ITEMS(ht, hi, todo)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003470 {
3471 if (!HASHITEM_EMPTY(hi))
3472 {
3473 --todo;
3474 ah = HI2AH(hi);
3475 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3476 vim_regfree(ae->ae_prog);
3477 }
3478 }
3479 if (ht == &aff->af_suff)
3480 break;
3481 }
3482
3483 hash_clear(&aff->af_pref);
3484 hash_clear(&aff->af_suff);
3485 hash_clear(&aff->af_comp);
3486}
3487
3488/*
3489 * Read dictionary file "fname".
3490 * Returns OK or FAIL;
3491 */
3492 static int
3493spell_read_dic(spellinfo_T *spin, char_u *fname, afffile_T *affile)
3494{
3495 hashtab_T ht;
3496 char_u line[MAXLINELEN];
3497 char_u *p;
3498 char_u *afflist;
3499 char_u store_afflist[MAXWLEN];
3500 int pfxlen;
3501 int need_affix;
3502 char_u *dw;
3503 char_u *pc;
3504 char_u *w;
3505 int l;
3506 hash_T hash;
3507 hashitem_T *hi;
3508 FILE *fd;
3509 int lnum = 1;
3510 int non_ascii = 0;
3511 int retval = OK;
3512 char_u message[MAXLINELEN + MAXWLEN];
3513 int flags;
3514 int duplicate = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003515 time_T last_msg_time = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003516
3517 /*
3518 * Open the file.
3519 */
3520 fd = mch_fopen((char *)fname, "r");
3521 if (fd == NULL)
3522 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00003523 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003524 return FAIL;
3525 }
3526
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003527 // The hashtable is only used to detect duplicated words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003528 hash_init(&ht);
3529
3530 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02003531 _("Reading dictionary file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003532 spell_message(spin, IObuff);
3533
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003534 // start with a message for the first line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003535 spin->si_msg_count = 999999;
3536
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003537 // Read and ignore the first line: word count.
Bram Moolenaare90d63e2020-09-02 12:58:48 +02003538 if (vim_fgets(line, MAXLINELEN, fd) || !vim_isdigit(*skipwhite(line)))
Bram Moolenaar677658a2022-01-05 16:09:06 +00003539 semsg(_(e_no_word_count_in_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003540
3541 /*
3542 * Read all the lines in the file one by one.
3543 * The words are converted to 'encoding' here, before being added to
3544 * the hashtable.
3545 */
3546 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
3547 {
3548 line_breakcheck();
3549 ++lnum;
3550 if (line[0] == '#' || line[0] == '/')
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003551 continue; // comment line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003552
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003553 // Remove CR, LF and white space from the end. White space halfway
3554 // the word is kept to allow e.g., "et al.".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003555 l = (int)STRLEN(line);
3556 while (l > 0 && line[l - 1] <= ' ')
3557 --l;
3558 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003559 continue; // empty line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003560 line[l] = NUL;
3561
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003562 // Convert from "SET" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003563 if (spin->si_conv.vc_type != CONV_NONE)
3564 {
3565 pc = string_convert(&spin->si_conv, line, NULL);
3566 if (pc == NULL)
3567 {
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003568 smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003569 fname, lnum, line);
3570 continue;
3571 }
3572 w = pc;
3573 }
3574 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003575 {
3576 pc = NULL;
3577 w = line;
3578 }
3579
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003580 // Truncate the word at the "/", set "afflist" to what follows.
3581 // Replace "\/" by "/" and "\\" by "\".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003582 afflist = NULL;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003583 for (p = w; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003584 {
3585 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
3586 STRMOVE(p, p + 1);
3587 else if (*p == '/')
3588 {
3589 *p = NUL;
3590 afflist = p + 1;
3591 break;
3592 }
3593 }
3594
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003595 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003596 if (spin->si_ascii && has_non_ascii(w))
3597 {
3598 ++non_ascii;
3599 vim_free(pc);
3600 continue;
3601 }
3602
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003603 // This takes time, print a message every 10000 words, but not more
3604 // often than once per second.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003605 if (spin->si_verbose && spin->si_msg_count > 10000)
3606 {
3607 spin->si_msg_count = 0;
Bram Moolenaar408c23b2020-06-03 22:15:45 +02003608 if (vim_time() > last_msg_time)
3609 {
3610 last_msg_time = vim_time();
3611 vim_snprintf((char *)message, sizeof(message),
3612 _("line %6d, word %6ld - %s"),
3613 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3614 msg_start();
3615 msg_outtrans_long_attr(message, 0);
3616 msg_clr_eos();
3617 msg_didout = FALSE;
3618 msg_col = 0;
3619 out_flush();
3620 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003621 }
3622
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003623 // Store the word in the hashtable to be able to find duplicates.
=?UTF-8?q?Dundar=20G=C3=B6c?=420fabc2022-01-28 15:28:04 +00003624 dw = getroom_save(spin, w);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003625 if (dw == NULL)
3626 {
3627 retval = FAIL;
3628 vim_free(pc);
3629 break;
3630 }
3631
3632 hash = hash_hash(dw);
3633 hi = hash_lookup(&ht, dw, hash);
3634 if (!HASHITEM_EMPTY(hi))
3635 {
3636 if (p_verbose > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003637 smsg(_("Duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003638 fname, lnum, dw);
3639 else if (duplicate == 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003640 smsg(_("First duplicate word in %s line %d: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003641 fname, lnum, dw);
3642 ++duplicate;
3643 }
3644 else
3645 hash_add_item(&ht, hi, dw, hash);
3646
3647 flags = 0;
3648 store_afflist[0] = NUL;
3649 pfxlen = 0;
3650 need_affix = FALSE;
3651 if (afflist != NULL)
3652 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003653 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003654 flags |= get_affix_flags(affile, afflist);
3655
3656 if (affile->af_needaffix != 0 && flag_in_afflist(
3657 affile->af_flagtype, afflist, affile->af_needaffix))
3658 need_affix = TRUE;
3659
3660 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003661 // Need to store the list of prefix IDs with the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003662 pfxlen = get_pfxlist(affile, afflist, store_afflist);
3663
3664 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003665 // Need to store the list of compound flags with the word.
3666 // Concatenate them to the list of prefix IDs.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003667 get_compflags(affile, afflist, store_afflist + pfxlen);
3668 }
3669
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003670 // Add the word to the word tree(s).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003671 if (store_word(spin, dw, flags, spin->si_region,
3672 store_afflist, need_affix) == FAIL)
3673 retval = FAIL;
3674
3675 if (afflist != NULL)
3676 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003677 // Find all matching suffixes and add the resulting words.
3678 // Additionally do matching prefixes that combine.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003679 if (store_aff_word(spin, dw, afflist, affile,
3680 &affile->af_suff, &affile->af_pref,
3681 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3682 retval = FAIL;
3683
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003684 // Find all matching prefixes and add the resulting words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003685 if (store_aff_word(spin, dw, afflist, affile,
3686 &affile->af_pref, NULL,
3687 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
3688 retval = FAIL;
3689 }
3690
3691 vim_free(pc);
3692 }
3693
3694 if (duplicate > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003695 smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003696 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01003697 smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003698 non_ascii, fname);
3699 hash_clear(&ht);
3700
3701 fclose(fd);
3702 return retval;
3703}
3704
3705/*
3706 * Check for affix flags in "afflist" that are turned into word flags.
3707 * Return WF_ flags.
3708 */
3709 static int
3710get_affix_flags(afffile_T *affile, char_u *afflist)
3711{
3712 int flags = 0;
3713
3714 if (affile->af_keepcase != 0 && flag_in_afflist(
3715 affile->af_flagtype, afflist, affile->af_keepcase))
3716 flags |= WF_KEEPCAP | WF_FIXCAP;
3717 if (affile->af_rare != 0 && flag_in_afflist(
3718 affile->af_flagtype, afflist, affile->af_rare))
3719 flags |= WF_RARE;
3720 if (affile->af_bad != 0 && flag_in_afflist(
3721 affile->af_flagtype, afflist, affile->af_bad))
3722 flags |= WF_BANNED;
3723 if (affile->af_needcomp != 0 && flag_in_afflist(
3724 affile->af_flagtype, afflist, affile->af_needcomp))
3725 flags |= WF_NEEDCOMP;
3726 if (affile->af_comproot != 0 && flag_in_afflist(
3727 affile->af_flagtype, afflist, affile->af_comproot))
3728 flags |= WF_COMPROOT;
3729 if (affile->af_nosuggest != 0 && flag_in_afflist(
3730 affile->af_flagtype, afflist, affile->af_nosuggest))
3731 flags |= WF_NOSUGGEST;
3732 return flags;
3733}
3734
3735/*
3736 * Get the list of prefix IDs from the affix list "afflist".
3737 * Used for PFXPOSTPONE.
3738 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
3739 * and return the number of affixes.
3740 */
3741 static int
3742get_pfxlist(
3743 afffile_T *affile,
3744 char_u *afflist,
3745 char_u *store_afflist)
3746{
3747 char_u *p;
3748 char_u *prevp;
3749 int cnt = 0;
3750 int id;
3751 char_u key[AH_KEY_LEN];
3752 hashitem_T *hi;
3753
3754 for (p = afflist; *p != NUL; )
3755 {
3756 prevp = p;
3757 if (get_affitem(affile->af_flagtype, &p) != 0)
3758 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003759 // A flag is a postponed prefix flag if it appears in "af_pref"
3760 // and its ID is not zero.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003761 vim_strncpy(key, prevp, p - prevp);
3762 hi = hash_find(&affile->af_pref, key);
3763 if (!HASHITEM_EMPTY(hi))
3764 {
3765 id = HI2AH(hi)->ah_newID;
3766 if (id != 0)
3767 store_afflist[cnt++] = id;
3768 }
3769 }
3770 if (affile->af_flagtype == AFT_NUM && *p == ',')
3771 ++p;
3772 }
3773
3774 store_afflist[cnt] = NUL;
3775 return cnt;
3776}
3777
3778/*
3779 * Get the list of compound IDs from the affix list "afflist" that are used
3780 * for compound words.
3781 * Puts the flags in "store_afflist[]".
3782 */
3783 static void
3784get_compflags(
3785 afffile_T *affile,
3786 char_u *afflist,
3787 char_u *store_afflist)
3788{
3789 char_u *p;
3790 char_u *prevp;
3791 int cnt = 0;
3792 char_u key[AH_KEY_LEN];
3793 hashitem_T *hi;
3794
3795 for (p = afflist; *p != NUL; )
3796 {
3797 prevp = p;
3798 if (get_affitem(affile->af_flagtype, &p) != 0)
3799 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003800 // A flag is a compound flag if it appears in "af_comp".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003801 vim_strncpy(key, prevp, p - prevp);
3802 hi = hash_find(&affile->af_comp, key);
3803 if (!HASHITEM_EMPTY(hi))
3804 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
3805 }
3806 if (affile->af_flagtype == AFT_NUM && *p == ',')
3807 ++p;
3808 }
3809
3810 store_afflist[cnt] = NUL;
3811}
3812
3813/*
3814 * Apply affixes to a word and store the resulting words.
3815 * "ht" is the hashtable with affentry_T that need to be applied, either
3816 * prefixes or suffixes.
3817 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
3818 * the resulting words for combining affixes.
3819 *
3820 * Returns FAIL when out of memory.
3821 */
3822 static int
3823store_aff_word(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003824 spellinfo_T *spin, // spell info
3825 char_u *word, // basic word start
3826 char_u *afflist, // list of names of supported affixes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003827 afffile_T *affile,
3828 hashtab_T *ht,
3829 hashtab_T *xht,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003830 int condit, // CONDIT_SUF et al.
3831 int flags, // flags for the word
3832 char_u *pfxlist, // list of prefix IDs
3833 int pfxlen) // nr of flags in "pfxlist" for prefixes, rest
3834 // is compound flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003835{
3836 int todo;
3837 hashitem_T *hi;
3838 affheader_T *ah;
3839 affentry_T *ae;
3840 char_u newword[MAXWLEN];
3841 int retval = OK;
3842 int i, j;
3843 char_u *p;
3844 int use_flags;
3845 char_u *use_pfxlist;
3846 int use_pfxlen;
3847 int need_affix;
3848 char_u store_afflist[MAXWLEN];
3849 char_u pfx_pfxlist[MAXWLEN];
3850 size_t wordlen = STRLEN(word);
3851 int use_condit;
3852
3853 todo = (int)ht->ht_used;
3854 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
3855 {
3856 if (!HASHITEM_EMPTY(hi))
3857 {
3858 --todo;
3859 ah = HI2AH(hi);
3860
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003861 // Check that the affix combines, if required, and that the word
3862 // supports this affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003863 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
3864 && flag_in_afflist(affile->af_flagtype, afflist,
3865 ah->ah_flag))
3866 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003867 // Loop over all affix entries with this name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003868 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3869 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003870 // Check the condition. It's not logical to match case
3871 // here, but it is required for compatibility with
3872 // Myspell.
3873 // Another requirement from Myspell is that the chop
3874 // string is shorter than the word itself.
3875 // For prefixes, when "PFXPOSTPONE" was used, only do
3876 // prefixes with a chop string and/or flags.
3877 // When a previously added affix had CIRCUMFIX this one
3878 // must have it too, if it had not then this one must not
3879 // have one either.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003880 if ((xht != NULL || !affile->af_pfxpostpone
3881 || ae->ae_chop != NULL
3882 || ae->ae_flags != NULL)
3883 && (ae->ae_chop == NULL
3884 || STRLEN(ae->ae_chop) < wordlen)
3885 && (ae->ae_prog == NULL
3886 || vim_regexec_prog(&ae->ae_prog, FALSE,
3887 word, (colnr_T)0))
3888 && (((condit & CONDIT_CFIX) == 0)
3889 == ((condit & CONDIT_AFF) == 0
3890 || ae->ae_flags == NULL
3891 || !flag_in_afflist(affile->af_flagtype,
3892 ae->ae_flags, affile->af_circumfix))))
3893 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003894 // Match. Remove the chop and add the affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003895 if (xht == NULL)
3896 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003897 // prefix: chop/add at the start of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003898 if (ae->ae_add == NULL)
3899 *newword = NUL;
3900 else
3901 vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
3902 p = word;
3903 if (ae->ae_chop != NULL)
3904 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003905 // Skip chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003906 if (has_mbyte)
3907 {
3908 i = mb_charlen(ae->ae_chop);
3909 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003910 MB_PTR_ADV(p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003911 }
3912 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003913 p += STRLEN(ae->ae_chop);
3914 }
3915 STRCAT(newword, p);
3916 }
3917 else
3918 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003919 // suffix: chop/add at the end of the word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003920 vim_strncpy(newword, word, MAXWLEN - 1);
3921 if (ae->ae_chop != NULL)
3922 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003923 // Remove chop string.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003924 p = newword + STRLEN(newword);
3925 i = (int)MB_CHARLEN(ae->ae_chop);
3926 for ( ; i > 0; --i)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003927 MB_PTR_BACK(newword, p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003928 *p = NUL;
3929 }
3930 if (ae->ae_add != NULL)
3931 STRCAT(newword, ae->ae_add);
3932 }
3933
3934 use_flags = flags;
3935 use_pfxlist = pfxlist;
3936 use_pfxlen = pfxlen;
3937 need_affix = FALSE;
3938 use_condit = condit | CONDIT_COMB | CONDIT_AFF;
3939 if (ae->ae_flags != NULL)
3940 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003941 // Extract flags from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003942 use_flags |= get_affix_flags(affile, ae->ae_flags);
3943
3944 if (affile->af_needaffix != 0 && flag_in_afflist(
3945 affile->af_flagtype, ae->ae_flags,
3946 affile->af_needaffix))
3947 need_affix = TRUE;
3948
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003949 // When there is a CIRCUMFIX flag the other affix
3950 // must also have it and we don't add the word
3951 // with one affix.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003952 if (affile->af_circumfix != 0 && flag_in_afflist(
3953 affile->af_flagtype, ae->ae_flags,
3954 affile->af_circumfix))
3955 {
3956 use_condit |= CONDIT_CFIX;
3957 if ((condit & CONDIT_CFIX) == 0)
3958 need_affix = TRUE;
3959 }
3960
3961 if (affile->af_pfxpostpone
3962 || spin->si_compflags != NULL)
3963 {
3964 if (affile->af_pfxpostpone)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003965 // Get prefix IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003966 use_pfxlen = get_pfxlist(affile,
3967 ae->ae_flags, store_afflist);
3968 else
3969 use_pfxlen = 0;
3970 use_pfxlist = store_afflist;
3971
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003972 // Combine the prefix IDs. Avoid adding the
3973 // same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003974 for (i = 0; i < pfxlen; ++i)
3975 {
3976 for (j = 0; j < use_pfxlen; ++j)
3977 if (pfxlist[i] == use_pfxlist[j])
3978 break;
3979 if (j == use_pfxlen)
3980 use_pfxlist[use_pfxlen++] = pfxlist[i];
3981 }
3982
3983 if (spin->si_compflags != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003984 // Get compound IDS from the affix list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003985 get_compflags(affile, ae->ae_flags,
3986 use_pfxlist + use_pfxlen);
3987
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01003988 // Combine the list of compound flags.
3989 // Concatenate them to the prefix IDs list.
3990 // Avoid adding the same ID twice.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003991 for (i = pfxlen; pfxlist[i] != NUL; ++i)
3992 {
3993 for (j = use_pfxlen;
3994 use_pfxlist[j] != NUL; ++j)
3995 if (pfxlist[i] == use_pfxlist[j])
3996 break;
3997 if (use_pfxlist[j] == NUL)
3998 {
3999 use_pfxlist[j++] = pfxlist[i];
4000 use_pfxlist[j] = NUL;
4001 }
4002 }
4003 }
4004 }
4005
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004006 // Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
4007 // use the compound flags.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004008 if (use_pfxlist != NULL && ae->ae_compforbid)
4009 {
4010 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
4011 use_pfxlist = pfx_pfxlist;
4012 }
4013
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004014 // When there are postponed prefixes...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004015 if (spin->si_prefroot != NULL
4016 && spin->si_prefroot->wn_sibling != NULL)
4017 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004018 // ... add a flag to indicate an affix was used.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004019 use_flags |= WF_HAS_AFF;
4020
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004021 // ... don't use a prefix list if combining
4022 // affixes is not allowed. But do use the
4023 // compound flags after them.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004024 if (!ah->ah_combine && use_pfxlist != NULL)
4025 use_pfxlist += use_pfxlen;
4026 }
4027
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004028 // When compounding is supported and there is no
4029 // "COMPOUNDPERMITFLAG" then forbid compounding on the
4030 // side where the affix is applied.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004031 if (spin->si_compflags != NULL && !ae->ae_comppermit)
4032 {
4033 if (xht != NULL)
4034 use_flags |= WF_NOCOMPAFT;
4035 else
4036 use_flags |= WF_NOCOMPBEF;
4037 }
4038
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004039 // Store the modified word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004040 if (store_word(spin, newword, use_flags,
4041 spin->si_region, use_pfxlist,
4042 need_affix) == FAIL)
4043 retval = FAIL;
4044
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004045 // When added a prefix or a first suffix and the affix
4046 // has flags may add a(nother) suffix. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004047 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
4048 if (store_aff_word(spin, newword, ae->ae_flags,
4049 affile, &affile->af_suff, xht,
4050 use_condit & (xht == NULL
4051 ? ~0 : ~CONDIT_SUF),
4052 use_flags, use_pfxlist, pfxlen) == FAIL)
4053 retval = FAIL;
4054
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004055 // When added a suffix and combining is allowed also
4056 // try adding a prefix additionally. Both for the
4057 // word flags and for the affix flags. RECURSIVE!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004058 if (xht != NULL && ah->ah_combine)
4059 {
4060 if (store_aff_word(spin, newword,
4061 afflist, affile,
4062 xht, NULL, use_condit,
4063 use_flags, use_pfxlist,
4064 pfxlen) == FAIL
4065 || (ae->ae_flags != NULL
4066 && store_aff_word(spin, newword,
4067 ae->ae_flags, affile,
4068 xht, NULL, use_condit,
4069 use_flags, use_pfxlist,
4070 pfxlen) == FAIL))
4071 retval = FAIL;
4072 }
4073 }
4074 }
4075 }
4076 }
4077 }
4078
4079 return retval;
4080}
4081
4082/*
4083 * Read a file with a list of words.
4084 */
4085 static int
4086spell_read_wordfile(spellinfo_T *spin, char_u *fname)
4087{
4088 FILE *fd;
4089 long lnum = 0;
4090 char_u rline[MAXLINELEN];
4091 char_u *line;
4092 char_u *pc = NULL;
4093 char_u *p;
4094 int l;
4095 int retval = OK;
4096 int did_word = FALSE;
4097 int non_ascii = 0;
4098 int flags;
4099 int regionmask;
4100
4101 /*
4102 * Open the file.
4103 */
4104 fd = mch_fopen((char *)fname, "r");
4105 if (fd == NULL)
4106 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004107 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004108 return FAIL;
4109 }
4110
Bram Moolenaarc1669272018-06-19 14:23:53 +02004111 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004112 spell_message(spin, IObuff);
4113
4114 /*
4115 * Read all the lines in the file one by one.
4116 */
4117 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
4118 {
4119 line_breakcheck();
4120 ++lnum;
4121
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004122 // Skip comment lines.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004123 if (*rline == '#')
4124 continue;
4125
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004126 // Remove CR, LF and white space from the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004127 l = (int)STRLEN(rline);
4128 while (l > 0 && rline[l - 1] <= ' ')
4129 --l;
4130 if (l == 0)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004131 continue; // empty or blank line
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004132 rline[l] = NUL;
4133
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004134 // Convert from "/encoding={encoding}" to 'encoding' when needed.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004135 vim_free(pc);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004136 if (spin->si_conv.vc_type != CONV_NONE)
4137 {
4138 pc = string_convert(&spin->si_conv, rline, NULL);
4139 if (pc == NULL)
4140 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004141 smsg(_("Conversion failure for word in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004142 fname, lnum, rline);
4143 continue;
4144 }
4145 line = pc;
4146 }
4147 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004148 {
4149 pc = NULL;
4150 line = rline;
4151 }
4152
4153 if (*line == '/')
4154 {
4155 ++line;
4156 if (STRNCMP(line, "encoding=", 9) == 0)
4157 {
4158 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004159 smsg(_("Duplicate /encoding= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004160 fname, lnum, line - 1);
4161 else if (did_word)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004162 smsg(_("/encoding= line after word ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004163 fname, lnum, line - 1);
4164 else
4165 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004166 char_u *enc;
4167
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004168 // Setup for conversion to 'encoding'.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004169 line += 9;
4170 enc = enc_canonize(line);
4171 if (enc != NULL && !spin->si_ascii
4172 && convert_setup(&spin->si_conv, enc,
4173 p_enc) == FAIL)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004174 smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004175 fname, line, p_enc);
4176 vim_free(enc);
4177 spin->si_conv.vc_fail = TRUE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004178 }
4179 continue;
4180 }
4181
4182 if (STRNCMP(line, "regions=", 8) == 0)
4183 {
4184 if (spin->si_region_count > 1)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004185 smsg(_("Duplicate /regions= line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004186 fname, lnum, line);
4187 else
4188 {
4189 line += 8;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01004190 if (STRLEN(line) > MAXREGIONS * 2)
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004191 smsg(_("Too many regions in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004192 fname, lnum, line);
4193 else
4194 {
4195 spin->si_region_count = (int)STRLEN(line) / 2;
4196 STRCPY(spin->si_region_name, line);
4197
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004198 // Adjust the mask for a word valid in all regions.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004199 spin->si_region = (1 << spin->si_region_count) - 1;
4200 }
4201 }
4202 continue;
4203 }
4204
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004205 smsg(_("/ line ignored in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004206 fname, lnum, line - 1);
4207 continue;
4208 }
4209
4210 flags = 0;
4211 regionmask = spin->si_region;
4212
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004213 // Check for flags and region after a slash.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004214 p = vim_strchr(line, '/');
4215 if (p != NULL)
4216 {
4217 *p++ = NUL;
4218 while (*p != NUL)
4219 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004220 if (*p == '=') // keep-case word
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004221 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004222 else if (*p == '!') // Bad, bad, wicked word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004223 flags |= WF_BANNED;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004224 else if (*p == '?') // Rare word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004225 flags |= WF_RARE;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004226 else if (VIM_ISDIGIT(*p)) // region number(s)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004227 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004228 if ((flags & WF_REGION) == 0) // first one
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004229 regionmask = 0;
4230 flags |= WF_REGION;
4231
4232 l = *p - '0';
Bram Moolenaaree03b942017-10-27 00:57:05 +02004233 if (l == 0 || l > spin->si_region_count)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004234 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004235 smsg(_("Invalid region nr in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004236 fname, lnum, p);
4237 break;
4238 }
4239 regionmask |= 1 << (l - 1);
4240 }
4241 else
4242 {
Bram Moolenaardb99f9f2020-03-23 22:12:22 +01004243 smsg(_("Unrecognized flags in %s line %ld: %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004244 fname, lnum, p);
4245 break;
4246 }
4247 ++p;
4248 }
4249 }
4250
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004251 // Skip non-ASCII words when "spin->si_ascii" is TRUE.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004252 if (spin->si_ascii && has_non_ascii(line))
4253 {
4254 ++non_ascii;
4255 continue;
4256 }
4257
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004258 // Normal word: store it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004259 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
4260 {
4261 retval = FAIL;
4262 break;
4263 }
4264 did_word = TRUE;
4265 }
4266
4267 vim_free(pc);
4268 fclose(fd);
4269
4270 if (spin->si_ascii && non_ascii > 0)
4271 {
4272 vim_snprintf((char *)IObuff, IOSIZE,
4273 _("Ignored %d words with non-ASCII characters"), non_ascii);
4274 spell_message(spin, IObuff);
4275 }
4276
4277 return retval;
4278}
4279
4280/*
4281 * Get part of an sblock_T, "len" bytes long.
4282 * This avoids calling free() for every little struct we use (and keeping
4283 * track of them).
4284 * The memory is cleared to all zeros.
4285 * Returns NULL when out of memory.
4286 */
4287 static void *
4288getroom(
4289 spellinfo_T *spin,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004290 size_t len, // length needed
4291 int align) // align for pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004292{
4293 char_u *p;
4294 sblock_T *bl = spin->si_blocks;
4295
4296 if (align && bl != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004297 // Round size up for alignment. On some systems structures need to be
4298 // aligned to the size of a pointer (e.g., SPARC).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004299 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
4300 & ~(sizeof(char *) - 1);
4301
4302 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
4303 {
4304 if (len >= SBLOCKSIZE)
4305 bl = NULL;
4306 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004307 // Allocate a block of memory. It is not freed until much later.
zeertzjq1b438a82023-02-01 13:11:15 +00004308 bl = alloc_clear(offsetof(sblock_T, sb_data) + SBLOCKSIZE + 1);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004309 if (bl == NULL)
4310 {
4311 if (!spin->si_did_emsg)
4312 {
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00004313 emsg(_(e_insufficient_memory_word_list_will_be_incomplete));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004314 spin->si_did_emsg = TRUE;
4315 }
4316 return NULL;
4317 }
4318 bl->sb_next = spin->si_blocks;
4319 spin->si_blocks = bl;
4320 bl->sb_used = 0;
4321 ++spin->si_blocks_cnt;
4322 }
4323
4324 p = bl->sb_data + bl->sb_used;
4325 bl->sb_used += (int)len;
4326
4327 return p;
4328}
4329
4330/*
4331 * Make a copy of a string into memory allocated with getroom().
4332 * Returns NULL when out of memory.
4333 */
4334 static char_u *
4335getroom_save(spellinfo_T *spin, char_u *s)
4336{
4337 char_u *sc;
4338
4339 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
4340 if (sc != NULL)
4341 STRCPY(sc, s);
4342 return sc;
4343}
4344
4345
4346/*
4347 * Free the list of allocated sblock_T.
4348 */
4349 static void
4350free_blocks(sblock_T *bl)
4351{
4352 sblock_T *next;
4353
4354 while (bl != NULL)
4355 {
4356 next = bl->sb_next;
4357 vim_free(bl);
4358 bl = next;
4359 }
4360}
4361
4362/*
4363 * Allocate the root of a word tree.
4364 * Returns NULL when out of memory.
4365 */
4366 static wordnode_T *
4367wordtree_alloc(spellinfo_T *spin)
4368{
4369 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4370}
4371
4372/*
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004373 * Return TRUE if "word" contains valid word characters.
4374 * Control characters and trailing '/' are invalid. Space is OK.
4375 */
4376 static int
K.Takata2ebcc352022-07-14 17:25:14 +01004377valid_spell_word(char_u *word, char_u *end)
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004378{
4379 char_u *p;
4380
K.Takata2ebcc352022-07-14 17:25:14 +01004381 if (enc_utf8 && !utf_valid_string(word, end))
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004382 return FALSE;
K.Takata2ebcc352022-07-14 17:25:14 +01004383 for (p = word; *p != NUL && p < end; p += mb_ptr2len(p))
Bram Moolenaar5e59ea52022-07-01 22:26:20 +01004384 if (*p < ' ' || (p[0] == '/' && p[1] == NUL))
4385 return FALSE;
4386 return TRUE;
4387}
4388
4389/*
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004390 * Store a word in the tree(s).
4391 * Always store it in the case-folded tree. For a keep-case word this is
4392 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
4393 * used to find suggestions.
4394 * For a keep-case word also store it in the keep-case tree.
4395 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
4396 * compound flag.
4397 */
4398 static int
4399store_word(
4400 spellinfo_T *spin,
4401 char_u *word,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004402 int flags, // extra flags, WF_BANNED
4403 int region, // supported region(s)
4404 char_u *pfxlist, // list of prefix IDs or NULL
4405 int need_affix) // only store word with affix ID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004406{
4407 int len = (int)STRLEN(word);
4408 int ct = captype(word, word + len);
4409 char_u foldword[MAXWLEN];
4410 int res = OK;
4411 char_u *p;
4412
Bram Moolenaar7c824682022-05-08 22:32:58 +01004413 // Avoid adding illegal bytes to the word tree.
K.Takata2ebcc352022-07-14 17:25:14 +01004414 if (!valid_spell_word(word, word + len))
Bram Moolenaar7c824682022-05-08 22:32:58 +01004415 return FAIL;
4416
Bram Moolenaar4f135272021-06-11 19:07:40 +02004417 (void)spell_casefold(curwin, word, len, foldword, MAXWLEN);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004418 for (p = pfxlist; res == OK; ++p)
4419 {
4420 if (!need_affix || (p != NULL && *p != NUL))
4421 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
4422 region, p == NULL ? 0 : *p);
4423 if (p == NULL || *p == NUL)
4424 break;
4425 }
4426 ++spin->si_foldwcount;
4427
4428 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
4429 {
4430 for (p = pfxlist; res == OK; ++p)
4431 {
4432 if (!need_affix || (p != NULL && *p != NUL))
4433 res = tree_add_word(spin, word, spin->si_keeproot, flags,
4434 region, p == NULL ? 0 : *p);
4435 if (p == NULL || *p == NUL)
4436 break;
4437 }
4438 ++spin->si_keepwcount;
4439 }
4440 return res;
4441}
4442
4443/*
4444 * Add word "word" to a word tree at "root".
4445 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
4446 * "rare" and "region" is the condition nr.
4447 * Returns FAIL when out of memory.
4448 */
4449 static int
4450tree_add_word(
4451 spellinfo_T *spin,
4452 char_u *word,
4453 wordnode_T *root,
4454 int flags,
4455 int region,
4456 int affixID)
4457{
4458 wordnode_T *node = root;
4459 wordnode_T *np;
4460 wordnode_T *copyp, **copyprev;
4461 wordnode_T **prev = NULL;
4462 int i;
4463
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004464 // Add each byte of the word to the tree, including the NUL at the end.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004465 for (i = 0; ; ++i)
4466 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004467 // When there is more than one reference to this node we need to make
4468 // a copy, so that we can modify it. Copy the whole list of siblings
4469 // (we don't optimize for a partly shared list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004470 if (node != NULL && node->wn_refs > 1)
4471 {
4472 --node->wn_refs;
4473 copyprev = prev;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004474 FOR_ALL_NODE_SIBLINGS(node, copyp)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004475 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004476 // Allocate a new node and copy the info.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004477 np = get_wordnode(spin);
4478 if (np == NULL)
4479 return FAIL;
4480 np->wn_child = copyp->wn_child;
4481 if (np->wn_child != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004482 ++np->wn_child->wn_refs; // child gets extra ref
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004483 np->wn_byte = copyp->wn_byte;
4484 if (np->wn_byte == NUL)
4485 {
4486 np->wn_flags = copyp->wn_flags;
4487 np->wn_region = copyp->wn_region;
4488 np->wn_affixID = copyp->wn_affixID;
4489 }
4490
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004491 // Link the new node in the list, there will be one ref.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004492 np->wn_refs = 1;
4493 if (copyprev != NULL)
4494 *copyprev = np;
4495 copyprev = &np->wn_sibling;
4496
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004497 // Let "node" point to the head of the copied list.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004498 if (copyp == node)
4499 node = np;
4500 }
4501 }
4502
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004503 // Look for the sibling that has the same character. They are sorted
4504 // on byte value, thus stop searching when a sibling is found with a
4505 // higher byte value. For zero bytes (end of word) the sorting is
4506 // done on flags and then on affixID.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004507 while (node != NULL
4508 && (node->wn_byte < word[i]
4509 || (node->wn_byte == NUL
4510 && (flags < 0
4511 ? node->wn_affixID < (unsigned)affixID
4512 : (node->wn_flags < (unsigned)(flags & WN_MASK)
4513 || (node->wn_flags == (flags & WN_MASK)
4514 && (spin->si_sugtree
4515 ? (node->wn_region & 0xffff) < region
4516 : node->wn_affixID
4517 < (unsigned)affixID)))))))
4518 {
4519 prev = &node->wn_sibling;
4520 node = *prev;
4521 }
4522 if (node == NULL
4523 || node->wn_byte != word[i]
4524 || (word[i] == NUL
4525 && (flags < 0
4526 || spin->si_sugtree
4527 || node->wn_flags != (flags & WN_MASK)
4528 || node->wn_affixID != affixID)))
4529 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004530 // Allocate a new node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004531 np = get_wordnode(spin);
4532 if (np == NULL)
4533 return FAIL;
4534 np->wn_byte = word[i];
4535
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004536 // If "node" is NULL this is a new child or the end of the sibling
4537 // list: ref count is one. Otherwise use ref count of sibling and
4538 // make ref count of sibling one (matters when inserting in front
4539 // of the list of siblings).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004540 if (node == NULL)
4541 np->wn_refs = 1;
4542 else
4543 {
4544 np->wn_refs = node->wn_refs;
4545 node->wn_refs = 1;
4546 }
4547 if (prev != NULL)
4548 *prev = np;
4549 np->wn_sibling = node;
4550 node = np;
4551 }
4552
4553 if (word[i] == NUL)
4554 {
4555 node->wn_flags = flags;
4556 node->wn_region |= region;
4557 node->wn_affixID = affixID;
4558 break;
4559 }
4560 prev = &node->wn_child;
4561 node = *prev;
4562 }
4563#ifdef SPELL_PRINTTREE
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01004564 smsg("Added \"%s\"", word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004565 spell_print_tree(root->wn_sibling);
4566#endif
4567
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004568 // count nr of words added since last message
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004569 ++spin->si_msg_count;
4570
4571 if (spin->si_compress_cnt > 1)
4572 {
4573 if (--spin->si_compress_cnt == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004574 // Did enough words to lower the block count limit.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004575 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004576 }
4577
4578 /*
4579 * When we have allocated lots of memory we need to compress the word tree
4580 * to free up some room. But compression is slow, and we might actually
4581 * need that room, thus only compress in the following situations:
4582 * 1. When not compressed before (si_compress_cnt == 0): when using
4583 * "compress_start" blocks.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004584 * 2. When compressed before and used "compress_inc" blocks before
4585 * adding "compress_added" words (si_compress_cnt > 1).
4586 * 3. When compressed before, added "compress_added" words
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004587 * (si_compress_cnt == 1) and the number of free nodes drops below the
4588 * maximum word length.
4589 */
dundargocc57b5bc2022-11-02 13:30:51 +00004590#ifndef SPELL_COMPRESS_ALWAYS
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004591 if (spin->si_compress_cnt == 1
4592 ? spin->si_free_count < MAXWLEN
4593 : spin->si_blocks_cnt >= compress_start)
4594#endif
4595 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004596 // Decrement the block counter. The effect is that we compress again
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004597 // when the freed up room has been used and another "compress_inc"
4598 // blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004599 // been added, then the limit is put back again.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004600 spin->si_blocks_cnt -= compress_inc;
4601 spin->si_compress_cnt = compress_added;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004602
4603 if (spin->si_verbose)
4604 {
4605 msg_start();
Bram Moolenaar32526b32019-01-19 17:43:09 +01004606 msg_puts(_(msg_compressing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004607 msg_clr_eos();
4608 msg_didout = FALSE;
4609 msg_col = 0;
4610 out_flush();
4611 }
4612
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004613 // Compress both trees. Either they both have many nodes, which makes
4614 // compression useful, or one of them is small, which means
4615 // compression goes fast. But when filling the soundfold word tree
4616 // there is no keep-case tree.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004617 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004618 if (affixID >= 0)
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004619 wordtree_compress(spin, spin->si_keeproot, "keep-case");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004620 }
4621
4622 return OK;
4623}
4624
4625/*
4626 * Get a wordnode_T, either from the list of previously freed nodes or
4627 * allocate a new one.
4628 * Returns NULL when out of memory.
4629 */
4630 static wordnode_T *
4631get_wordnode(spellinfo_T *spin)
4632{
4633 wordnode_T *n;
4634
4635 if (spin->si_first_free == NULL)
4636 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
4637 else
4638 {
4639 n = spin->si_first_free;
4640 spin->si_first_free = n->wn_child;
Bram Moolenaara80faa82020-04-12 19:37:17 +02004641 CLEAR_POINTER(n);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004642 --spin->si_free_count;
4643 }
4644#ifdef SPELL_PRINTTREE
4645 if (n != NULL)
4646 n->wn_nr = ++spin->si_wordnode_nr;
4647#endif
4648 return n;
4649}
4650
4651/*
4652 * Decrement the reference count on a node (which is the head of a list of
4653 * siblings). If the reference count becomes zero free the node and its
4654 * siblings.
4655 * Returns the number of nodes actually freed.
4656 */
4657 static int
4658deref_wordnode(spellinfo_T *spin, wordnode_T *node)
4659{
4660 wordnode_T *np;
4661 int cnt = 0;
4662
4663 if (--node->wn_refs == 0)
4664 {
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004665 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004666 {
4667 if (np->wn_child != NULL)
4668 cnt += deref_wordnode(spin, np->wn_child);
4669 free_wordnode(spin, np);
4670 ++cnt;
4671 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004672 ++cnt; // length field
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004673 }
4674 return cnt;
4675}
4676
4677/*
4678 * Free a wordnode_T for re-use later.
4679 * Only the "wn_child" field becomes invalid.
4680 */
4681 static void
4682free_wordnode(spellinfo_T *spin, wordnode_T *n)
4683{
4684 n->wn_child = spin->si_first_free;
4685 spin->si_first_free = n;
4686 ++spin->si_free_count;
4687}
4688
4689/*
4690 * Compress a tree: find tails that are identical and can be shared.
4691 */
4692 static void
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004693wordtree_compress(spellinfo_T *spin, wordnode_T *root, char *name)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004694{
4695 hashtab_T ht;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004696 long n;
4697 long tot = 0;
4698 long perc;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004699
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004700 // Skip the root itself, it's not actually used. The first sibling is the
4701 // start of the tree.
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00004702 if (root->wn_sibling == NULL)
4703 return;
4704
4705 hash_init(&ht);
4706 n = node_compress(spin, root->wn_sibling, &ht, &tot);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004707
4708#ifndef SPELL_PRINTTREE
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00004709 if (spin->si_verbose || p_verbose > 2)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004710#endif
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00004711 {
4712 if (tot > 1000000)
4713 perc = (tot - n) / (tot / 100);
4714 else if (tot == 0)
4715 perc = 0;
4716 else
4717 perc = (tot - n) * 100 / tot;
4718 vim_snprintf((char *)IObuff, IOSIZE,
4719 _("Compressed %s: %ld of %ld nodes; %ld (%ld%%) remaining"),
4720 name, n, tot, tot - n, perc);
4721 spell_message(spin, IObuff);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004722 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00004723#ifdef SPELL_PRINTTREE
4724 spell_print_tree(root->wn_sibling);
4725#endif
4726 hash_clear(&ht);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004727}
4728
4729/*
4730 * Compress a node, its siblings and its children, depth first.
4731 * Returns the number of compressed nodes.
4732 */
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004733 static long
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004734node_compress(
4735 spellinfo_T *spin,
4736 wordnode_T *node,
4737 hashtab_T *ht,
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004738 long *tot) // total count of nodes before compressing,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004739 // incremented while going through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004740{
4741 wordnode_T *np;
4742 wordnode_T *tp;
4743 wordnode_T *child;
4744 hash_T hash;
4745 hashitem_T *hi;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004746 long len = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004747 unsigned nr, n;
Bram Moolenaar59f88fb2020-06-03 20:51:11 +02004748 long compressed = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004749
4750 /*
4751 * Go through the list of siblings. Compress each child and then try
4752 * finding an identical child to replace it.
4753 * Note that with "child" we mean not just the node that is pointed to,
4754 * but the whole list of siblings of which the child node is the first.
4755 */
4756 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
4757 {
4758 ++len;
4759 if ((child = np->wn_child) != NULL)
4760 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004761 // Compress the child first. This fills hashkey.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004762 compressed += node_compress(spin, child, ht, tot);
4763
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004764 // Try to find an identical child.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004765 hash = hash_hash(child->wn_u1.hashkey);
4766 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
4767 if (!HASHITEM_EMPTY(hi))
4768 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004769 // There are children we encountered before with a hash value
4770 // identical to the current child. Now check if there is one
4771 // that is really identical.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004772 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
4773 if (node_equal(child, tp))
4774 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004775 // Found one! Now use that child in place of the
4776 // current one. This means the current child and all
4777 // its siblings is unlinked from the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004778 ++tp->wn_refs;
4779 compressed += deref_wordnode(spin, child);
4780 np->wn_child = tp;
4781 break;
4782 }
4783 if (tp == NULL)
4784 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004785 // No other child with this hash value equals the child of
4786 // the node, add it to the linked list after the first
4787 // item.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004788 tp = HI2WN(hi);
4789 child->wn_u2.next = tp->wn_u2.next;
4790 tp->wn_u2.next = child;
4791 }
4792 }
4793 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004794 // No other child has this hash value, add it to the
4795 // hashtable.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004796 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
4797 }
4798 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004799 *tot += len + 1; // add one for the node that stores the length
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004800
4801 /*
4802 * Make a hash key for the node and its siblings, so that we can quickly
4803 * find a lookalike node. This must be done after compressing the sibling
4804 * list, otherwise the hash key would become invalid by the compression.
4805 */
4806 node->wn_u1.hashkey[0] = len;
4807 nr = 0;
Bram Moolenaaraeea7212020-04-02 18:50:46 +02004808 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004809 {
4810 if (np->wn_byte == NUL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004811 // end node: use wn_flags, wn_region and wn_affixID
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004812 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
4813 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004814 // byte node: use the byte value and the child pointer
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004815 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
4816 nr = nr * 101 + n;
4817 }
4818
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004819 // Avoid NUL bytes, it terminates the hash key.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004820 n = nr & 0xff;
4821 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
4822 n = (nr >> 8) & 0xff;
4823 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
4824 n = (nr >> 16) & 0xff;
4825 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
4826 n = (nr >> 24) & 0xff;
4827 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4828 node->wn_u1.hashkey[5] = NUL;
4829
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004830 // Check for CTRL-C pressed now and then.
Bram Moolenaar408c23b2020-06-03 22:15:45 +02004831 veryfast_breakcheck();
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004832
4833 return compressed;
4834}
4835
4836/*
4837 * Return TRUE when two nodes have identical siblings and children.
4838 */
4839 static int
4840node_equal(wordnode_T *n1, wordnode_T *n2)
4841{
4842 wordnode_T *p1;
4843 wordnode_T *p2;
4844
4845 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4846 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4847 if (p1->wn_byte != p2->wn_byte
4848 || (p1->wn_byte == NUL
4849 ? (p1->wn_flags != p2->wn_flags
4850 || p1->wn_region != p2->wn_region
4851 || p1->wn_affixID != p2->wn_affixID)
4852 : (p1->wn_child != p2->wn_child)))
4853 break;
4854
4855 return p1 == NULL && p2 == NULL;
4856}
4857
Bram Moolenaareae1b912019-05-09 15:12:55 +02004858static int rep_compare(const void *s1, const void *s2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004859
4860/*
4861 * Function given to qsort() to sort the REP items on "from" string.
4862 */
4863 static int
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004864rep_compare(const void *s1, const void *s2)
4865{
4866 fromto_T *p1 = (fromto_T *)s1;
4867 fromto_T *p2 = (fromto_T *)s2;
4868
4869 return STRCMP(p1->ft_from, p2->ft_from);
4870}
4871
4872/*
4873 * Write the Vim .spl file "fname".
4874 * Return FAIL or OK;
4875 */
4876 static int
4877write_vim_spell(spellinfo_T *spin, char_u *fname)
4878{
4879 FILE *fd;
4880 int regionmask;
4881 int round;
4882 wordnode_T *tree;
4883 int nodecount;
4884 int i;
4885 int l;
4886 garray_T *gap;
4887 fromto_T *ftp;
4888 char_u *p;
4889 int rr;
4890 int retval = OK;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004891 size_t fwv = 1; // collect return value of fwrite() to avoid
4892 // warnings from picky compiler
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004893
4894 fd = mch_fopen((char *)fname, "w");
4895 if (fd == NULL)
4896 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00004897 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004898 return FAIL;
4899 }
4900
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004901 // <HEADER>: <fileID> <versionnr>
4902 // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004903 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
4904 if (fwv != (size_t)1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004905 // Catch first write error, don't try writing more.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004906 goto theend;
4907
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004908 putc(VIMSPELLVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004909
4910 /*
4911 * <SECTIONS>: <section> ... <sectionend>
4912 */
4913
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004914 // SN_INFO: <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004915 if (spin->si_info != NULL)
4916 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004917 putc(SN_INFO, fd); // <sectionID>
4918 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004919
4920 i = (int)STRLEN(spin->si_info);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004921 put_bytes(fd, (long_u)i, 4); // <sectionlen>
4922 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); // <infotext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004923 }
4924
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004925 // SN_REGION: <regionname> ...
4926 // Write the region names only if there is more than one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004927 if (spin->si_region_count > 1)
4928 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004929 putc(SN_REGION, fd); // <sectionID>
4930 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004931 l = spin->si_region_count * 2;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004932 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004933 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004934 // <regionname> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004935 regionmask = (1 << spin->si_region_count) - 1;
4936 }
4937 else
4938 regionmask = 0;
4939
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004940 // SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
4941 //
4942 // The table with character flags and the table for case folding.
4943 // This makes sure the same characters are recognized as word characters
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00004944 // when generating and when using a spell file.
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004945 // Skip this for ASCII, the table may conflict with the one used for
4946 // 'encoding'.
4947 // Also skip this for an .add.spl file, the main spell file must contain
4948 // the table (avoids that it conflicts). File is shorter too.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004949 if (!spin->si_ascii && !spin->si_add)
4950 {
4951 char_u folchars[128 * 8];
4952 int flags;
4953
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004954 putc(SN_CHARFLAGS, fd); // <sectionID>
4955 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004956
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004957 // Form the <folchars> string first, we need to know its length.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004958 l = 0;
4959 for (i = 128; i < 256; ++i)
4960 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004961 if (has_mbyte)
4962 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
4963 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004964 folchars[l++] = spelltab.st_fold[i];
4965 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004966 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004967
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004968 fputc(128, fd); // <charflagslen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004969 for (i = 128; i < 256; ++i)
4970 {
4971 flags = 0;
4972 if (spelltab.st_isw[i])
4973 flags |= CF_WORD;
4974 if (spelltab.st_isu[i])
4975 flags |= CF_UPPER;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004976 fputc(flags, fd); // <charflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004977 }
4978
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004979 put_bytes(fd, (long_u)l, 2); // <folcharslen>
4980 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); // <folchars>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004981 }
4982
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004983 // SN_MIDWORD: <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004984 if (spin->si_midword != NULL)
4985 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004986 putc(SN_MIDWORD, fd); // <sectionID>
4987 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004988
4989 i = (int)STRLEN(spin->si_midword);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004990 put_bytes(fd, (long_u)i, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004991 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004992 // <midword>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004993 }
4994
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004995 // SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004996 if (spin->si_prefcond.ga_len > 0)
4997 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01004998 putc(SN_PREFCOND, fd); // <sectionID>
4999 putc(SNF_REQUIRED, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005000
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00005001 l = write_spell_prefcond(NULL, &spin->si_prefcond, &fwv);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005002 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005003
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00005004 write_spell_prefcond(fd, &spin->si_prefcond, &fwv);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005005 }
5006
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005007 // SN_REP: <repcount> <rep> ...
5008 // SN_SAL: <salflags> <salcount> <sal> ...
5009 // SN_REPSAL: <repcount> <rep> ...
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005010
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005011 // round 1: SN_REP section
5012 // round 2: SN_SAL section (unless SN_SOFO is used)
5013 // round 3: SN_REPSAL section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005014 for (round = 1; round <= 3; ++round)
5015 {
5016 if (round == 1)
5017 gap = &spin->si_rep;
5018 else if (round == 2)
5019 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005020 // Don't write SN_SAL when using a SN_SOFO section
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005021 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5022 continue;
5023 gap = &spin->si_sal;
5024 }
5025 else
5026 gap = &spin->si_repsal;
5027
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005028 // Don't write the section if there are no items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005029 if (gap->ga_len == 0)
5030 continue;
5031
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005032 // Sort the REP/REPSAL items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005033 if (round != 2)
5034 qsort(gap->ga_data, (size_t)gap->ga_len,
5035 sizeof(fromto_T), rep_compare);
5036
5037 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005038 putc(i, fd); // <sectionID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005039
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005040 // This is for making suggestions, section is not required.
5041 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005042
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005043 // Compute the length of what follows.
5044 l = 2; // count <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005045 for (i = 0; i < gap->ga_len; ++i)
5046 {
5047 ftp = &((fromto_T *)gap->ga_data)[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005048 l += 1 + (int)STRLEN(ftp->ft_from); // count <*fromlen> and <*from>
5049 l += 1 + (int)STRLEN(ftp->ft_to); // count <*tolen> and <*to>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005050 }
5051 if (round == 2)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005052 ++l; // count <salflags>
5053 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005054
5055 if (round == 2)
5056 {
5057 i = 0;
5058 if (spin->si_followup)
5059 i |= SAL_F0LLOWUP;
5060 if (spin->si_collapse)
5061 i |= SAL_COLLAPSE;
5062 if (spin->si_rem_accents)
5063 i |= SAL_REM_ACCENTS;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005064 putc(i, fd); // <salflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005065 }
5066
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005067 put_bytes(fd, (long_u)gap->ga_len, 2); // <repcount> or <salcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005068 for (i = 0; i < gap->ga_len; ++i)
5069 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005070 // <rep> : <repfromlen> <repfrom> <reptolen> <repto>
5071 // <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005072 ftp = &((fromto_T *)gap->ga_data)[i];
5073 for (rr = 1; rr <= 2; ++rr)
5074 {
5075 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
5076 l = (int)STRLEN(p);
5077 putc(l, fd);
5078 if (l > 0)
5079 fwv &= fwrite(p, l, (size_t)1, fd);
5080 }
5081 }
5082
5083 }
5084
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005085 // SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
5086 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005087 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
5088 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005089 putc(SN_SOFO, fd); // <sectionID>
5090 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005091
5092 l = (int)STRLEN(spin->si_sofofr);
5093 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005094 // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005095
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005096 put_bytes(fd, (long_u)l, 2); // <sofofromlen>
5097 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); // <sofofrom>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005098
5099 l = (int)STRLEN(spin->si_sofoto);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005100 put_bytes(fd, (long_u)l, 2); // <sofotolen>
5101 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); // <sofoto>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005102 }
5103
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005104 // SN_WORDS: <word> ...
5105 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005106 if (spin->si_commonwords.ht_used > 0)
5107 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005108 putc(SN_WORDS, fd); // <sectionID>
5109 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005110
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005111 // round 1: count the bytes
5112 // round 2: write the bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005113 for (round = 1; round <= 2; ++round)
5114 {
5115 int todo;
5116 int len = 0;
5117 hashitem_T *hi;
5118
5119 todo = (int)spin->si_commonwords.ht_used;
Yegappan Lakshmanan14113fd2023-03-07 17:13:51 +00005120 FOR_ALL_HASHTAB_ITEMS(&spin->si_commonwords, hi, todo)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005121 if (!HASHITEM_EMPTY(hi))
5122 {
5123 l = (int)STRLEN(hi->hi_key) + 1;
5124 len += l;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005125 if (round == 2) // <word>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005126 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
5127 --todo;
5128 }
5129 if (round == 1)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005130 put_bytes(fd, (long_u)len, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005131 }
5132 }
5133
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005134 // SN_MAP: <mapstr>
5135 // This is for making suggestions, section is not required.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005136 if (spin->si_map.ga_len > 0)
5137 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005138 putc(SN_MAP, fd); // <sectionID>
5139 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005140 l = spin->si_map.ga_len;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005141 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005142 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005143 // <mapstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005144 }
5145
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005146 // SN_SUGFILE: <timestamp>
5147 // This is used to notify that a .sug file may be available and at the
5148 // same time allows for checking that a .sug file that is found matches
5149 // with this .spl file. That's because the word numbers must be exactly
5150 // right.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005151 if (!spin->si_nosugfile
5152 && (spin->si_sal.ga_len > 0
5153 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
5154 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005155 putc(SN_SUGFILE, fd); // <sectionID>
5156 putc(0, fd); // <sectionflags>
5157 put_bytes(fd, (long_u)8, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005158
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005159 // Set si_sugtime and write it to the file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005160 spin->si_sugtime = time(NULL);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005161 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005162 }
5163
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005164 // SN_NOSPLITSUGS: nothing
5165 // This is used to notify that no suggestions with word splits are to be
5166 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005167 if (spin->si_nosplitsugs)
5168 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005169 putc(SN_NOSPLITSUGS, fd); // <sectionID>
5170 putc(0, fd); // <sectionflags>
5171 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005172 }
5173
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005174 // SN_NOCOMPOUNDSUGS: nothing
5175 // This is used to notify that no suggestions with compounds are to be
5176 // made.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005177 if (spin->si_nocompoundsugs)
5178 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005179 putc(SN_NOCOMPOUNDSUGS, fd); // <sectionID>
5180 putc(0, fd); // <sectionflags>
5181 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005182 }
5183
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005184 // SN_COMPOUND: compound info.
5185 // We don't mark it required, when not supported all compound words will
5186 // be bad words.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005187 if (spin->si_compflags != NULL)
5188 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005189 putc(SN_COMPOUND, fd); // <sectionID>
5190 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005191
5192 l = (int)STRLEN(spin->si_compflags);
5193 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5194 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005195 put_bytes(fd, (long_u)(l + 7), 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005196
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005197 putc(spin->si_compmax, fd); // <compmax>
5198 putc(spin->si_compminlen, fd); // <compminlen>
5199 putc(spin->si_compsylmax, fd); // <compsylmax>
5200 putc(0, fd); // for Vim 7.0b compatibility
5201 putc(spin->si_compoptions, fd); // <compoptions>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005202 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005203 // <comppatcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005204 for (i = 0; i < spin->si_comppat.ga_len; ++i)
5205 {
5206 p = ((char_u **)(spin->si_comppat.ga_data))[i];
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005207 putc((int)STRLEN(p), fd); // <comppatlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005208 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005209 // <comppattext>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005210 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005211 // <compflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005212 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
5213 (size_t)1, fd);
5214 }
5215
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005216 // SN_NOBREAK: NOBREAK flag
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005217 if (spin->si_nobreak)
5218 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005219 putc(SN_NOBREAK, fd); // <sectionID>
5220 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005221
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005222 // It's empty, the presence of the section flags the feature.
5223 put_bytes(fd, (long_u)0, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005224 }
5225
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005226 // SN_SYLLABLE: syllable info.
5227 // We don't mark it required, when not supported syllables will not be
5228 // counted.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005229 if (spin->si_syllable != NULL)
5230 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005231 putc(SN_SYLLABLE, fd); // <sectionID>
5232 putc(0, fd); // <sectionflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005233
5234 l = (int)STRLEN(spin->si_syllable);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005235 put_bytes(fd, (long_u)l, 4); // <sectionlen>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005236 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005237 // <syllable>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005238 }
5239
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005240 // end of <SECTIONS>
5241 putc(SN_END, fd); // <sectionend>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005242
5243
5244 /*
5245 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
5246 */
5247 spin->si_memtot = 0;
5248 for (round = 1; round <= 3; ++round)
5249 {
5250 if (round == 1)
5251 tree = spin->si_foldroot->wn_sibling;
5252 else if (round == 2)
5253 tree = spin->si_keeproot->wn_sibling;
5254 else
5255 tree = spin->si_prefroot->wn_sibling;
5256
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005257 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005258 clear_node(tree);
5259
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005260 // Count the number of nodes. Needed to be able to allocate the
5261 // memory when reading the nodes. Also fills in index for shared
5262 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005263 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
5264
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005265 // number of nodes in 4 bytes
5266 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005267 spin->si_memtot += nodecount + nodecount * sizeof(int);
5268
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005269 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005270 (void)put_node(fd, tree, 0, regionmask, round == 3);
5271 }
5272
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005273 // Write another byte to check for errors (file system full).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005274 if (putc(0, fd) == EOF)
5275 retval = FAIL;
5276theend:
5277 if (fclose(fd) == EOF)
5278 retval = FAIL;
5279
5280 if (fwv != (size_t)1)
5281 retval = FAIL;
5282 if (retval == FAIL)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005283 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005284
5285 return retval;
5286}
5287
5288/*
5289 * Clear the index and wnode fields of "node", it siblings and its
5290 * children. This is needed because they are a union with other items to save
5291 * space.
5292 */
5293 static void
5294clear_node(wordnode_T *node)
5295{
5296 wordnode_T *np;
5297
5298 if (node != NULL)
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005299 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005300 {
5301 np->wn_u1.index = 0;
5302 np->wn_u2.wnode = NULL;
5303
5304 if (np->wn_byte != NUL)
5305 clear_node(np->wn_child);
5306 }
5307}
5308
5309
5310/*
5311 * Dump a word tree at node "node".
5312 *
5313 * This first writes the list of possible bytes (siblings). Then for each
5314 * byte recursively write the children.
5315 *
5316 * NOTE: The code here must match the code in read_tree_node(), since
5317 * assumptions are made about the indexes (so that we don't have to write them
5318 * in the file).
5319 *
5320 * Returns the number of nodes used.
5321 */
5322 static int
5323put_node(
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005324 FILE *fd, // NULL when only counting
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005325 wordnode_T *node,
5326 int idx,
5327 int regionmask,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005328 int prefixtree) // TRUE for PREFIXTREE
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005329{
5330 int newindex = idx;
5331 int siblingcount = 0;
5332 wordnode_T *np;
5333 int flags;
5334
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005335 // If "node" is zero the tree is empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005336 if (node == NULL)
5337 return 0;
5338
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005339 // Store the index where this node is written.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005340 node->wn_u1.index = idx;
5341
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005342 // Count the number of siblings.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005343 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005344 ++siblingcount;
5345
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005346 // Write the sibling count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005347 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005348 putc(siblingcount, fd); // <siblingcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005349
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005350 // Write each sibling byte and optionally extra info.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005351 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005352 {
5353 if (np->wn_byte == 0)
5354 {
5355 if (fd != NULL)
5356 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005357 // For a NUL byte (end of word) write the flags etc.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005358 if (prefixtree)
5359 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005360 // In PREFIXTREE write the required affixID and the
5361 // associated condition nr (stored in wn_region). The
5362 // byte value is misused to store the "rare" and "not
5363 // combining" flags
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005364 if (np->wn_flags == (short_u)PFX_FLAGS)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005365 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005366 else
5367 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005368 putc(BY_FLAGS, fd); // <byte>
5369 putc(np->wn_flags, fd); // <pflags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005370 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005371 putc(np->wn_affixID, fd); // <affixID>
5372 put_bytes(fd, (long_u)np->wn_region, 2); // <prefcondnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005373 }
5374 else
5375 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005376 // For word trees we write the flag/region items.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005377 flags = np->wn_flags;
5378 if (regionmask != 0 && np->wn_region != regionmask)
5379 flags |= WF_REGION;
5380 if (np->wn_affixID != 0)
5381 flags |= WF_AFX;
5382 if (flags == 0)
5383 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005384 // word without flags or region
5385 putc(BY_NOFLAGS, fd); // <byte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005386 }
5387 else
5388 {
5389 if (np->wn_flags >= 0x100)
5390 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005391 putc(BY_FLAGS2, fd); // <byte>
5392 putc(flags, fd); // <flags>
5393 putc((unsigned)flags >> 8, fd); // <flags2>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005394 }
5395 else
5396 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005397 putc(BY_FLAGS, fd); // <byte>
5398 putc(flags, fd); // <flags>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005399 }
5400 if (flags & WF_REGION)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005401 putc(np->wn_region, fd); // <region>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005402 if (flags & WF_AFX)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005403 putc(np->wn_affixID, fd); // <affixID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005404 }
5405 }
5406 }
5407 }
5408 else
5409 {
5410 if (np->wn_child->wn_u1.index != 0
5411 && np->wn_child->wn_u2.wnode != node)
5412 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005413 // The child is written elsewhere, write the reference.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005414 if (fd != NULL)
5415 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005416 putc(BY_INDEX, fd); // <byte>
5417 // <nodeidx>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005418 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
5419 }
5420 }
5421 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005422 // We will write the child below and give it an index.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005423 np->wn_child->wn_u2.wnode = node;
5424
5425 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005426 if (putc(np->wn_byte, fd) == EOF) // <byte> or <xbyte>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005427 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005428 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005429 return 0;
5430 }
5431 }
5432 }
5433
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005434 // Space used in the array when reading: one for each sibling and one for
5435 // the count.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005436 newindex += siblingcount + 1;
5437
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005438 // Recursively dump the children of each sibling.
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005439 FOR_ALL_NODE_SIBLINGS(node, np)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005440 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
5441 newindex = put_node(fd, np->wn_child, newindex, regionmask,
5442 prefixtree);
5443
5444 return newindex;
5445}
5446
5447
5448/*
5449 * ":mkspell [-ascii] outfile infile ..."
5450 * ":mkspell [-ascii] addfile"
5451 */
5452 void
5453ex_mkspell(exarg_T *eap)
5454{
5455 int fcount;
5456 char_u **fnames;
5457 char_u *arg = eap->arg;
5458 int ascii = FALSE;
5459
5460 if (STRNCMP(arg, "-ascii", 6) == 0)
5461 {
5462 ascii = TRUE;
5463 arg = skipwhite(arg + 6);
5464 }
5465
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005466 // Expand all the remaining arguments (e.g., $VIMRUNTIME).
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00005467 if (get_arglist_exp(arg, &fcount, &fnames, FALSE) != OK)
5468 return;
5469
5470 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
5471 FreeWild(fcount, fnames);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005472}
5473
5474/*
5475 * Create the .sug file.
5476 * Uses the soundfold info in "spin".
5477 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
5478 */
5479 static void
5480spell_make_sugfile(spellinfo_T *spin, char_u *wfname)
5481{
5482 char_u *fname = NULL;
5483 int len;
5484 slang_T *slang;
5485 int free_slang = FALSE;
5486
5487 /*
5488 * Read back the .spl file that was written. This fills the required
5489 * info for soundfolding. This also uses less memory than the
5490 * pointer-linked version of the trie. And it avoids having two versions
5491 * of the code for the soundfolding stuff.
5492 * It might have been done already by spell_reload_one().
5493 */
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005494 FOR_ALL_SPELL_LANGS(slang)
Bram Moolenaar99499b12019-05-23 21:35:48 +02005495 if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005496 break;
5497 if (slang == NULL)
5498 {
5499 spell_message(spin, (char_u *)_("Reading back spell file..."));
5500 slang = spell_load_file(wfname, NULL, NULL, FALSE);
5501 if (slang == NULL)
5502 return;
5503 free_slang = TRUE;
5504 }
5505
5506 /*
5507 * Clear the info in "spin" that is used.
5508 */
5509 spin->si_blocks = NULL;
5510 spin->si_blocks_cnt = 0;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005511 spin->si_compress_cnt = 0; // will stay at 0 all the time
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005512 spin->si_free_count = 0;
5513 spin->si_first_free = NULL;
5514 spin->si_foldwcount = 0;
5515
5516 /*
5517 * Go through the trie of good words, soundfold each word and add it to
5518 * the soundfold trie.
5519 */
5520 spell_message(spin, (char_u *)_("Performing soundfolding..."));
5521 if (sug_filltree(spin, slang) == FAIL)
5522 goto theend;
5523
5524 /*
5525 * Create the table which links each soundfold word with a list of the
5526 * good words it may come from. Creates buffer "spin->si_spellbuf".
5527 * This also removes the wordnr from the NUL byte entries to make
5528 * compression possible.
5529 */
5530 if (sug_maketable(spin) == FAIL)
5531 goto theend;
5532
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005533 smsg(_("Number of words after soundfolding: %ld"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005534 (long)spin->si_spellbuf->b_ml.ml_line_count);
5535
5536 /*
5537 * Compress the soundfold trie.
5538 */
5539 spell_message(spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02005540 wordtree_compress(spin, spin->si_foldroot, "case-folded");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005541
5542 /*
5543 * Write the .sug file.
5544 * Make the file name by changing ".spl" to ".sug".
5545 */
5546 fname = alloc(MAXPATHL);
5547 if (fname == NULL)
5548 goto theend;
5549 vim_strncpy(fname, wfname, MAXPATHL - 1);
5550 len = (int)STRLEN(fname);
5551 fname[len - 2] = 'u';
5552 fname[len - 1] = 'g';
5553 sug_write(spin, fname);
5554
5555theend:
5556 vim_free(fname);
5557 if (free_slang)
5558 slang_free(slang);
5559 free_blocks(spin->si_blocks);
5560 close_spellbuf(spin->si_spellbuf);
5561}
5562
5563/*
5564 * Build the soundfold trie for language "slang".
5565 */
5566 static int
5567sug_filltree(spellinfo_T *spin, slang_T *slang)
5568{
5569 char_u *byts;
5570 idx_T *idxs;
5571 int depth;
5572 idx_T arridx[MAXWLEN];
5573 int curi[MAXWLEN];
5574 char_u tword[MAXWLEN];
5575 char_u tsalword[MAXWLEN];
5576 int c;
5577 idx_T n;
5578 unsigned words_done = 0;
5579 int wordcount[MAXWLEN];
5580
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005581 // We use si_foldroot for the soundfolded trie.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005582 spin->si_foldroot = wordtree_alloc(spin);
5583 if (spin->si_foldroot == NULL)
5584 return FAIL;
5585
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005586 // let tree_add_word() know we're adding to the soundfolded tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005587 spin->si_sugtree = TRUE;
5588
5589 /*
5590 * Go through the whole case-folded tree, soundfold each word and put it
Bram Moolenaar6669de12022-08-21 20:33:47 +01005591 * in the trie. Bail out if the tree is empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005592 */
5593 byts = slang->sl_fbyts;
5594 idxs = slang->sl_fidxs;
Bram Moolenaar6669de12022-08-21 20:33:47 +01005595 if (byts == NULL || idxs == NULL)
5596 return FAIL;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005597
5598 arridx[0] = 0;
5599 curi[0] = 1;
5600 wordcount[0] = 0;
5601
5602 depth = 0;
5603 while (depth >= 0 && !got_int)
5604 {
5605 if (curi[depth] > byts[arridx[depth]])
5606 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005607 // Done all bytes at this node, go up one level.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005608 idxs[arridx[depth]] = wordcount[depth];
5609 if (depth > 0)
5610 wordcount[depth - 1] += wordcount[depth];
5611
5612 --depth;
5613 line_breakcheck();
5614 }
5615 else
5616 {
5617
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005618 // Do one more byte at this node.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005619 n = arridx[depth] + curi[depth];
5620 ++curi[depth];
5621
5622 c = byts[n];
5623 if (c == 0)
5624 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005625 // Sound-fold the word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005626 tword[depth] = NUL;
5627 spell_soundfold(slang, tword, TRUE, tsalword);
5628
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005629 // We use the "flags" field for the MSB of the wordnr,
5630 // "region" for the LSB of the wordnr.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005631 if (tree_add_word(spin, tsalword, spin->si_foldroot,
5632 words_done >> 16, words_done & 0xffff,
5633 0) == FAIL)
5634 return FAIL;
5635
5636 ++words_done;
5637 ++wordcount[depth];
5638
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005639 // Reset the block count each time to avoid compression
5640 // kicking in.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005641 spin->si_blocks_cnt = 0;
5642
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005643 // Skip over any other NUL bytes (same word with different
Bram Moolenaar07399e72020-08-24 20:05:50 +02005644 // flags). But don't go over the end.
5645 while (n + 1 < slang->sl_fbyts_len && byts[n + 1] == 0)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005646 {
5647 ++n;
5648 ++curi[depth];
5649 }
5650 }
5651 else
5652 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005653 // Normal char, go one level deeper.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005654 tword[depth++] = c;
5655 arridx[depth] = idxs[n];
5656 curi[depth] = 1;
5657 wordcount[depth] = 0;
5658 }
5659 }
5660 }
5661
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01005662 smsg(_("Total number of words: %d"), words_done);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005663
5664 return OK;
5665}
5666
5667/*
5668 * Make the table that links each word in the soundfold trie to the words it
5669 * can be produced from.
5670 * This is not unlike lines in a file, thus use a memfile to be able to access
5671 * the table efficiently.
5672 * Returns FAIL when out of memory.
5673 */
5674 static int
5675sug_maketable(spellinfo_T *spin)
5676{
5677 garray_T ga;
5678 int res = OK;
5679
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005680 // Allocate a buffer, open a memline for it and create the swap file
5681 // (uses a temp file, not a .swp file).
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005682 spin->si_spellbuf = open_spellbuf();
5683 if (spin->si_spellbuf == NULL)
5684 return FAIL;
5685
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005686 // Use a buffer to store the line info, avoids allocating many small
5687 // pieces of memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005688 ga_init2(&ga, 1, 100);
5689
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005690 // recursively go through the tree
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005691 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
5692 res = FAIL;
5693
5694 ga_clear(&ga);
5695 return res;
5696}
5697
5698/*
5699 * Fill the table for one node and its children.
5700 * Returns the wordnr at the start of the node.
5701 * Returns -1 when out of memory.
5702 */
5703 static int
5704sug_filltable(
5705 spellinfo_T *spin,
5706 wordnode_T *node,
5707 int startwordnr,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005708 garray_T *gap) // place to store line of numbers
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005709{
5710 wordnode_T *p, *np;
5711 int wordnr = startwordnr;
5712 int nr;
5713 int prev_nr;
5714
Bram Moolenaaraeea7212020-04-02 18:50:46 +02005715 FOR_ALL_NODE_SIBLINGS(node, p)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005716 {
5717 if (p->wn_byte == NUL)
5718 {
5719 gap->ga_len = 0;
5720 prev_nr = 0;
5721 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
5722 {
5723 if (ga_grow(gap, 10) == FAIL)
5724 return -1;
5725
5726 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005727 // Compute the offset from the previous nr and store the
5728 // offset in a way that it takes a minimum number of bytes.
5729 // It's a bit like utf-8, but without the need to mark
5730 // following bytes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005731 nr -= prev_nr;
5732 prev_nr += nr;
5733 gap->ga_len += offset2bytes(nr,
5734 (char_u *)gap->ga_data + gap->ga_len);
5735 }
5736
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005737 // add the NUL byte
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005738 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
5739
5740 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
5741 gap->ga_data, gap->ga_len, TRUE) == FAIL)
5742 return -1;
5743 ++wordnr;
5744
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005745 // Remove extra NUL entries, we no longer need them. We don't
Dominique Pelleaf4a61a2021-12-27 17:21:41 +00005746 // bother freeing the nodes, they won't be reused anyway.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005747 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
5748 p->wn_sibling = p->wn_sibling->wn_sibling;
5749
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005750 // Clear the flags on the remaining NUL node, so that compression
5751 // works a lot better.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005752 p->wn_flags = 0;
5753 p->wn_region = 0;
5754 }
5755 else
5756 {
5757 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
5758 if (wordnr == -1)
5759 return -1;
5760 }
5761 }
5762 return wordnr;
5763}
5764
/*
 * Store offset "nr" in "buf" using a minimal number of bytes.
 * Similar to utf_char2bytes(), but all 8 bits of the followup bytes are used
 * and NUL bytes are avoided.
 * The first byte tells how many bytes are used: values up to 0x7f mean one
 * byte, 0x80 + digit two bytes, 0xc0 + digit three bytes and 0xe0 + digit
 * four bytes.
 * Returns the number of bytes stored in "buf" (1 to 4).
 */
    static int
offset2bytes(int nr, char_u *buf)
{
    int	    digit[4];	// base-255 digits plus one, least significant first
    int	    rest = nr;
    int	    len;
    int	    i;

    // Split the number in parts of base 255.  One is added to every part,
    // because we need to avoid NUL bytes.
    for (i = 0; i < 3; ++i)
    {
	digit[i] = rest % 255 + 1;
	rest /= 255;
    }
    digit[3] = rest + 1;

    // A shorter form can only be used when all the higher digits are unused
    // (equal to one) and the leading digit fits below the length marker.
    if (digit[3] > 1 || digit[2] > 0x1f)
	len = 4;
    else if (digit[2] > 1 || digit[1] > 0x3f)
	len = 3;
    else if (digit[1] > 1 || digit[0] > 0x7f)
	len = 2;
    else
	len = 1;

    // Most significant digit first, combined with the length marker.
    switch (len)
    {
	case 4:
	    buf[0] = 0xe0 + digit[3];
	    buf[1] = digit[2];
	    buf[2] = digit[1];
	    buf[3] = digit[0];
	    break;
	case 3:
	    buf[0] = 0xc0 + digit[2];
	    buf[1] = digit[1];
	    buf[2] = digit[0];
	    break;
	case 2:
	    buf[0] = 0x80 + digit[1];
	    buf[1] = digit[0];
	    break;
	default:
	    buf[0] = digit[0];
	    break;
    }
    return len;
}
5809
5810/*
5811 * Write the .sug file in "fname".
5812 */
5813 static void
5814sug_write(spellinfo_T *spin, char_u *fname)
5815{
5816 FILE *fd;
5817 wordnode_T *tree;
5818 int nodecount;
5819 int wcount;
5820 char_u *line;
5821 linenr_T lnum;
5822 int len;
5823
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005824 // Create the file. Note that an existing file is silently overwritten!
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005825 fd = mch_fopen((char *)fname, "w");
5826 if (fd == NULL)
5827 {
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00005828 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005829 return;
5830 }
5831
5832 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02005833 _("Writing suggestion file %s..."), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005834 spell_message(spin, IObuff);
5835
5836 /*
5837 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
5838 */
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005839 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) // <fileID>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005840 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005841 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005842 goto theend;
5843 }
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005844 putc(VIMSUGVERSION, fd); // <versionnr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005845
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005846 // Write si_sugtime to the file.
5847 put_time(fd, spin->si_sugtime); // <timestamp>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005848
5849 /*
5850 * <SUGWORDTREE>
5851 */
5852 spin->si_memtot = 0;
5853 tree = spin->si_foldroot->wn_sibling;
5854
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005855 // Clear the index and wnode fields in the tree.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005856 clear_node(tree);
5857
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005858 // Count the number of nodes. Needed to be able to allocate the
5859 // memory when reading the nodes. Also fills in index for shared
5860 // nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005861 nodecount = put_node(NULL, tree, 0, 0, FALSE);
5862
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005863 // number of nodes in 4 bytes
5864 put_bytes(fd, (long_u)nodecount, 4); // <nodecount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005865 spin->si_memtot += nodecount + nodecount * sizeof(int);
5866
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005867 // Write the nodes.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005868 (void)put_node(fd, tree, 0, 0, FALSE);
5869
5870 /*
5871 * <SUGTABLE>: <sugwcount> <sugline> ...
5872 */
5873 wcount = spin->si_spellbuf->b_ml.ml_line_count;
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005874 put_bytes(fd, (long_u)wcount, 4); // <sugwcount>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005875
5876 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
5877 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005878 // <sugline>: <sugnr> ... NUL
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005879 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
zeertzjq94b7c322024-03-12 21:50:32 +01005880 len = ml_get_buf_len(spin->si_spellbuf, lnum) + 1;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005881 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
5882 {
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005883 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005884 goto theend;
5885 }
5886 spin->si_memtot += len;
5887 }
5888
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005889 // Write another byte to check for errors.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005890 if (putc(0, fd) == EOF)
Bram Moolenaar40bcec12021-12-05 22:19:27 +00005891 emsg(_(e_error_while_writing));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005892
5893 vim_snprintf((char *)IObuff, IOSIZE,
5894 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
5895 spell_message(spin, IObuff);
5896
5897theend:
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005898 // close the file
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005899 fclose(fd);
5900}
5901
5902
5903/*
5904 * Create a Vim spell file from one or more word lists.
5905 * "fnames[0]" is the output file name.
5906 * "fnames[fcount - 1]" is the last input file name.
5907 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
5908 * and ".spl" is appended to make the output file name.
5909 */
5910 void
5911mkspell(
5912 int fcount,
5913 char_u **fnames,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005914 int ascii, // -ascii argument given
5915 int over_write, // overwrite existing output file
5916 int added_word) // invoked through "zg"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005917{
5918 char_u *fname = NULL;
5919 char_u *wfname;
5920 char_u **innames;
5921 int incount;
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005922 afffile_T *(afile[MAXREGIONS]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005923 int i;
5924 int len;
5925 stat_T st;
5926 int error = FALSE;
5927 spellinfo_T spin;
5928
Bram Moolenaara80faa82020-04-12 19:37:17 +02005929 CLEAR_FIELD(spin);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005930 spin.si_verbose = !added_word;
5931 spin.si_ascii = ascii;
5932 spin.si_followup = TRUE;
5933 spin.si_rem_accents = TRUE;
Bram Moolenaar04935fb2022-01-08 16:19:22 +00005934 ga_init2(&spin.si_rep, sizeof(fromto_T), 20);
5935 ga_init2(&spin.si_repsal, sizeof(fromto_T), 20);
5936 ga_init2(&spin.si_sal, sizeof(fromto_T), 20);
5937 ga_init2(&spin.si_map, sizeof(char_u), 100);
5938 ga_init2(&spin.si_comppat, sizeof(char_u *), 20);
5939 ga_init2(&spin.si_prefcond, sizeof(char_u *), 50);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005940 hash_init(&spin.si_commonwords);
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005941 spin.si_newcompID = 127; // start compound ID at first maximum
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005942
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005943 // default: fnames[0] is output file, following are input files
Bram Moolenaar927b7dd2020-06-29 22:24:56 +02005944 // When "fcount" is 1 there is only one file.
5945 innames = &fnames[fcount == 1 ? 0 : 1];
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005946 incount = fcount - 1;
5947
5948 wfname = alloc(MAXPATHL);
5949 if (wfname == NULL)
5950 return;
5951
5952 if (fcount >= 1)
5953 {
5954 len = (int)STRLEN(fnames[0]);
5955 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
5956 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005957 // For ":mkspell path/en.latin1.add" output file is
5958 // "path/en.latin1.add.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005959 incount = 1;
5960 vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
5961 }
5962 else if (fcount == 1)
5963 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005964 // For ":mkspell path/vim" output file is "path/vim.latin1.spl".
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005965 incount = 1;
5966 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5967 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5968 }
5969 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
5970 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005971 // Name ends in ".spl", use as the file name.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005972 vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
5973 }
5974 else
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005975 // Name should be language, make the file name from it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005976 vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
5977 fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
5978
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005979 // Check for .ascii.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005980 if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
5981 spin.si_ascii = TRUE;
5982
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005983 // Check for .add.spl.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005984 if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
5985 spin.si_add = TRUE;
5986 }
5987
5988 if (incount <= 0)
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00005989 emsg(_(e_invalid_argument)); // need at least output and input names
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005990 else if (vim_strchr(gettail(wfname), '_') != NULL)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005991 emsg(_(e_output_file_name_must_not_have_region_name));
Bram Moolenaar2993ac52018-02-10 14:12:43 +01005992 else if (incount > MAXREGIONS)
Bram Moolenaar677658a2022-01-05 16:09:06 +00005993 semsg(_(e_only_up_to_nr_regions_supported), MAXREGIONS);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005994 else
5995 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01005996 // Check for overwriting before doing things that may take a lot of
5997 // time.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02005998 if (!over_write && mch_stat((char *)wfname, &st) >= 0)
5999 {
Bram Moolenaar108010a2021-06-27 22:03:33 +02006000 emsg(_(e_file_exists));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006001 goto theend;
6002 }
6003 if (mch_isdir(wfname))
6004 {
Bram Moolenaar4dea2d92022-03-31 11:37:57 +01006005 semsg(_(e_str_is_directory), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006006 goto theend;
6007 }
6008
6009 fname = alloc(MAXPATHL);
6010 if (fname == NULL)
6011 goto theend;
6012
6013 /*
6014 * Init the aff and dic pointers.
6015 * Get the region names if there are more than 2 arguments.
6016 */
6017 for (i = 0; i < incount; ++i)
6018 {
6019 afile[i] = NULL;
6020
6021 if (incount > 1)
6022 {
6023 len = (int)STRLEN(innames[i]);
6024 if (STRLEN(gettail(innames[i])) < 5
6025 || innames[i][len - 3] != '_')
6026 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006027 semsg(_(e_invalid_region_in_str), innames[i]);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006028 goto theend;
6029 }
6030 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
6031 spin.si_region_name[i * 2 + 1] =
6032 TOLOWER_ASC(innames[i][len - 1]);
6033 }
6034 }
6035 spin.si_region_count = incount;
6036
6037 spin.si_foldroot = wordtree_alloc(&spin);
6038 spin.si_keeproot = wordtree_alloc(&spin);
6039 spin.si_prefroot = wordtree_alloc(&spin);
6040 if (spin.si_foldroot == NULL
6041 || spin.si_keeproot == NULL
6042 || spin.si_prefroot == NULL)
6043 {
6044 free_blocks(spin.si_blocks);
6045 goto theend;
6046 }
6047
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006048 // When not producing a .add.spl file clear the character table when
6049 // we encounter one in the .aff file. This means we dump the current
6050 // one in the .spl file if the .aff file doesn't define one. That's
6051 // better than guessing the contents, the table will match a
6052 // previously loaded spell file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006053 if (!spin.si_add)
6054 spin.si_clear_chartab = TRUE;
6055
6056 /*
6057 * Read all the .aff and .dic files.
6058 * Text is converted to 'encoding'.
6059 * Words are stored in the case-folded and keep-case trees.
6060 */
6061 for (i = 0; i < incount && !error; ++i)
6062 {
6063 spin.si_conv.vc_type = CONV_NONE;
6064 spin.si_region = 1 << i;
6065
6066 vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
6067 if (mch_stat((char *)fname, &st) >= 0)
6068 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006069 // Read the .aff file. Will init "spin->si_conv" based on the
6070 // "SET" line.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006071 afile[i] = spell_read_aff(&spin, fname);
6072 if (afile[i] == NULL)
6073 error = TRUE;
6074 else
6075 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006076 // Read the .dic file and store the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006077 vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
6078 innames[i]);
6079 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
6080 error = TRUE;
6081 }
6082 }
6083 else
6084 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006085 // No .aff file, try reading the file as a word list. Store
6086 // the words in the trees.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006087 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
6088 error = TRUE;
6089 }
6090
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006091 // Free any conversion stuff.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006092 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006093 }
6094
6095 if (spin.si_compflags != NULL && spin.si_nobreak)
Bram Moolenaar32526b32019-01-19 17:43:09 +01006096 msg(_("Warning: both compounding and NOBREAK specified"));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006097
6098 if (!error && !got_int)
6099 {
6100 /*
6101 * Combine tails in the tree.
6102 */
6103 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar408c23b2020-06-03 22:15:45 +02006104 wordtree_compress(&spin, spin.si_foldroot, "case-folded");
6105 wordtree_compress(&spin, spin.si_keeproot, "keep-case");
6106 wordtree_compress(&spin, spin.si_prefroot, "prefixes");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006107 }
6108
6109 if (!error && !got_int)
6110 {
6111 /*
6112 * Write the info in the spell file.
6113 */
6114 vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaarc1669272018-06-19 14:23:53 +02006115 _("Writing spell file %s..."), wfname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006116 spell_message(&spin, IObuff);
6117
6118 error = write_vim_spell(&spin, wfname) == FAIL;
6119
6120 spell_message(&spin, (char_u *)_("Done!"));
6121 vim_snprintf((char *)IObuff, IOSIZE,
6122 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
6123 spell_message(&spin, IObuff);
6124
6125 /*
6126 * If the file is loaded need to reload it.
6127 */
6128 if (!error)
6129 spell_reload_one(wfname, added_word);
6130 }
6131
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006132 // Free the allocated memory.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006133 ga_clear(&spin.si_rep);
6134 ga_clear(&spin.si_repsal);
6135 ga_clear(&spin.si_sal);
6136 ga_clear(&spin.si_map);
6137 ga_clear(&spin.si_comppat);
6138 ga_clear(&spin.si_prefcond);
6139 hash_clear_all(&spin.si_commonwords, 0);
6140
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006141 // Free the .aff file structures.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006142 for (i = 0; i < incount; ++i)
6143 if (afile[i] != NULL)
6144 spell_free_aff(afile[i]);
6145
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006146 // Free all the bits and pieces at once.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006147 free_blocks(spin.si_blocks);
6148
6149 /*
6150 * If there is soundfolding info and no NOSUGFILE item create the
6151 * .sug file with the soundfolded word trie.
6152 */
6153 if (spin.si_sugtime != 0 && !error && !got_int)
6154 spell_make_sugfile(&spin, wfname);
6155
6156 }
6157
6158theend:
6159 vim_free(fname);
6160 vim_free(wfname);
6161}
6162
6163/*
6164 * Display a message for spell file processing when 'verbose' is set or using
6165 * ":mkspell". "str" can be IObuff.
6166 */
6167 static void
6168spell_message(spellinfo_T *spin, char_u *str)
6169{
6170 if (spin->si_verbose || p_verbose > 2)
6171 {
6172 if (!spin->si_verbose)
6173 verbose_enter();
Bram Moolenaar32526b32019-01-19 17:43:09 +01006174 msg((char *)str);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006175 out_flush();
6176 if (!spin->si_verbose)
6177 verbose_leave();
6178 }
6179}
6180
6181/*
6182 * ":[count]spellgood {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006183 * ":[count]spellwrong {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006184 * ":[count]spellundo {word}"
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006185 * ":[count]spellrare {word}"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006186 */
6187 void
6188ex_spell(exarg_T *eap)
6189{
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006190 spell_add_word(eap->arg, (int)STRLEN(eap->arg),
6191 eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
6192 eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006193 eap->forceit ? 0 : (int)eap->line2,
6194 eap->cmdidx == CMD_spellundo);
6195}
6196
6197/*
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006198 * Add "word[len]" to 'spellfile' as a good, rare or bad word.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006199 */
6200 void
6201spell_add_word(
6202 char_u *word,
6203 int len,
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006204 int what, // SPELL_ADD_ values
6205 int idx, // "zG" and "zW": zero, otherwise index in
6206 // 'spellfile'
6207 int undo) // TRUE for "zug", "zuG", "zuw" and "zuW"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006208{
6209 FILE *fd = NULL;
6210 buf_T *buf = NULL;
6211 int new_spf = FALSE;
6212 char_u *fname;
6213 char_u *fnamebuf = NULL;
6214 char_u line[MAXWLEN * 2];
6215 long fpos, fpos_next = 0;
6216 int i;
6217 char_u *spf;
6218
K.Takata2ebcc352022-07-14 17:25:14 +01006219 if (!valid_spell_word(word, word + len))
Bram Moolenaar7c824682022-05-08 22:32:58 +01006220 {
6221 emsg(_(e_illegal_character_in_word));
6222 return;
6223 }
6224
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006225 if (idx == 0) // use internal wordlist
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006226 {
6227 if (int_wordlist == NULL)
6228 {
6229 int_wordlist = vim_tempname('s', FALSE);
6230 if (int_wordlist == NULL)
6231 return;
6232 }
6233 fname = int_wordlist;
6234 }
6235 else
6236 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006237 // If 'spellfile' isn't set figure out a good default value.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006238 if (*curwin->w_s->b_p_spf == NUL)
6239 {
6240 init_spellfile();
6241 new_spf = TRUE;
6242 }
6243
6244 if (*curwin->w_s->b_p_spf == NUL)
6245 {
Bram Moolenaar74409f62022-01-01 15:58:22 +00006246 semsg(_(e_option_str_is_not_set), "spellfile");
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006247 return;
6248 }
6249 fnamebuf = alloc(MAXPATHL);
6250 if (fnamebuf == NULL)
6251 return;
6252
6253 for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
6254 {
6255 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
6256 if (i == idx)
6257 break;
6258 if (*spf == NUL)
6259 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006260 semsg(_(e_spellfile_does_not_have_nr_entries), idx);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006261 vim_free(fnamebuf);
6262 return;
6263 }
6264 }
6265
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006266 // Check that the user isn't editing the .add file somewhere.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006267 buf = buflist_findname_exp(fnamebuf);
6268 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
6269 buf = NULL;
6270 if (buf != NULL && bufIsChanged(buf))
6271 {
Bram Moolenaareb822a22021-12-31 15:09:27 +00006272 emsg(_(e_file_is_loaded_in_another_buffer));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006273 vim_free(fnamebuf);
6274 return;
6275 }
6276
6277 fname = fnamebuf;
6278 }
6279
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006280 if (what == SPELL_ADD_BAD || undo)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006281 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006282 // When the word appears as good word we need to remove that one,
6283 // since its flags sort before the one with WF_BANNED.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006284 fd = mch_fopen((char *)fname, "r");
6285 if (fd != NULL)
6286 {
6287 while (!vim_fgets(line, MAXWLEN * 2, fd))
6288 {
6289 fpos = fpos_next;
6290 fpos_next = ftell(fd);
Bram Moolenaar416b5f42022-02-25 21:47:48 +00006291 if (fpos_next < 0)
6292 break; // should never happen
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006293 if (STRNCMP(word, line, len) == 0
6294 && (line[len] == '/' || line[len] < ' '))
6295 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006296 // Found duplicate word. Remove it by writing a '#' at
6297 // the start of the line. Mixing reading and writing
6298 // doesn't work for all systems, close the file first.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006299 fclose(fd);
6300 fd = mch_fopen((char *)fname, "r+");
6301 if (fd == NULL)
6302 break;
6303 if (fseek(fd, fpos, SEEK_SET) == 0)
6304 {
6305 fputc('#', fd);
6306 if (undo)
6307 {
6308 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006309 smsg(_("Word '%.*s' removed from %s"),
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006310 len, word, NameBuff);
6311 }
6312 }
Bram Moolenaar2c363a22021-02-03 20:14:23 +01006313 if (fseek(fd, fpos_next, SEEK_SET) != 0)
6314 {
6315 PERROR(_("Seek error in spellfile"));
6316 break;
6317 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006318 }
6319 }
6320 if (fd != NULL)
6321 fclose(fd);
6322 }
6323 }
6324
6325 if (!undo)
6326 {
6327 fd = mch_fopen((char *)fname, "a");
6328 if (fd == NULL && new_spf)
6329 {
6330 char_u *p;
6331
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006332 // We just initialized the 'spellfile' option and can't open the
6333 // file. We may need to create the "spell" directory first. We
6334 // already checked the runtime directory is writable in
6335 // init_spellfile().
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006336 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
6337 {
6338 int c = *p;
6339
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006340 // The directory doesn't exist. Try creating it and opening
6341 // the file again.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006342 *p = NUL;
6343 vim_mkdir(fname, 0755);
6344 *p = c;
6345 fd = mch_fopen((char *)fname, "a");
6346 }
6347 }
6348
6349 if (fd == NULL)
Bram Moolenaar460ae5d2022-01-01 14:19:49 +00006350 semsg(_(e_cant_open_file_str), fname);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006351 else
6352 {
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006353 if (what == SPELL_ADD_BAD)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006354 fprintf(fd, "%.*s/!\n", len, word);
Bram Moolenaar08cc3742019-08-11 22:51:14 +02006355 else if (what == SPELL_ADD_RARE)
6356 fprintf(fd, "%.*s/?\n", len, word);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006357 else
6358 fprintf(fd, "%.*s\n", len, word);
6359 fclose(fd);
6360
6361 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf9e3e092019-01-13 23:38:42 +01006362 smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006363 }
6364 }
6365
6366 if (fd != NULL)
6367 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006368 // Update the .add.spl file.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006369 mkspell(1, &fname, FALSE, TRUE, TRUE);
6370
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006371 // If the .add file is edited somewhere, reload it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006372 if (buf != NULL)
Rob Pilling8196e942022-02-11 15:12:10 +00006373 buf_reload(buf, buf->b_orig_mode, FALSE);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006374
Bram Moolenaara4d158b2022-08-14 14:17:45 +01006375 redraw_all_later(UPD_SOME_VALID);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006376 }
6377 vim_free(fnamebuf);
6378}
6379
6380/*
6381 * Initialize 'spellfile' for the current buffer.
6382 */
6383 static void
6384init_spellfile(void)
6385{
6386 char_u *buf;
6387 int l;
6388 char_u *fname;
6389 char_u *rtp;
6390 char_u *lend;
6391 int aspath = FALSE;
6392 char_u *lstart = curbuf->b_s.b_p_spl;
6393
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006394 if (*curwin->w_s->b_p_spl == NUL || curwin->w_s->b_langp.ga_len <= 0)
6395 return;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006396
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006397 buf = alloc(MAXPATHL);
6398 if (buf == NULL)
6399 return;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006400
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006401 // Find the end of the language name. Exclude the region. If there
6402 // is a path separator remember the start of the tail.
6403 for (lend = curwin->w_s->b_p_spl; *lend != NUL
6404 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
6405 if (vim_ispathsep(*lend))
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006406 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006407 aspath = TRUE;
6408 lstart = lend + 1;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006409 }
6410
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006411 // Loop over all entries in 'runtimepath'. Use the first one where we
6412 // are allowed to write.
6413 rtp = p_rtp;
6414 while (*rtp != NUL)
6415 {
6416 if (aspath)
6417 // Use directory of an entry with path, e.g., for
6418 // "/dir/lg.utf-8.spl" use "/dir".
6419 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6420 lstart - curbuf->b_s.b_p_spl - 1);
6421 else
6422 // Copy the path from 'runtimepath' to buf[].
6423 copy_option_part(&rtp, buf, MAXPATHL, ",");
6424 if (filewritable(buf) == 2)
6425 {
6426 // Use the first language name from 'spelllang' and the
6427 // encoding used in the first loaded .spl file.
6428 if (aspath)
6429 vim_strncpy(buf, curbuf->b_s.b_p_spl,
6430 lend - curbuf->b_s.b_p_spl);
6431 else
6432 {
6433 // Create the "spell" directory if it doesn't exist yet.
6434 l = (int)STRLEN(buf);
6435 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
6436 if (filewritable(buf) != 2)
Christian Brabandt220474d2024-07-20 13:26:44 +02006437 {
6438 if (vim_mkdir(buf, 0755) != 0)
6439 {
6440 vim_free(buf);
6441 return;
6442 }
6443 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006444
6445 l = (int)STRLEN(buf);
6446 vim_snprintf((char *)buf + l, MAXPATHL - l,
6447 "/%.*s", (int)(lend - lstart), lstart);
6448 }
6449 l = (int)STRLEN(buf);
6450 fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
6451 ->lp_slang->sl_fname;
6452 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
6453 fname != NULL
6454 && strstr((char *)gettail(fname), ".ascii.") != NULL
6455 ? (char_u *)"ascii" : spell_enc());
6456 set_option_value_give_err((char_u *)"spellfile",
6457 0L, buf, OPT_LOCAL);
6458 break;
6459 }
6460 aspath = FALSE;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006461 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +00006462
6463 vim_free(buf);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006464}
6465
6466
6467
6468/*
6469 * Set the spell character tables from strings in the affix file.
6470 */
6471 static int
6472set_spell_chartab(char_u *fol, char_u *low, char_u *upp)
6473{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006474 // We build the new tables here first, so that we can compare with the
6475 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006476 spelltab_T new_st;
6477 char_u *pf = fol, *pl = low, *pu = upp;
6478 int f, l, u;
6479
6480 clear_spell_chartab(&new_st);
6481
6482 while (*pf != NUL)
6483 {
6484 if (*pl == NUL || *pu == NUL)
6485 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006486 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006487 return FAIL;
6488 }
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006489 f = mb_ptr2char_adv(&pf);
6490 l = mb_ptr2char_adv(&pl);
6491 u = mb_ptr2char_adv(&pu);
Bram Moolenaar264b74f2019-01-24 17:18:42 +01006492
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006493 // Every character that appears is a word character.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006494 if (f < 256)
6495 new_st.st_isw[f] = TRUE;
6496 if (l < 256)
6497 new_st.st_isw[l] = TRUE;
6498 if (u < 256)
6499 new_st.st_isw[u] = TRUE;
6500
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006501 // if "LOW" and "FOL" are not the same the "LOW" char needs
6502 // case-folding
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006503 if (l < 256 && l != f)
6504 {
6505 if (f >= 256)
6506 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006507 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006508 return FAIL;
6509 }
6510 new_st.st_fold[l] = f;
6511 }
6512
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006513 // if "UPP" and "FOL" are not the same the "UPP" char needs
6514 // case-folding, it's upper case and the "UPP" is the upper case of
6515 // "FOL" .
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006516 if (u < 256 && u != f)
6517 {
6518 if (f >= 256)
6519 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006520 emsg(_(e_character_in_fol_low_or_upp_is_out_of_range));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006521 return FAIL;
6522 }
6523 new_st.st_fold[u] = f;
6524 new_st.st_isu[u] = TRUE;
6525 new_st.st_upper[f] = u;
6526 }
6527 }
6528
6529 if (*pl != NUL || *pu != NUL)
6530 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006531 emsg(_(e_format_error_in_affix_file_fol_low_or_upp));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006532 return FAIL;
6533 }
6534
6535 return set_spell_finish(&new_st);
6536}
6537
6538/*
6539 * Set the spell character tables from strings in the .spl file.
6540 */
6541 static void
6542set_spell_charflags(
6543 char_u *flags,
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006544 int cnt, // length of "flags"
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006545 char_u *fol)
6546{
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006547 // We build the new tables here first, so that we can compare with the
6548 // previous one.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006549 spelltab_T new_st;
6550 int i;
6551 char_u *p = fol;
6552 int c;
6553
6554 clear_spell_chartab(&new_st);
6555
6556 for (i = 0; i < 128; ++i)
6557 {
6558 if (i < cnt)
6559 {
6560 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
6561 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
6562 }
6563
6564 if (*p != NUL)
6565 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006566 c = mb_ptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006567 new_st.st_fold[i + 128] = c;
6568 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
6569 new_st.st_upper[c] = i + 128;
6570 }
6571 }
6572
6573 (void)set_spell_finish(&new_st);
6574}
6575
6576 static int
6577set_spell_finish(spelltab_T *new_st)
6578{
6579 int i;
6580
6581 if (did_set_spelltab)
6582 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006583 // check that it's the same table
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006584 for (i = 0; i < 256; ++i)
6585 {
6586 if (spelltab.st_isw[i] != new_st->st_isw[i]
6587 || spelltab.st_isu[i] != new_st->st_isu[i]
6588 || spelltab.st_fold[i] != new_st->st_fold[i]
6589 || spelltab.st_upper[i] != new_st->st_upper[i])
6590 {
Bram Moolenaar677658a2022-01-05 16:09:06 +00006591 emsg(_(e_word_characters_differ_between_spell_files));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006592 return FAIL;
6593 }
6594 }
6595 }
6596 else
6597 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006598 // copy the new spelltab into the one being used
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006599 spelltab = *new_st;
6600 did_set_spelltab = TRUE;
6601 }
6602
6603 return OK;
6604}
6605
6606/*
6607 * Write the table with prefix conditions to the .spl file.
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006608 * When "fd" is NULL only count the length of what is written and return it.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006609 */
6610 static int
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006611write_spell_prefcond(FILE *fd, garray_T *gap, size_t *fwv)
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006612{
6613 int i;
6614 char_u *p;
6615 int len;
6616 int totlen;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006617
6618 if (fd != NULL)
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006619 put_bytes(fd, (long_u)gap->ga_len, 2); // <prefcondcnt>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006620
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006621 totlen = 2 + gap->ga_len; // length of <prefcondcnt> and <condlen> bytes
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006622
6623 for (i = 0; i < gap->ga_len; ++i)
6624 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006625 // <prefcond> : <condlen> <condstr>
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006626 p = ((char_u **)gap->ga_data)[i];
6627 if (p != NULL)
6628 {
6629 len = (int)STRLEN(p);
6630 if (fd != NULL)
6631 {
6632 fputc(len, fd);
=?UTF-8?q?Bj=C3=B6rn=20Linse?=1daedc82021-12-10 20:39:17 +00006633 *fwv &= fwrite(p, (size_t)len, (size_t)1, fd);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006634 }
6635 totlen += len;
6636 }
6637 else if (fd != NULL)
6638 fputc(0, fd);
6639 }
6640
6641 return totlen;
6642}
6643
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006644/*
6645 * Use map string "map" for languages "lp".
6646 */
6647 static void
6648set_map_str(slang_T *lp, char_u *map)
6649{
6650 char_u *p;
6651 int headc = 0;
6652 int c;
6653 int i;
6654
6655 if (*map == NUL)
6656 {
6657 lp->sl_has_map = FALSE;
6658 return;
6659 }
6660 lp->sl_has_map = TRUE;
6661
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006662 // Init the array and hash tables empty.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006663 for (i = 0; i < 256; ++i)
6664 lp->sl_map_array[i] = 0;
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006665 hash_init(&lp->sl_map_hash);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006666
6667 /*
6668 * The similar characters are stored separated with slashes:
6669 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
6670 * before the same slash. For characters above 255 sl_map_hash is used.
6671 */
6672 for (p = map; *p != NUL; )
6673 {
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006674 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006675 if (c == '/')
6676 headc = 0;
6677 else
6678 {
6679 if (headc == 0)
6680 headc = c;
6681
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006682 // Characters above 255 don't fit in sl_map_array[], put them in
6683 // the hash table. Each entry is the char, a NUL the headchar and
6684 // a NUL.
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006685 if (c >= 256)
6686 {
6687 int cl = mb_char2len(c);
6688 int headcl = mb_char2len(headc);
6689 char_u *b;
6690 hash_T hash;
6691 hashitem_T *hi;
6692
Bram Moolenaar964b3742019-05-24 18:54:09 +02006693 b = alloc(cl + headcl + 2);
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006694 if (b == NULL)
6695 return;
6696 mb_char2bytes(c, b);
6697 b[cl] = NUL;
6698 mb_char2bytes(headc, b + cl + 1);
6699 b[cl + 1 + headcl] = NUL;
6700 hash = hash_hash(b);
6701 hi = hash_lookup(&lp->sl_map_hash, b, hash);
6702 if (HASHITEM_EMPTY(hi))
6703 hash_add_item(&lp->sl_map_hash, hi, b, hash);
6704 else
6705 {
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006706 // This should have been checked when generating the .spl
6707 // file.
Bram Moolenaar677658a2022-01-05 16:09:06 +00006708 emsg(_(e_duplicate_char_in_map_entry));
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006709 vim_free(b);
6710 }
6711 }
6712 else
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02006713 lp->sl_map_array[c] = headc;
6714 }
6715 }
6716}
6717
Bram Moolenaar0d6f5d92019-12-05 21:33:15 +01006718#endif // FEAT_SPELL