blob: 1c3fd0f651318ae4b51b2378033c419c198522be [file] [log] [blame]
Bram Moolenaare19defe2005-03-21 08:23:33 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spell.c: code for spell checking
Bram Moolenaarfc735152005-03-22 22:54:12 +000012 *
Bram Moolenaar51485f02005-06-04 21:55:20 +000013 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
14 * has a list of bytes that can appear (siblings). For each byte there is a
15 * pointer to the node with the byte that follows in the word (child).
Bram Moolenaar9f30f502005-06-14 22:01:04 +000016 *
17 * A NUL byte is used where the word may end. The bytes are sorted, so that
18 * binary searching can be used and the NUL bytes are at the start. The
19 * number of possible bytes is stored before the list of bytes.
20 *
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
22 * either the next index or flags. The tree starts at index 0. For example,
23 * to lookup "vi" this sequence is followed:
24 * i = 0
25 * len = byts[i]
26 * n = where "v" appears in byts[i + 1] to byts[i + len]
27 * i = idxs[n]
28 * len = byts[i]
29 * n = where "i" appears in byts[i + 1] to byts[i + len]
30 * i = idxs[n]
31 * len = byts[i]
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
Bram Moolenaar51485f02005-06-04 21:55:20 +000033 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +000034 * There are two word trees: one with case-folded words and one with words in
Bram Moolenaar51485f02005-06-04 21:55:20 +000035 * original case. The second one is only used for keep-case words and is
36 * usually small.
37 *
Bram Moolenaarae5bce12005-08-15 21:41:48 +000038 * There is one additional tree for when not all prefixes are applied when
Bram Moolenaar1d73c882005-06-19 22:48:47 +000039 * generating the .spl file. This tree stores all the possible prefixes, as
40 * if they were words. At each word (prefix) end the prefix nr is stored, the
41 * following word must support this prefix nr. And the condition nr is
42 * stored, used to lookup the condition that the word must match with.
43 *
Bram Moolenaar51485f02005-06-04 21:55:20 +000044 * Thanks to Olaf Seibert for providing an example implementation of this tree
45 * and the compression mechanism.
Bram Moolenaar4770d092006-01-12 23:22:24 +000046 * LZ trie ideas:
47 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
48 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000049 *
50 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000051 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +000052 * Why doesn't Vim use aspell/ispell/myspell/etc.?
53 * See ":help develop-spell".
54 */
55
Bram Moolenaar329cc7e2005-08-10 07:51:35 +000056/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000057 * Only use it for small word lists!
 * Disabled by default: change the "#if 0" below to "#if 1" to define it. */
Bram Moolenaar329cc7e2005-08-10 07:51:35 +000058#if 0
59# define SPELL_PRINTTREE
Bram Moolenaar329cc7e2005-08-10 07:51:35 +000060#endif
61
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000062/* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
63 * specific word.
 * Disabled by default: change the "#if 0" below to "#if 1" to define it. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000064#if 0
65# define DEBUG_TRIEWALK
66#endif
67
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the weighted average (3 * word_score + sound_score) / 4,
 * computed with integer division.  Both arguments are parenthesized in the
 * expansion, so an expression such as "a + b" can be passed for either one
 * without being torn apart by operator precedence.
 */
#define RESCORE(word_score, sound_score) \
	((3 * (word_score) + (sound_score)) / 4)
77
/*
 * Do the opposite of RESCORE(): based on a maximum end score and a known
 * sound score, compute the maximum word score that can be used.
 *
 * The result is (4 * word_score - sound_score) / 3, computed with integer
 * division.  Both arguments are parenthesized in the expansion, so that
 * passing e.g. "a + b" as the sound score subtracts the whole sum.
 */
#define MAXSCORE(word_score, sound_score) \
	((4 * (word_score) - (sound_score)) / 3)
83
84/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +000085 * Vim spell file format: <HEADER>
Bram Moolenaar5195e452005-08-19 20:32:47 +000086 * <SECTIONS>
Bram Moolenaar1d73c882005-06-19 22:48:47 +000087 * <LWORDTREE>
88 * <KWORDTREE>
89 * <PREFIXTREE>
Bram Moolenaar51485f02005-06-04 21:55:20 +000090 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000091 * <HEADER>: <fileID> <versionnr>
Bram Moolenaar51485f02005-06-04 21:55:20 +000092 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000093 * <fileID> 8 bytes "VIMspell"
94 * <versionnr> 1 byte VIMSPELLVERSION
95 *
96 *
97 * Sections make it possible to add information to the .spl file without
98 * making it incompatible with previous versions. There are two kinds of
99 * sections:
100 * 1. Not essential for correct spell checking. E.g. for making suggestions.
101 * These are skipped when not supported.
102 * 2. Optional information, but essential for spell checking when present.
103 * E.g. conditions for affixes. When this section is present but not
104 * supported an error message is given.
105 *
106 * <SECTIONS>: <section> ... <sectionend>
107 *
108 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
109 *
110 * <sectionID> 1 byte number from 0 to 254 identifying the section
111 *
112 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
113 * spell checking
114 *
115 * <sectionlen> 4 bytes length of section contents, MSB first
116 *
117 * <sectionend> 1 byte SN_END
118 *
119 *
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000120 * sectionID == SN_INFO: <infotext>
121 * <infotext> N bytes free format text with spell file info (version,
122 * website, etc)
123 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000124 * sectionID == SN_REGION: <regionname> ...
125 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000126 * First <regionname> is region 1.
127 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000128 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
129 * <folcharslen> <folchars>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000130 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
131 * <charflags> N bytes List of flags (first one is for character 128):
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000132 * 0x01 word character CF_WORD
133 * 0x02 upper-case character CF_UPPER
Bram Moolenaar5195e452005-08-19 20:32:47 +0000134 * <folcharslen> 2 bytes Number of bytes in <folchars>.
135 * <folchars> N bytes Folded characters, first one is for character 128.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000136 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000137 * sectionID == SN_MIDWORD: <midword>
138 * <midword> N bytes Characters that are word characters only when used
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000139 * in the middle of a word.
140 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000141 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000142 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000143 * <prefcond> : <condlen> <condstr>
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000144 * <condlen> 1 byte Length of <condstr>.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000145 * <condstr> N bytes Condition for the prefix.
146 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000147 * sectionID == SN_REP: <repcount> <rep> ...
148 * <repcount> 2 bytes number of <rep> items, MSB first.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000149 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000150 * <repfromlen> 1 byte length of <repfrom>
151 * <repfrom> N bytes "from" part of replacement
152 * <reptolen> 1 byte length of <repto>
153 * <repto> N bytes "to" part of replacement
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000154 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000155 * sectionID == SN_REPSAL: <repcount> <rep> ...
156 * just like SN_REP but for soundfolded words
157 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000158 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
159 * <salflags> 1 byte flags for soundsalike conversion:
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000160 * SAL_F0LLOWUP
161 * SAL_COLLAPSE
162 * SAL_REM_ACCENTS
Bram Moolenaar5195e452005-08-19 20:32:47 +0000163 * <salcount> 2 bytes number of <sal> items following
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000164 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000165 * <salfromlen> 1 byte length of <salfrom>
166 * <salfrom> N bytes "from" part of soundsalike
167 * <saltolen> 1 byte length of <salto>
168 * <salto> N bytes "to" part of soundsalike
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000169 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000170 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
171 * <sofofromlen> 2 bytes length of <sofofrom>
172 * <sofofrom> N bytes "from" part of soundfold
173 * <sofotolen> 2 bytes length of <sofoto>
174 * <sofoto> N bytes "to" part of soundfold
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000175 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000176 * sectionID == SN_SUGFILE: <timestamp>
177 * <timestamp> 8 bytes time in seconds that must match with .sug file
178 *
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000179 * sectionID == SN_NOSPLITSUGS: nothing
180 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000181 * sectionID == SN_WORDS: <word> ...
182 * <word> N bytes NUL terminated common word
183 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000184 * sectionID == SN_MAP: <mapstr>
185 * <mapstr> N bytes String with sequences of similar characters,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000186 * separated by slashes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000187 *
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000188 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
189 * <comppatcount> <comppattern> ... <compflags>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000190 * <compmax> 1 byte Maximum nr of words in compound word.
191 * <compminlen> 1 byte Minimal word length for compounding.
192 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000193 * <compoptions> 2 bytes COMP_ flags.
194 * <comppatcount> 2 bytes number of <comppattern> following
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000195 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
Bram Moolenaar5195e452005-08-19 20:32:47 +0000196 * slashes.
197 *
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000198 * <comppattern>: <comppatlen> <comppattext>
199 * <comppatlen> 1 byte length of <comppattext>
200 * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
201 *
202 * sectionID == SN_NOBREAK: (empty, its presence is what matters)
Bram Moolenaar78622822005-08-23 21:00:13 +0000203 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000204 * sectionID == SN_SYLLABLE: <syllable>
205 * <syllable> N bytes String from SYLLABLE item.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000206 *
207 * <LWORDTREE>: <wordtree>
208 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000209 * <KWORDTREE>: <wordtree>
210 *
211 * <PREFIXTREE>: <wordtree>
212 *
213 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000214 * <wordtree>: <nodecount> <nodedata> ...
215 *
216 * <nodecount> 4 bytes Number of nodes following. MSB first.
217 *
218 * <nodedata>: <siblingcount> <sibling> ...
219 *
220 * <siblingcount> 1 byte Number of siblings in this node. The siblings
221 * follow in sorted order.
222 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000223 * <sibling>: <byte> [ <nodeidx> <xbyte>
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000224 * | <flags> [<flags2>] [<region>] [<affixID>]
225 * | [<pflags>] <affixID> <prefcondnr> ]
Bram Moolenaar51485f02005-06-04 21:55:20 +0000226 *
227 * <byte> 1 byte Byte value of the sibling. Special cases:
228 * BY_NOFLAGS: End of word without flags and for all
229 * regions.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000230 * For PREFIXTREE <affixID> and
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000231 * <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000232 * BY_FLAGS: End of word, <flags> follow.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000233 * For PREFIXTREE <pflags>, <affixID>
Bram Moolenaar53805d12005-08-01 07:08:33 +0000234 * and <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000235 * BY_FLAGS2: End of word, <flags> and <flags2>
236 * follow. Not used in PREFIXTREE.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000237 * BY_INDEX: Child of sibling is shared, <nodeidx>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000238 * and <xbyte> follow.
239 *
240 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
241 *
242 * <xbyte> 1 byte byte value of the sibling.
243 *
244 * <flags> 1 byte bitmask of:
245 * WF_ALLCAP word must have only capitals
246 * WF_ONECAP first char of word must be capital
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000247 * WF_KEEPCAP keep-case word
248 * WF_FIXCAP keep-case word, all caps not allowed
Bram Moolenaar51485f02005-06-04 21:55:20 +0000249 * WF_RARE rare word
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000250 * WF_BANNED bad word
Bram Moolenaar51485f02005-06-04 21:55:20 +0000251 * WF_REGION <region> follows
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000252 * WF_AFX <affixID> follows
Bram Moolenaar51485f02005-06-04 21:55:20 +0000253 *
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000254 * <flags2> 1 byte Bitmask of:
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000255 * WF_HAS_AFF >> 8 word includes affix
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000256 * WF_NEEDCOMP >> 8 word only valid in compound
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000257 * WF_NOSUGGEST >> 8 word not used for suggestions
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000258 * WF_COMPROOT >> 8 word already a compound
Bram Moolenaar910f66f2006-04-05 20:41:53 +0000259 * WF_NOCOMPBEF >> 8 no compounding before this word
260 * WF_NOCOMPAFT >> 8 no compounding after this word
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000261 *
Bram Moolenaar53805d12005-08-01 07:08:33 +0000262 * <pflags> 1 byte bitmask of:
263 * WFP_RARE rare prefix
264 * WFP_NC non-combining prefix
265 * WFP_UP letter after prefix made upper case
 * WFP_COMPPERMIT prefix with COMPOUNDPERMITFLAG
 * WFP_COMPFORBID prefix with COMPOUNDFORBIDFLAG
266 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000267 * <region> 1 byte Bitmask for regions in which word is valid. When
268 * omitted it's valid in all regions.
269 * Lowest bit is for region 1.
270 *
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000271 * <affixID> 1 byte ID of affix that can be used with this word. In
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000272 * PREFIXTREE used for the required prefix ID.
273 *
274 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
275 * from the SN_PREFCOND section.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000276 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000277 * All text characters are in 'encoding', but stored as single bytes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000278 */
279
Bram Moolenaar4770d092006-01-12 23:22:24 +0000280/*
281 * Vim .sug file format: <SUGHEADER>
282 * <SUGWORDTREE>
283 * <SUGTABLE>
284 *
285 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
286 *
287 * <fileID> 6 bytes "VIMsug"
288 * <versionnr> 1 byte VIMSUGVERSION
289 * <timestamp> 8 bytes timestamp that must match with .spl file
290 *
291 *
292 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
293 *
294 *
295 * <SUGTABLE>: <sugwcount> <sugline> ...
296 *
297 * <sugwcount> 4 bytes number of <sugline> following
298 *
299 * <sugline>: <sugnr> ... NUL
300 *
301 * <sugnr>: X bytes word number that results in this soundfolded word,
302 * stored as an offset to the previous number in as
303 * few bytes as possible (see offset2bytes())
304 */
305
Bram Moolenaare19defe2005-03-21 08:23:33 +0000306#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000307# include "vimio.h" /* for lseek(), must be before vim.h */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000308#endif
309
310#include "vim.h"
311
Bram Moolenaarf71a3db2006-03-12 21:50:18 +0000312#if defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000313
Bram Moolenaar4770d092006-01-12 23:22:24 +0000314#ifndef UNIX /* it's in os_unix.h for Unix */
315# include <time.h> /* for time_t */
316#endif
317
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000318#define MAXWLEN 250 /* Assume max. word len is this many bytes.
319 Some places assume a word length fits in a
320 byte, thus it can't be above 255.
 Also documented as the default value of
 sl_compmax and sl_compsylmax in slang_S. */
Bram Moolenaarfc735152005-03-22 22:54:12 +0000321
Bram Moolenaare52325c2005-08-22 22:54:29 +0000322/* The type used for indexes in the word tree needs to be at least 4 bytes:
 * "int" is used when SIZEOF_INT is above 3, otherwise "long".  If int
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000323 * is 8 bytes we could use something smaller, but what? */
Bram Moolenaare52325c2005-08-22 22:54:29 +0000324#if SIZEOF_INT > 3
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000325typedef int idx_T;
326#else
327typedef long idx_T;
328#endif
329
330/* Flags used for a word. Only the lowest byte can be used, the region byte
331 * comes above it.  These are the bits of <flags> in the .spl file. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000332#define WF_REGION 0x01 /* region byte follows */
333#define WF_ONECAP 0x02 /* word with one capital (or all capitals) */
334#define WF_ALLCAP 0x04 /* word must be all capitals */
335#define WF_RARE 0x08 /* rare word */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000336#define WF_BANNED 0x10 /* bad word */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000337#define WF_AFX 0x20 /* affix ID follows */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000338#define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000339#define WF_KEEPCAP 0x80 /* keep-case word */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000340
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000341/* for <flags2>, shifted up one byte to be used in wn_flags */
342#define WF_HAS_AFF 0x0100 /* word includes affix */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000343#define WF_NEEDCOMP 0x0200 /* word only valid in compound */
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000344#define WF_NOSUGGEST 0x0400 /* word not to be suggested */
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000345#define WF_COMPROOT 0x0800 /* already compounded word, COMPOUNDROOT */
Bram Moolenaar910f66f2006-04-05 20:41:53 +0000346#define WF_NOCOMPBEF 0x1000 /* no compounding before this word */
347#define WF_NOCOMPAFT 0x2000 /* no compounding after this word */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000348
Bram Moolenaar2d3f4892006-01-20 23:02:51 +0000349/* only used for su_badflags.  Note that this has the same value (0x20) as
 * WF_AFX above, so the two cannot be told apart in one flags value. */
350#define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */
351
/* Mask with all the caps-type bits of <flags>. */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000352#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000353
Bram Moolenaar53805d12005-08-01 07:08:33 +0000354/* flags for <pflags> */
Bram Moolenaar5555acc2006-04-07 21:33:12 +0000355#define WFP_RARE 0x01 /* rare prefix */
356#define WFP_NC 0x02 /* prefix is not combining */
357#define WFP_UP 0x04 /* to-upper prefix */
358#define WFP_COMPPERMIT 0x08 /* prefix with COMPOUNDPERMITFLAG */
359#define WFP_COMPFORBID 0x10 /* prefix with COMPOUNDFORBIDFLAG */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000360
Bram Moolenaar5555acc2006-04-07 21:33:12 +0000361/* Flags for postponed prefixes in "sl_pidxs". Must be above affixID (one
362 * byte) and prefcondnr (two bytes).  Each one is the matching WFP_* flag
 * shifted up by 24 bits (three bytes). */
363#define WF_RAREPFX (WFP_RARE << 24) /* rare postponed prefix */
364#define WF_PFX_NC (WFP_NC << 24) /* non-combining postponed prefix */
365#define WF_PFX_UP (WFP_UP << 24) /* to-upper postponed prefix */
366#define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with
367 * COMPOUNDPERMITFLAG */
368#define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with
369 * COMPOUNDFORBIDFLAG */
370
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000371
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000372/* flags for <compoptions>, stored in sl_compoptions; each value is a
 * separate bit */
373#define COMP_CHECKDUP 1 /* CHECKCOMPOUNDDUP */
374#define COMP_CHECKREP 2 /* CHECKCOMPOUNDREP */
375#define COMP_CHECKCASE 4 /* CHECKCOMPOUNDCASE */
376#define COMP_CHECKTRIPLE 8 /* CHECKCOMPOUNDTRIPLE */
377
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000378/* Special byte values for <byte> of a <sibling> in the .spl file. Some are
 * only used in the tree for
379 * postponed prefixes, some only in the other trees. This is a bit messy... */
380#define BY_NOFLAGS 0 /* end of word without flags or region; for
Bram Moolenaar53805d12005-08-01 07:08:33 +0000381 * postponed prefix: no <pflags> */
382#define BY_INDEX 1 /* child is shared, index follows */
383#define BY_FLAGS 2 /* end of word, <flags> byte follows; for
384 * postponed prefix: <pflags> follows */
385#define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
386 * follow; never used in prefix tree */
387#define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000388
Bram Moolenaar4770d092006-01-12 23:22:24 +0000389/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
390 * si_repsal, sl_rep, sl_repsal and si_sal. Not for sl_sal!
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000391 * One replacement: from "ft_from" to "ft_to". */
392typedef struct fromto_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000393{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000394 char_u *ft_from; /* "from" part of the replacement */
395 char_u *ft_to; /* "to" part of the replacement */
396} fromto_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000397
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000398/* Info from "SAL" entries in ".aff" file used in sl_sal.
399 * The info is split for quick processing by spell_soundfold().
400 * Note that "sm_oneof" and "sm_rules" point into sm_lead.
 * The "_w" members are wide character copies and only exist when FEAT_MBYTE
 * is defined. */
401typedef struct salitem_S
402{
403 char_u *sm_lead; /* leading letters */
404 int sm_leadlen; /* length of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000405 char_u *sm_oneof; /* letters from () or NULL */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000406 char_u *sm_rules; /* rules like ^, $, priority */
407 char_u *sm_to; /* replacement. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000408#ifdef FEAT_MBYTE
409 int *sm_lead_w; /* wide character copy of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000410 int *sm_oneof_w; /* wide character copy of "sm_oneof" */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000411 int *sm_to_w; /* wide character copy of "sm_to" */
412#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000413} salitem_T;
414
/* Element type of the "sl_sal_first" table in slang_S: "int" when FEAT_MBYTE
 * is defined, "short" otherwise. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000415#ifdef FEAT_MBYTE
416typedef int salfirst_T;
417#else
418typedef short salfirst_T;
419#endif
420
Bram Moolenaar5195e452005-08-19 20:32:47 +0000421/* Error codes for reading a spell file.
 * Values for SP_*ERROR are negative, positive values are used by
422 * read_cnt_string(). */
423#define SP_TRUNCERROR -1 /* spell file truncated error */
424#define SP_FORMERROR -2 /* format error in spell file */
Bram Moolenaar6de68532005-08-24 22:08:48 +0000425#define SP_OTHERERROR -3 /* other error while reading spell file */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000426
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000427/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000428 * Structure used to store words and other info for one language, loaded from
429 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000430 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
431 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
 * "sl_pbyts/sl_pidxs" is the prefix tree and "sl_sbyts/sl_sidxs" holds the
 * soundfolded words from the .sug file.
432 *
433 * The "byts" array stores the possible bytes in each tree node, preceded by
434 * the number of possible bytes, sorted on byte value:
435 * <len> <byte1> <byte2> ...
436 * The "idxs" array stores the index of the child node corresponding to the
437 * byte in "byts".
438 * Exception: when the byte is zero, the word may end here and "idxs" holds
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000439 * the flags, region mask and affixID for the word. There may be several
440 * zeros in sequence for alternative flag/region/affixID combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000441 */
442typedef struct slang_S slang_T;
443struct slang_S
444{
445 slang_T *sl_next; /* next language */
446 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000447 char_u *sl_fname; /* name of .spl file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000448 int sl_add; /* TRUE if it's a .add file. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000449
Bram Moolenaar51485f02005-06-04 21:55:20 +0000450 char_u *sl_fbyts; /* case-folded word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000451 idx_T *sl_fidxs; /* case-folded word indexes */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000452 char_u *sl_kbyts; /* keep-case word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000453 idx_T *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000454 char_u *sl_pbyts; /* prefix tree word bytes */
455 idx_T *sl_pidxs; /* prefix tree word indexes */
456
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000457 char_u *sl_info; /* infotext string or NULL */
458
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000459 char_u sl_regions[17]; /* table with up to 8 two-byte region names
				  plus NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000460
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000461 char_u *sl_midword; /* MIDWORD string or NULL */
462
Bram Moolenaar4770d092006-01-12 23:22:24 +0000463 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */
464
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000465 int sl_compmax; /* COMPOUNDWORDMAX (default: MAXWLEN) */
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000466 int sl_compminlen; /* COMPOUNDMIN (default: 0) */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000467 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000468 int sl_compoptions; /* COMP_* flags */
469 garray_T sl_comppat; /* CHECKCOMPOUNDPATTERN items */
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000470 regprog_T *sl_compprog; /* COMPOUNDRULE turned into a regexp program
Bram Moolenaar5195e452005-08-19 20:32:47 +0000471 * (NULL when no compounding) */
Bram Moolenaar9f94b052008-11-30 20:12:46 +0000472 char_u *sl_comprules; /* all COMPOUNDRULE concatenated (or NULL) */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000473 char_u *sl_compstartflags; /* flags for first compound word */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000474 char_u *sl_compallflags; /* all flags for compound words */
Bram Moolenaar78622822005-08-23 21:00:13 +0000475 char_u sl_nobreak; /* When TRUE: no spaces between words */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000476 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
477 garray_T sl_syl_items; /* syllable items */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000478
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000479 int sl_prefixcnt; /* number of items in "sl_prefprog" */
480 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
481
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000482 garray_T sl_rep; /* list of fromto_T entries from REP lines */
483 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
484 there is none */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000485 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000486 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000487 there is none */
488 int sl_followup; /* SAL followup */
489 int sl_collapse; /* SAL collapse_result */
490 int sl_rem_accents; /* SAL remove_accents */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000491 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items:
492 * "sl_sal_first" maps chars, when has_mbyte
493 * "sl_sal" is a list of wide char lists. */
494 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */
495 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000496 int sl_nosplitsugs; /* don't suggest splitting a word */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000497
498 /* Info from the .sug file. Loaded on demand. */
499 time_t sl_sugtime; /* timestamp for .sug file */
500 char_u *sl_sbyts; /* soundfolded word bytes */
501 idx_T *sl_sidxs; /* soundfolded word indexes */
502 buf_T *sl_sugbuf; /* buffer with word number table */
503 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to
504 load */
505
Bram Moolenaarea424162005-06-16 21:51:00 +0000506 int sl_has_map; /* TRUE if there is a MAP line */
507#ifdef FEAT_MBYTE
508 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
509 int sl_map_array[256]; /* MAP for first 256 chars */
510#else
511 char_u sl_map_array[256]; /* MAP for first 256 chars */
512#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000513 hashtab_T sl_sounddone; /* table with soundfolded words that have
514 been handled, see add_sound_suggest() */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000515};
516
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000517/* First language that is loaded, start of the linked list of loaded
518 * languages.  The list is linked through "sl_next"; the pointer starts out
 * as NULL. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000519static slang_T *first_lang = NULL;
520
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000521/* Flags used in .spl file for soundsalike flags (<salflags>).
 * Note that SAL_F0LLOWUP is spelled with a digit zero, not the letter O. */
522#define SAL_F0LLOWUP 1
523#define SAL_COLLAPSE 2
524#define SAL_REM_ACCENTS 4
525
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000526/*
527 * Structure used in "b_langp", filled from 'spelllang'.
 * One entry refers to one loaded language (slang_T) plus the region to use.
528 */
529typedef struct langp_S
530{
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000531 slang_T *lp_slang; /* info for this language */
532 slang_T *lp_sallang; /* language used for sound folding or NULL */
533 slang_T *lp_replang; /* language used for REP items or NULL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000534 int lp_region; /* bitmask for region or REGION_ALL */
535} langp_T;
536
/* Pointer to entry "i" of growarray "ga", treating "ga_data" as an array of
 * langp_T. */
537#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
538
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000539#define REGION_ALL 0xff /* word valid in all regions (all bits of the
				  one-byte region mask set) */
540
Bram Moolenaar5195e452005-08-19 20:32:47 +0000541#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
542#define VIMSPELLMAGICL 8 /* number of bytes in VIMSPELLMAGIC */
543#define VIMSPELLVERSION 50 /* <versionnr> of the .spl file format */
544
Bram Moolenaar4770d092006-01-12 23:22:24 +0000545#define VIMSUGMAGIC "VIMsug" /* string at start of Vim .sug file */
546#define VIMSUGMAGICL 6 /* number of bytes in VIMSUGMAGIC */
547#define VIMSUGVERSION 1 /* <versionnr> of the .sug file format */
548
Bram Moolenaar5195e452005-08-19 20:32:47 +0000549/* Section IDs, the <sectionID> byte of a <section> in the .spl file.
 * Only renumber them when VIMSPELLVERSION changes! */
550#define SN_REGION 0 /* <regionname> section */
551#define SN_CHARFLAGS 1 /* charflags section */
552#define SN_MIDWORD 2 /* <midword> section */
553#define SN_PREFCOND 3 /* <prefcond> section */
554#define SN_REP 4 /* REP items section */
555#define SN_SAL 5 /* SAL items section */
556#define SN_SOFO 6 /* soundfolding section */
557#define SN_MAP 7 /* MAP items section */
558#define SN_COMPOUND 8 /* compound words section */
559#define SN_SYLLABLE 9 /* syllable section */
Bram Moolenaar78622822005-08-23 21:00:13 +0000560#define SN_NOBREAK 10 /* NOBREAK section */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000561#define SN_SUGFILE 11 /* timestamp for .sug file */
562#define SN_REPSAL 12 /* REPSAL items section */
563#define SN_WORDS 13 /* common words */
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000564#define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000565#define SN_INFO 15 /* info section */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000566#define SN_END 255 /* end of sections (<sectionend>) */
567
568#define SNF_REQUIRED 1 /* <sectionflags>: required section */
569
/* Result values.  Lower number is accepted over higher one. */
#define SP_BANNED       (-1)    /* parenthesized: safe in any expression */
#define SP_OK           0
#define SP_RARE         1
#define SP_LOCAL        2
#define SP_BAD          3

Bram Moolenaar7887d882005-07-01 22:33:52 +0000577/* file used for "zG" and "zW" */
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000578static char_u *int_wordlist = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +0000579
Bram Moolenaar4770d092006-01-12 23:22:24 +0000580typedef struct wordcount_S
581{
582 short_u wc_count; /* nr of times word was seen */
583 char_u wc_word[1]; /* word, actually longer */
584} wordcount_T;
585
586static wordcount_T dumwc;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000587#define WC_KEY_OFF (unsigned)(dumwc.wc_word - (char_u *)&dumwc)
Bram Moolenaar4770d092006-01-12 23:22:24 +0000588#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
589#define MAXWORDCOUNT 0xffff
590
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000591/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000592 * Information used when looking for suggestions.
593 */
594typedef struct suginfo_S
595{
596 garray_T su_ga; /* suggestions, contains "suggest_T" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000597 int su_maxcount; /* max. number of suggestions displayed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000598 int su_maxscore; /* maximum score for adding to su_ga */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000599 int su_sfmaxscore; /* idem, for when doing soundfold words */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000600 garray_T su_sga; /* like su_ga, sound-folded scoring */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000601 char_u *su_badptr; /* start of bad word in line */
602 int su_badlen; /* length of detected bad word in line */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000603 int su_badflags; /* caps flags for bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000604 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
605 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000606 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000607 hashtab_T su_banned; /* table with banned words */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000608 slang_T *su_sallang; /* default language for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000609} suginfo_T;
610
611/* One word suggestion. Used in "si_ga". */
612typedef struct suggest_S
613{
614 char_u *st_word; /* suggested word, allocated string */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000615 int st_wordlen; /* STRLEN(st_word) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000616 int st_orglen; /* length of replaced text */
617 int st_score; /* lower is better */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000618 int st_altscore; /* used when st_score compares equal */
619 int st_salscore; /* st_score is for soundalike */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000620 int st_had_bonus; /* bonus already included in score */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000621 slang_T *st_slang; /* language used for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000622} suggest_T;
623
/* Get suggestion "i" from growarray "ga", which holds suggest_T items. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* TRUE if a word appears in the list of banned words.
 * "su" is parenthesized so that any pointer expression can be passed. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, (word))))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)       (SUG_CLEAN_COUNT(su) + 50)

/* score for various changes */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_SPLIT_NO  249     /* split bad word with NOSPLITSUGS */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    200     /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      75      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       65      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_SUBCOMP   33      /* substitute a composing character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    66      /* delete a duplicated character */
#define SCORE_DELCOMP   28      /* delete a composing character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    67      /* insert a duplicate character */
#define SCORE_INSCOMP   30      /* insert a composing character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_FILE      30      /* suggestion from a file */
#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */

#define SCORE_COMMON1   30      /* subtracted for words seen before */
#define SCORE_COMMON2   40      /* subtracted for words often seen */
#define SCORE_COMMON3   50      /* subtracted for words very often seen */
#define SCORE_THRES2    10      /* word count threshold for COMMON2 */
#define SCORE_THRES3    100     /* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With less than two changes it's slightly faster but we miss a
 * few good suggestions.  In rare cases we need to try three or four changes.
 */
#define SCORE_SFMAX1    200     /* maximum score for first try */
#define SCORE_SFMAX2    300     /* maximum score for second try */
#define SCORE_SFMAX3    400     /* maximum score for third try */

/* The expansion is parenthesized, so that the value stays the same when the
 * macro is used next to an operator with higher or equal precedence. */
#define SCORE_BIG       (SCORE_INS * 3)     /* big difference */
#define SCORE_MAXMAX    999999  /* accept any score */
#define SCORE_LIMITMAX  350     /* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN  SCORE_SIMILAR

684/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000685 * Structure to store info for word matching.
686 */
687typedef struct matchinf_S
688{
689 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000690
691 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000692 char_u *mi_word; /* start of word being checked */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000693 char_u *mi_end; /* end of matching word so far */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000694 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000695 char_u *mi_cend; /* char after what was used for
696 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000697
698 /* case-folded text */
699 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000700 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000701
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000702 /* for when checking word after a prefix */
703 int mi_prefarridx; /* index in sl_pidxs with list of
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000704 affixID/condition */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000705 int mi_prefcnt; /* number of entries at mi_prefarridx */
706 int mi_prefixlen; /* byte length of prefix */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000707#ifdef FEAT_MBYTE
708 int mi_cprefixlen; /* byte length of prefix in original
709 case */
710#else
711# define mi_cprefixlen mi_prefixlen /* it's the same value */
712#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000713
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000714 /* for when checking a compound word */
715 int mi_compoff; /* start of following word offset */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000716 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */
717 int mi_complen; /* nr of compound words used */
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000718 int mi_compextra; /* nr of COMPOUNDROOT words */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000719
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000720 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000721 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000722 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000723 buf_T *mi_buf; /* buffer being checked */
Bram Moolenaar78622822005-08-23 21:00:13 +0000724
725 /* for NOBREAK */
726 int mi_result2; /* "mi_resul" without following word */
727 char_u *mi_end2; /* "mi_end" without following word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000728} matchinf_T;
729
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000730/*
731 * The tables used for recognizing word characters according to spelling.
732 * These are only used for the first 256 characters of 'encoding'.
733 */
734typedef struct spelltab_S
735{
736 char_u st_isw[256]; /* flags: is word char */
737 char_u st_isu[256]; /* flags: is uppercase char */
738 char_u st_fold[256]; /* chars: folded case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000739 char_u st_upper[256]; /* chars: upper case */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000740} spelltab_T;
741
742static spelltab_T spelltab;
743static int did_set_spelltab;
744
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000745#define CF_WORD 0x01
746#define CF_UPPER 0x02
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000747
748static void clear_spell_chartab __ARGS((spelltab_T *sp));
749static int set_spell_finish __ARGS((spelltab_T *new_st));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000750static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
751static int spell_iswordp_nmw __ARGS((char_u *p));
752#ifdef FEAT_MBYTE
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +0000753static int spell_mb_isword_class __ARGS((int cl));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000754static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
755#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000756static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000757
/*
 * For finding suggestions: At each node in the tree these states are tried:
 */
typedef enum
{
    STATE_START = 0,    /* At start of node check for NUL bytes (goodword
                         * ends); if badword ends there is a match, otherwise
                         * try splitting word. */
    STATE_NOPREFIX,     /* try without prefix */
    STATE_SPLITUNDO,    /* Undo splitting. */
    STATE_ENDNUL,       /* Past NUL bytes at start of the node. */
    STATE_PLAIN,        /* Use each byte of the node. */
    STATE_DEL,          /* Delete a byte from the bad word. */
    STATE_INS_PREP,     /* Prepare for inserting bytes. */
    STATE_INS,          /* Insert a byte in the bad word. */
    STATE_SWAP,         /* Swap two bytes. */
    STATE_UNSWAP,       /* Undo swap two characters. */
    STATE_SWAP3,        /* Swap two characters over three. */
    STATE_UNSWAP3,      /* Undo Swap two characters over three. */
    STATE_UNROT3L,      /* Undo rotate three characters left */
    STATE_UNROT3R,      /* Undo rotate three characters right */
    STATE_REP_INI,      /* Prepare for using REP items. */
    STATE_REP,          /* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,     /* Undo a REP item replacement. */
    STATE_FINAL         /* End of this node. */
} state_T;

785/*
Bram Moolenaar0c405862005-06-22 22:26:26 +0000786 * Struct to keep the state at each level in suggest_try_change().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000787 */
788typedef struct trystate_S
789{
Bram Moolenaarea424162005-06-16 21:51:00 +0000790 state_T ts_state; /* state at this level, STATE_ */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000791 int ts_score; /* score */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000792 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaarea424162005-06-16 21:51:00 +0000793 short ts_curi; /* index in list of child nodes */
794 char_u ts_fidx; /* index in fword[], case-folded bad word */
795 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
796 char_u ts_twordlen; /* valid length of tword[] */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000797 char_u ts_prefixdepth; /* stack depth for end of prefix or
Bram Moolenaard12a1322005-08-21 22:08:24 +0000798 * PFD_PREFIXTREE or PFD_NOPREFIX */
799 char_u ts_flags; /* TSF_ flags */
Bram Moolenaarea424162005-06-16 21:51:00 +0000800#ifdef FEAT_MBYTE
801 char_u ts_tcharlen; /* number of bytes in tword character */
802 char_u ts_tcharidx; /* current byte index in tword character */
803 char_u ts_isdiff; /* DIFF_ values */
804 char_u ts_fcharstart; /* index in fword where badword char started */
805#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000806 char_u ts_prewordlen; /* length of word in "preword[]" */
807 char_u ts_splitoff; /* index in "tword" after last split */
Bram Moolenaar78622822005-08-23 21:00:13 +0000808 char_u ts_splitfidx; /* "ts_fidx" at word split */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000809 char_u ts_complen; /* nr of compound words used */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000810 char_u ts_compsplit; /* index for "compflags" where word was spit */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000811 char_u ts_save_badflags; /* su_badflags saved here */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000812 char_u ts_delidx; /* index in fword for char that was deleted,
813 valid when "ts_flags" has TSF_DIDDEL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000814} trystate_T;
815
/* values for ts_isdiff */
#define DIFF_NONE       0       /* no different byte (yet) */
#define DIFF_YES        1       /* different byte found */
#define DIFF_INSERT     2       /* inserting character */

/* values for ts_flags */
#define TSF_PREFIXOK    1       /* already checked that prefix is OK */
#define TSF_DIDSPLIT    2       /* tried split at this point */
#define TSF_DIDDEL      4       /* did a delete, "ts_delidx" has index */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX    0xff    /* not using prefixes */
#define PFD_PREFIXTREE  0xfe    /* walking through the prefix tree */
#define PFD_NOTSPECIAL  0xfd    /* highest value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD       0   /* find word case-folded */
#define FIND_KEEPWORD       1   /* find keep-case word */
#define FIND_PREFIX         2   /* find word after prefix */
#define FIND_COMPOUND       3   /* find case-folded compound word */
#define FIND_KEEPCOMPOUND   4   /* find keep-case compound word */

Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000838static slang_T *slang_alloc __ARGS((char_u *lang));
839static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000840static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000841static void slang_clear_sug __ARGS((slang_T *lp));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000842static void find_word __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar9f94b052008-11-30 20:12:46 +0000843static int match_checkcompoundpattern __ARGS((char_u *ptr, int wlen, garray_T *gap));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000844static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
Bram Moolenaar9f94b052008-11-30 20:12:46 +0000845static int can_be_compound __ARGS((trystate_T *sp, slang_T *slang, char_u *compflags, int flag));
846static int match_compoundrule __ARGS((slang_T *slang, char_u *compflags));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000847static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
Bram Moolenaard12a1322005-08-21 22:08:24 +0000848static void find_prefix __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000849static int fold_more __ARGS((matchinf_T *mip));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000850static int spell_valid_case __ARGS((int wordflags, int treeflags));
Bram Moolenaar95529562005-08-25 21:21:38 +0000851static int no_spell_checking __ARGS((win_T *wp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000852static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000853static char_u *spell_enc __ARGS((void));
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000854static void int_wordlist_spl __ARGS((char_u *fname));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000855static void spell_load_cb __ARGS((char_u *fname, void *cookie));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000856static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
Bram Moolenaarb388adb2006-02-28 23:50:17 +0000857static time_t get8c __ARGS((FILE *fd));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000858static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000859static char_u *read_string __ARGS((FILE *fd, int cnt));
860static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
861static int read_charflags_section __ARGS((FILE *fd));
862static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000863static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000864static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000865static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len));
866static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count));
867static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000868static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
869static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
Bram Moolenaar6de68532005-08-24 22:08:48 +0000870static int byte_in_str __ARGS((char_u *str, int byte));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000871static int init_syl_tab __ARGS((slang_T *slang));
872static int count_syllables __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar7887d882005-07-01 22:33:52 +0000873static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
874static void set_sal_first __ARGS((slang_T *lp));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000875#ifdef FEAT_MBYTE
876static int *mb_str2wide __ARGS((char_u *s));
877#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000878static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt));
879static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000880static void clear_midword __ARGS((buf_T *buf));
881static void use_midword __ARGS((slang_T *lp, buf_T *buf));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000882static int find_region __ARGS((char_u *rp, char_u *region));
883static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000884static int badword_captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000885static void spell_reload_one __ARGS((char_u *fname, int added_word));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000886static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000887static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000888static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
Bram Moolenaar8b59de92005-08-11 19:59:29 +0000889static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
Bram Moolenaar66fa2712006-01-22 23:22:22 +0000890static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000891#ifdef FEAT_EVAL
892static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
893#endif
894static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000895static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive));
896static void suggest_load_files __ARGS((void));
897static void tree_count_words __ARGS((char_u *byts, idx_T *idxs));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000898static void spell_find_cleanup __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000899static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000900static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000901static void suggest_try_special __ARGS((suginfo_T *su));
902static void suggest_try_change __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000903static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold));
904static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000905#ifdef FEAT_MBYTE
906static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
907#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000908static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000909static void score_comp_sal __ARGS((suginfo_T *su));
910static void score_combine __ARGS((suginfo_T *su));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000911static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000912static void suggest_try_soundalike_prep __ARGS((void));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000913static void suggest_try_soundalike __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000914static void suggest_try_soundalike_finish __ARGS((void));
915static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp));
916static int soundfold_find __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000917static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
Bram Moolenaarea424162005-06-16 21:51:00 +0000918static void set_map_str __ARGS((slang_T *lp, char_u *map));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000919static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000920static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf));
921static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000922static void add_banned __ARGS((suginfo_T *su, char_u *word));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000923static void rescore_suggestions __ARGS((suginfo_T *su));
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000924static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000925static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000926static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
927static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
928static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000929#ifdef FEAT_MBYTE
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000930static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000931#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000932static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000933static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword));
934static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
935#ifdef FEAT_MBYTE
936static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
937#endif
Bram Moolenaarb475fb92006-03-02 22:40:52 +0000938static void dump_word __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum));
939static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000940static buf_T *open_spellbuf __ARGS((void));
941static void close_spellbuf __ARGS((buf_T *buf));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000942
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with negative number!
 * Each macro evaluates "c" more than once, thus do not pass an expression
 * with side effects.
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation. */
# define SPELL_TOFOLD(c) ((c) < 256 ? (int)spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? (int)spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
# if defined(HAVE_WCHAR_H)
#  include <wchar.h>        /* for towupper() and towlower() */
# endif
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? (int)spelltab.st_fold[c] : (int)towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? (int)spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? (int)spelltab.st_upper[c] : (int)towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? (int)spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
# endif
#endif


986static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar7887d882005-07-01 22:33:52 +0000987static char *e_spell_trunc = N_("E758: Truncated spell file");
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000988static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
Bram Moolenaar6de68532005-08-24 22:08:48 +0000989static char *e_affname = N_("Affix name too long in %s line %d: %s");
990static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
991static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
Bram Moolenaar329cc7e2005-08-10 07:51:35 +0000992static char *msg_compressing = N_("Compressing word tree...");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000993
Bram Moolenaara40ceaf2006-01-13 22:35:40 +0000994/* Remember what "z?" replaced. */
995static char_u *repl_from = NULL;
996static char_u *repl_to = NULL;
997
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000998/*
999 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001000 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001001 * "*attrp" is set to the highlight index for a badly spelled word. For a
1002 * non-word or when it's OK it remains unchanged.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001003 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001004 *
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001005 * "capcol" is used to check for a Capitalised word after the end of a
1006 * sentence. If it's zero then perform the check. Return the column where to
1007 * check next, or -1 when no sentence end was found. If it's NULL then don't
1008 * worry.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001009 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001010 * Returns the length of the word in bytes, also when it's OK, so that the
1011 * caller can skip over the word.
 *
 * "wp" is the window; the language list and the capital-check regexp are
 * taken from "wp->w_buffer".
 * "docount": when non-zero and the word is found to be OK,
 * count_common_word() is called once, for the first language that accepts
 * the word.
 *
 * Values stored in "*attrp":
 *	HLF_SPB	the result is SP_BAD or SP_BANNED
 *	HLF_SPR	the result is SP_RARE
 *	HLF_SPL	any other result that is not SP_OK (presumably SP_LOCAL)
 *	HLF_SPC	the word is OK or rare but does not start with the capital
 *		that "capcol" asked for; the returned length is then the
 *		length up to the next non-word character.
 *
 * Special return values:
 *	1	"*ptr" is a space, control character or NUL, or
 *		"wp->w_buffer->b_langp" is empty.
 *	length of the number, when "ptr" starts with digits and what follows
 *		gives SP_BAD or SP_BANNED ("*attrp" is not set then).
 *	length of one character, when "ptr" is at a non-word character.
1012 */
1013 int
Bram Moolenaar4770d092006-01-12 23:22:24 +00001014spell_check(wp, ptr, attrp, capcol, docount)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001015 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001016 char_u *ptr;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001017 hlf_T *attrp;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001018 int *capcol; /* column to check for Capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +00001019 int docount; /* count good words */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001020{
1021 matchinf_T mi; /* Most things are put in "mi" so that it can
1022 be passed to functions quickly. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001023 int nrlen = 0; /* found a number first */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001024 int c;
    /* Non-zero when a capital was expected but the word does not start with
     * one; holds the length to highlight with HLF_SPC. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001025 int wrongcaplen = 0;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001026 int lpi;
    /* Cleared after the first count_common_word() call so that a word is
     * counted at most once. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00001027 int count_word = docount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001028
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001029 /* A word never starts at a space or a control character. Return quickly
1030 * then, skipping over the character. */
1031 if (*ptr <= ' ')
1032 return 1;
Bram Moolenaara226a6d2006-02-26 23:59:20 +00001033
1034 /* Return here when loading language files failed. */
1035 if (wp->w_buffer->b_langp.ga_len == 0)
1036 return 1;
1037
Bram Moolenaar5195e452005-08-19 20:32:47 +00001038 vim_memset(&mi, 0, sizeof(matchinf_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001039
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001040 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
Bram Moolenaar43abc522005-12-10 20:15:02 +00001041 * 0X99FF. But always do check spelling to find "3GPP" and "11
1042 * julifeest". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001043 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +00001044 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00001045 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1046 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001047 else
1048 mi.mi_end = skipdigits(ptr);
    /* Only the length of the number is kept; mi.mi_end is assigned again
     * below. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001049 nrlen = (int)(mi.mi_end - ptr);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001050 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001051
Bram Moolenaar0c405862005-06-22 22:26:26 +00001052 /* Find the normal end of the word (until the next non-word character). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001053 mi.mi_word = ptr;
Bram Moolenaar43abc522005-12-10 20:15:02 +00001054 mi.mi_fend = ptr;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001055 if (spell_iswordp(mi.mi_fend, wp->w_buffer))
Bram Moolenaar51485f02005-06-04 21:55:20 +00001056 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001057 do
Bram Moolenaar51485f02005-06-04 21:55:20 +00001058 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001059 mb_ptr_adv(mi.mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001060 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001061
    /* The capital check is only done when the caller passed a "*capcol" of
     * zero and the buffer has a "b_cap_prog" regexp. */
1062 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
1063 {
1064 /* Check word starting with capital letter. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00001065 c = PTR2CHAR(ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001066 if (!SPELL_ISUPPER(c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001067 wrongcaplen = (int)(mi.mi_fend - ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001068 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001069 }
    /* Default answer for the caller: no sentence end found.  This is only
     * replaced further down, when "ptr" is at a non-word character and the
     * regexp matches there. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001070 if (capcol != NULL)
1071 *capcol = -1;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001072
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001073 /* We always use the characters up to the next non-word character,
1074 * also for bad words. */
1075 mi.mi_end = mi.mi_fend;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001076
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001077 /* Check caps type later. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001078 mi.mi_buf = wp->w_buffer;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001079
Bram Moolenaar5195e452005-08-19 20:32:47 +00001080 /* case-fold the word with one non-word character, so that we can check
1081 * for the word end. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001082 if (*mi.mi_fend != NUL)
1083 mb_ptr_adv(mi.mi_fend);
1084
1085 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
1086 MAXWLEN + 1);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001087 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001088
1089 /* The word is bad unless we recognize it. */
1090 mi.mi_result = SP_BAD;
Bram Moolenaar78622822005-08-23 21:00:13 +00001091 mi.mi_result2 = SP_BAD;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001092
1093 /*
1094 * Loop over the languages specified in 'spelllang'.
Bram Moolenaar4770d092006-01-12 23:22:24 +00001095 * We check them all, because a word may be matched longer in another
1096 * language.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001097 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001098 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001099 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001100 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
1101
1102 /* If reloading fails the language is still in the list but everything
1103 * has been cleared. */
1104 if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
1105 continue;
1106
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001107 /* Check for a matching word in case-folded words. */
1108 find_word(&mi, FIND_FOLDWORD);
1109
1110 /* Check for a matching word in keep-case words. */
1111 find_word(&mi, FIND_KEEPWORD);
1112
1113 /* Check for matching prefixes. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001114 find_prefix(&mi, FIND_FOLDWORD);
Bram Moolenaar78622822005-08-23 21:00:13 +00001115
1116 /* For a NOBREAK language, may want to use a word without a following
1117 * word as a backup. */
1118 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
1119 && mi.mi_result2 != SP_BAD)
1120 {
1121 mi.mi_result = mi.mi_result2;
1122 mi.mi_end = mi.mi_end2;
1123 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00001124
1125 /* Count the word in the first language where it's found to be OK. */
1126 if (count_word && mi.mi_result == SP_OK)
1127 {
1128 count_common_word(mi.mi_lp->lp_slang, ptr,
1129 (int)(mi.mi_end - ptr), 1);
1130 count_word = FALSE;
1131 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001132 }
1133
1134 if (mi.mi_result != SP_OK)
1135 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001136 /* If we found a number skip over it. Allows for "42nd". Do flag
1137 * rare and local words, e.g., "3GPP". */
    /* Note: when a number was found none of the "else if" branches below
     * is taken; a result other than bad/banned goes straight to setting
     * "*attrp". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001138 if (nrlen > 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001139 {
1140 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1141 return nrlen;
1142 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001143
1144 /* When we are at a non-word character there is no error, just
1145 * skip over the character (try looking for a word after it). */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001146 else if (!spell_iswordp_nmw(ptr))
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00001147 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001148 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
1149 {
1150 regmatch_T regmatch;
1151
1152 /* Check for end of sentence. */
1153 regmatch.regprog = wp->w_buffer->b_cap_prog;
1154 regmatch.rm_ic = FALSE;
    /* On a match "*capcol" becomes the offset of the match end, counted
     * from "ptr". */
1155 if (vim_regexec(&regmatch, ptr, 0))
1156 *capcol = (int)(regmatch.endp[0] - ptr);
1157 }
1158
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001159#ifdef FEAT_MBYTE
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001160 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001161 return (*mb_ptr2len)(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001162#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001163 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001164 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001165 else if (mi.mi_end == ptr)
1166 /* Always include at least one character. Required for when there
1167 * is a mixup in "midword". */
1168 mb_ptr_adv(mi.mi_end);
Bram Moolenaar78622822005-08-23 21:00:13 +00001169 else if (mi.mi_result == SP_BAD
1170 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
1171 {
1172 char_u *p, *fp;
    /* find_word() is used below only as a probe: it may change
     * mi.mi_result, which is put back afterwards so that the word is
     * still reported with its original result. */
1173 int save_result = mi.mi_result;
1174
1175 /* First language in 'spelllang' is NOBREAK. Find first position
1176 * at which any word would be valid. */
1177 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001178 if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
Bram Moolenaar78622822005-08-23 21:00:13 +00001179 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001180 p = mi.mi_word;
1181 fp = mi.mi_fword;
1182 for (;;)
Bram Moolenaar78622822005-08-23 21:00:13 +00001183 {
    /* Step through the original word ("p") and the case-folded
     * word ("fp") in parallel. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001184 mb_ptr_adv(p);
1185 mb_ptr_adv(fp);
1186 if (p >= mi.mi_end)
1187 break;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001188 mi.mi_compoff = (int)(fp - mi.mi_fword);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001189 find_word(&mi, FIND_COMPOUND);
1190 if (mi.mi_result != SP_BAD)
1191 {
    /* A word can start at "p": the bad part ends here. */
1192 mi.mi_end = p;
1193 break;
1194 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001195 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001196 mi.mi_result = save_result;
Bram Moolenaar78622822005-08-23 21:00:13 +00001197 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001198 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001199
    /* Translate the result into the highlight group for the caller. */
1200 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001201 *attrp = HLF_SPB;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001202 else if (mi.mi_result == SP_RARE)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001203 *attrp = HLF_SPR;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001204 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001205 *attrp = HLF_SPL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001206 }
1207
Bram Moolenaar5195e452005-08-19 20:32:47 +00001208 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1209 {
1210 /* Report SpellCap only when the word isn't badly spelled. */
    /* For a rare word this replaces the HLF_SPR that was set above. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001211 *attrp = HLF_SPC;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001212 return wrongcaplen;
1213 }
1214
Bram Moolenaar51485f02005-06-04 21:55:20 +00001215 return (int)(mi.mi_end - ptr);
1216}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001217
Bram Moolenaar51485f02005-06-04 21:55:20 +00001218/*
1219 * Check if the word at "mip->mi_word" is in the tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001220 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1221 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1222 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1223 * tree.
 * When "mode" is FIND_COMPOUND check in fold-case word tree, starting at
 * byte offset "mip->mi_compoff" of the case-folded word.
 * When "mode" is FIND_KEEPCOMPOUND check in keep-case word tree, starting at
 * byte offset "mip->mi_compoff" of the original word.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001224 *
1225 * For a match mip->mi_result is updated.
 * "mip->mi_end" is set to the end of the match in the original word.  For a
 * NOBREAK language a match that is not followed by a good word is stored in
 * "mip->mi_result2" and "mip->mi_end2" instead.
 *
 * The function first walks down the tree, remembering every node where the
 * word may end, and then checks those possible endings from the longest to
 * the shortest.  It calls itself to find the next word of a compound.
 * Gives the e_format error and returns when more than MAXWLEN possible word
 * ends are found.
1226 */
1227 static void
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001228find_word(mip, mode)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001229 matchinf_T *mip;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001230 int mode;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001231{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001232 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001233 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001234 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001235 int endidxcnt = 0;
1236 int len;
    /* Number of bytes of "ptr" matched so far. */
1237 int wlen = 0;
    /* Number of case-folded bytes still available beyond "wlen". */
1238 int flen;
1239 int c;
1240 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001241 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001242#ifdef FEAT_MBYTE
1243 char_u *s;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001244#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +00001245 char_u *p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001246 int res = SP_BAD;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001247 slang_T *slang = mip->mi_lp->lp_slang;
1248 unsigned flags;
1249 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001250 idx_T *idxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001251 int word_ends;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001252 int prefix_found;
Bram Moolenaar78622822005-08-23 21:00:13 +00001253 int nobreak_result;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001254
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001255 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001256 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001257 /* Check for word with matching case in keep-case tree. */
1258 ptr = mip->mi_word;
1259 flen = 9999; /* no case folding, always enough bytes */
1260 byts = slang->sl_kbyts;
1261 idxs = slang->sl_kidxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001262
1263 if (mode == FIND_KEEPCOMPOUND)
1264 /* Skip over the previously found word(s). */
1265 wlen += mip->mi_compoff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001266 }
1267 else
1268 {
1269 /* Check for case-folded in case-folded tree. */
1270 ptr = mip->mi_fword;
1271 flen = mip->mi_fwordlen; /* available case-folded bytes */
1272 byts = slang->sl_fbyts;
1273 idxs = slang->sl_fidxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001274
1275 if (mode == FIND_PREFIX)
1276 {
1277 /* Skip over the prefix. */
1278 wlen = mip->mi_prefixlen;
1279 flen -= mip->mi_prefixlen;
1280 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001281 else if (mode == FIND_COMPOUND)
1282 {
1283 /* Skip over the previously found word(s). */
1284 wlen = mip->mi_compoff;
1285 flen -= mip->mi_compoff;
1286 }
1287
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001288 }
1289
Bram Moolenaar51485f02005-06-04 21:55:20 +00001290 if (byts == NULL)
1291 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001292
Bram Moolenaar51485f02005-06-04 21:55:20 +00001293 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001294 * Repeat advancing in the tree until:
1295 * - there is a byte that doesn't match,
1296 * - we reach the end of the tree,
1297 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001298 */
1299 for (;;)
1300 {
    /* Out of case-folded bytes while the text continues: call fold_more(),
     * its return value becomes the new "flen". */
Bram Moolenaar0c405862005-06-22 22:26:26 +00001301 if (flen <= 0 && *mip->mi_fend != NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001302 flen = fold_more(mip);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001303
    /* A node starts with the number of bytes in it, the bytes themselves
     * follow. */
1304 len = byts[arridx++];
1305
1306 /* If the first possible byte is a zero the word could end here.
1307 * Remember this index, we first check for the longest word. */
1308 if (byts[arridx] == 0)
1309 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001310 if (endidxcnt == MAXWLEN)
1311 {
1312 /* Must be a corrupted spell file. */
1313 EMSG(_(e_format));
1314 return;
1315 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001316 endlen[endidxcnt] = wlen;
1317 endidx[endidxcnt++] = arridx++;
1318 --len;
1319
1320 /* Skip over the zeros, there can be several flag/region
1321 * combinations. */
1322 while (len > 0 && byts[arridx] == 0)
1323 {
1324 ++arridx;
1325 --len;
1326 }
1327 if (len == 0)
1328 break; /* no children, word must end here */
1329 }
1330
1331 /* Stop looking at end of the line. */
1332 if (ptr[wlen] == NUL)
1333 break;
1334
1335 /* Perform a binary search in the list of accepted bytes. */
1336 c = ptr[wlen];
Bram Moolenaar0c405862005-06-22 22:26:26 +00001337 if (c == TAB) /* <Tab> is handled like <Space> */
1338 c = ' ';
Bram Moolenaar51485f02005-06-04 21:55:20 +00001339 lo = arridx;
1340 hi = arridx + len - 1;
1341 while (lo < hi)
1342 {
1343 m = (lo + hi) / 2;
1344 if (byts[m] > c)
1345 hi = m - 1;
1346 else if (byts[m] < c)
1347 lo = m + 1;
1348 else
1349 {
1350 lo = hi = m;
1351 break;
1352 }
1353 }
1354
1355 /* Stop if there is no matching byte. */
1356 if (hi < lo || byts[lo] != c)
1357 break;
1358
1359 /* Continue at the child (if there is one). */
1360 arridx = idxs[lo];
1361 ++wlen;
1362 --flen;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001363
1364 /* One space in the good word may stand for several spaces in the
1365 * checked word. */
1366 if (c == ' ')
1367 {
1368 for (;;)
1369 {
1370 if (flen <= 0 && *mip->mi_fend != NUL)
1371 flen = fold_more(mip);
1372 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1373 break;
1374 ++wlen;
1375 --flen;
1376 }
1377 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001378 }
1379
1380 /*
1381 * Verify that one of the possible endings is valid. Try the longest
1382 * first.
1383 */
1384 while (endidxcnt > 0)
1385 {
1386 --endidxcnt;
1387 arridx = endidx[endidxcnt];
1388 wlen = endlen[endidxcnt];
1389
1390#ifdef FEAT_MBYTE
1391 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1392 continue; /* not at first byte of character */
1393#endif
    /* When a word character follows, this ending is only usable as the
     * first part of a compound or in a NOBREAK language.
     * NOTE(review): presumably "sl_compprog" is NULL when the language has
     * no compound rules - confirm where it is set. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001394 if (spell_iswordp(ptr + wlen, mip->mi_buf))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001395 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001396 if (slang->sl_compprog == NULL && !slang->sl_nobreak)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001397 continue; /* next char is a word character */
1398 word_ends = FALSE;
1399 }
1400 else
1401 word_ends = TRUE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001402 /* The prefix flag is before compound flags. Once a valid prefix flag
1403 * has been found we try compound flags. */
1404 prefix_found = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001405
1406#ifdef FEAT_MBYTE
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001407 if (mode != FIND_KEEPWORD && has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001408 {
1409 /* Compute byte length in original word, length may change
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001410 * when folding case. This can be slow, take a shortcut when the
1411 * case-folded word is equal to the keep-case word. */
    /* From here on "wlen" is a length in "mip->mi_word";
     * "endlen[endidxcnt]" still holds the length in "ptr". */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001412 p = mip->mi_word;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001413 if (STRNCMP(ptr, p, wlen) != 0)
1414 {
1415 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1416 mb_ptr_adv(p);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001417 wlen = (int)(p - mip->mi_word);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001418 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001419 }
1420#endif
1421
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001422 /* Check flags and region. For FIND_PREFIX check the condition and
1423 * prefix ID.
1424 * Repeat this if there are more flags/region alternatives until there
1425 * is a match. */
1426 res = SP_BAD;
    /* "arridx" is at the first NUL byte of the node, thus byts[arridx - 1]
     * is the byte count of that node.  Each NUL byte has its own flags in
     * "idxs". */
1427 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1428 --len, ++arridx)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001429 {
1430 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001431
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001432 /* For the fold-case tree check that the case of the checked word
1433 * matches with what the word in the tree requires.
1434 * For keep-case tree the case is always right. For prefixes we
1435 * don't bother to check. */
1436 if (mode == FIND_FOLDWORD)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001437 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001438 if (mip->mi_cend != mip->mi_word + wlen)
1439 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001440 /* mi_capflags was set for a different word length, need
1441 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001442 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001443 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001444 }
1445
Bram Moolenaar0c405862005-06-22 22:26:26 +00001446 if (mip->mi_capflags == WF_KEEPCAP
1447 || !spell_valid_case(mip->mi_capflags, flags))
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001448 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001449 }
1450
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001451 /* When mode is FIND_PREFIX the word must support the prefix:
1452 * check the prefix ID and the condition. Do that for the list at
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001453 * mip->mi_prefarridx that find_prefix() filled. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001454 else if (mode == FIND_PREFIX && !prefix_found)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001455 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001456 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001457 flags,
Bram Moolenaar53805d12005-08-01 07:08:33 +00001458 mip->mi_word + mip->mi_cprefixlen, slang,
1459 FALSE);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001460 if (c == 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001461 continue;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001462
1463 /* Use the WF_RARE flag for a rare prefix. */
1464 if (c & WF_RAREPFX)
1465 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001466 prefix_found = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001467 }
1468
Bram Moolenaar78622822005-08-23 21:00:13 +00001469 if (slang->sl_nobreak)
1470 {
1471 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1472 && (flags & WF_BANNED) == 0)
1473 {
1474 /* NOBREAK: found a valid following word. That's all we
1475 * need to know, so return. */
1476 mip->mi_result = SP_OK;
1477 break;
1478 }
1479 }
1480
1481 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1482 || !word_ends))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001483 {
Bram Moolenaar2113a1d2006-09-11 19:38:08 +00001484 /* If there is no compound flag or the word is shorter than
Bram Moolenaar5195e452005-08-19 20:32:47 +00001485 * COMPOUNDMIN reject it quickly.
1486 * Makes you wonder why someone puts a compound flag on a word
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001487 * that's too short... Myspell compatibility requires this
1488 * anyway. */
    /* The compound flag is in the bits 24 and up of "flags". */
Bram Moolenaare52325c2005-08-22 22:54:29 +00001489 if (((unsigned)flags >> 24) == 0
1490 || wlen - mip->mi_compoff < slang->sl_compminlen)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001491 continue;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001492#ifdef FEAT_MBYTE
1493 /* For multi-byte chars check character length against
1494 * COMPOUNDMIN. */
1495 if (has_mbyte
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001496 && slang->sl_compminlen > 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001497 && mb_charlen_len(mip->mi_word + mip->mi_compoff,
1498 wlen - mip->mi_compoff) < slang->sl_compminlen)
1499 continue;
1500#endif
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001501
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001502 /* Limit the number of compound words to COMPOUNDWORDMAX if no
Bram Moolenaare52325c2005-08-22 22:54:29 +00001503 * maximum for syllables is specified. */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001504 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
1505 > slang->sl_compmax
Bram Moolenaare52325c2005-08-22 22:54:29 +00001506 && slang->sl_compsylmax == MAXWLEN)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001507 continue;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001508
Bram Moolenaar910f66f2006-04-05 20:41:53 +00001509 /* Don't allow compounding on a side where an affix was added,
1510 * unless COMPOUNDPERMITFLAG was used. */
1511 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
1512 continue;
1513 if (!word_ends && (flags & WF_NOCOMPAFT))
1514 continue;
1515
Bram Moolenaard12a1322005-08-21 22:08:24 +00001516 /* Quickly check if compounding is possible with this flag. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00001517 if (!byte_in_str(mip->mi_complen == 0
Bram Moolenaard12a1322005-08-21 22:08:24 +00001518 ? slang->sl_compstartflags
1519 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +00001520 ((unsigned)flags >> 24)))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001521 continue;
1522
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001523 /* If there is a match with a CHECKCOMPOUNDPATTERN rule
1524 * discard the compound word. */
1525 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
1526 continue;
1527
Bram Moolenaare52325c2005-08-22 22:54:29 +00001528 if (mode == FIND_COMPOUND)
1529 {
1530 int capflags;
1531
1532 /* Need to check the caps type of the appended compound
1533 * word. */
1534#ifdef FEAT_MBYTE
1535 if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1536 mip->mi_compoff) != 0)
1537 {
1538 /* case folding may have changed the length */
1539 p = mip->mi_word;
1540 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1541 mb_ptr_adv(p);
1542 }
1543 else
1544#endif
1545 p = mip->mi_word + mip->mi_compoff;
    /* "p" is now where this compound part starts in the original
     * word. */
1546 capflags = captype(p, mip->mi_word + wlen);
1547 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1548 && (flags & WF_FIXCAP) != 0))
1549 continue;
1550
1551 if (capflags != WF_ALLCAP)
1552 {
1553 /* When the character before the word is a word
1554 * character we do not accept a Onecap word. We do
1555 * accept a no-caps word, even when the dictionary
1556 * word specifies ONECAP. */
1557 mb_ptr_back(mip->mi_word, p);
1558 if (spell_iswordp_nmw(p)
1559 ? capflags == WF_ONECAP
1560 : (flags & WF_ONECAP) != 0
1561 && capflags != WF_ONECAP)
1562 continue;
1563 }
1564 }
1565
Bram Moolenaar5195e452005-08-19 20:32:47 +00001566 /* If the word ends the sequence of compound flags of the
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001567 * words must match with one of the COMPOUNDRULE items and
Bram Moolenaar5195e452005-08-19 20:32:47 +00001568 * the number of syllables must not be too large. */
    /* Append the compound flag of this word to the NUL terminated list
     * of flags collected so far. */
1569 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1570 mip->mi_compflags[mip->mi_complen + 1] = NUL;
1571 if (word_ends)
1572 {
    /* "fword" is only filled when "sl_compsylmax" is below MAXWLEN.
     * NOTE(review): this relies on can_compound() not reading it
     * otherwise - confirm there. */
1573 char_u fword[MAXWLEN];
1574
1575 if (slang->sl_compsylmax < MAXWLEN)
1576 {
1577 /* "fword" is only needed for checking syllables. */
1578 if (ptr == mip->mi_word)
1579 (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1580 else
1581 vim_strncpy(fword, ptr, endlen[endidxcnt]);
1582 }
1583 if (!can_compound(slang, fword, mip->mi_compflags))
1584 continue;
1585 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001586 else if (slang->sl_comprules != NULL
1587 && !match_compoundrule(slang, mip->mi_compflags))
1588 /* The compound flags collected so far do not match any
1589 * COMPOUNDRULE, discard the compounded word. */
1590 continue;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001591 }
1592
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001593 /* Check NEEDCOMPOUND: can't use word without compounding. */
1594 else if (flags & WF_NEEDCOMP)
1595 continue;
1596
    /* Only set to something else for a NOBREAK language, when the search
     * for a following word below finds nothing. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001597 nobreak_result = SP_OK;
1598
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001599 if (!word_ends)
1600 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001601 int save_result = mip->mi_result;
1602 char_u *save_end = mip->mi_end;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001603 langp_T *save_lp = mip->mi_lp;
1604 int lpi;
Bram Moolenaar78622822005-08-23 21:00:13 +00001605
1606 /* Check that a valid word follows. If there is one and we
1607 * are compounding, it will set "mi_result", thus we are
1608 * always finished here. For NOBREAK we only check that a
1609 * valid word follows.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001610 * Recursive! */
Bram Moolenaar78622822005-08-23 21:00:13 +00001611 if (slang->sl_nobreak)
1612 mip->mi_result = SP_BAD;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001613
1614 /* Find following word in case-folded tree. */
1615 mip->mi_compoff = endlen[endidxcnt];
1616#ifdef FEAT_MBYTE
1617 if (has_mbyte && mode == FIND_KEEPWORD)
1618 {
1619 /* Compute byte length in case-folded word from "wlen":
1620 * byte length in keep-case word. Length may change when
1621 * folding case. This can be slow, take a shortcut when
1622 * the case-folded word is equal to the keep-case word. */
1623 p = mip->mi_fword;
1624 if (STRNCMP(ptr, p, wlen) != 0)
1625 {
1626 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1627 mb_ptr_adv(p);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001628 mip->mi_compoff = (int)(p - mip->mi_fword);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001629 }
1630 }
1631#endif
    /* The value saved in "c" is only read by the code inside the
     * "#if 0" block below. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001632 c = mip->mi_compoff;
    /* One more word in the compound while searching for the next one;
     * this is undone after the loop. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001633 ++mip->mi_complen;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001634 if (flags & WF_COMPROOT)
1635 ++mip->mi_compextra;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001636
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001637 /* For NOBREAK we need to try all NOBREAK languages, at least
1638 * to find the ".add" file(s). */
    /* Without NOBREAK the body is executed only once, with "mi_lp"
     * unchanged: see the "break" at the end of the loop. */
1639 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
Bram Moolenaar78622822005-08-23 21:00:13 +00001640 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001641 if (slang->sl_nobreak)
1642 {
1643 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1644 if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1645 || !mip->mi_lp->lp_slang->sl_nobreak)
1646 continue;
1647 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00001648
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001649 find_word(mip, FIND_COMPOUND);
1650
1651 /* When NOBREAK any word that matches is OK. Otherwise we
1652 * need to find the longest match, thus try with keep-case
1653 * and prefix too. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001654 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1655 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001656 /* Find following word in keep-case tree. */
1657 mip->mi_compoff = wlen;
1658 find_word(mip, FIND_KEEPCOMPOUND);
1659
Bram Moolenaar910f66f2006-04-05 20:41:53 +00001660#if 0 /* Disabled, a prefix must not appear halfway a compound word,
1661 unless the COMPOUNDPERMITFLAG is used and then it can't be a
1662 postponed prefix. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001663 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1664 {
1665 /* Check for following word with prefix. */
1666 mip->mi_compoff = c;
1667 find_prefix(mip, FIND_COMPOUND);
1668 }
Bram Moolenaar910f66f2006-04-05 20:41:53 +00001669#endif
Bram Moolenaar78622822005-08-23 21:00:13 +00001670 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001671
1672 if (!slang->sl_nobreak)
1673 break;
Bram Moolenaar78622822005-08-23 21:00:13 +00001674 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001675 --mip->mi_complen;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001676 if (flags & WF_COMPROOT)
1677 --mip->mi_compextra;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001678 mip->mi_lp = save_lp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001679
    /* NOBREAK: remember whether a following word was found and put
     * back "mi_result" and "mi_end", the word itself is judged
     * below.  Otherwise the recursive calls have stored the final
     * result: stop when it is OK, else try the next flags. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001680 if (slang->sl_nobreak)
1681 {
1682 nobreak_result = mip->mi_result;
1683 mip->mi_result = save_result;
1684 mip->mi_end = save_end;
1685 }
1686 else
1687 {
1688 if (mip->mi_result == SP_OK)
1689 break;
1690 continue;
1691 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001692 }
1693
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001694 if (flags & WF_BANNED)
1695 res = SP_BANNED;
1696 else if (flags & WF_REGION)
1697 {
1698 /* Check region. */
    /* The bits 16 and up of "flags" are matched against the region
     * mask of the language. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001699 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001700 res = SP_OK;
1701 else
1702 res = SP_LOCAL;
1703 }
1704 else if (flags & WF_RARE)
1705 res = SP_RARE;
1706 else
1707 res = SP_OK;
1708
Bram Moolenaar78622822005-08-23 21:00:13 +00001709 /* Always use the longest match and the best result. For NOBREAK
1710 * we separately keep the longest match without a following good
1711 * word as a fall-back. */
    /* The comparisons below treat a lower SP_ value as a better
     * result. */
1712 if (nobreak_result == SP_BAD)
1713 {
1714 if (mip->mi_result2 > res)
1715 {
1716 mip->mi_result2 = res;
1717 mip->mi_end2 = mip->mi_word + wlen;
1718 }
1719 else if (mip->mi_result2 == res
1720 && mip->mi_end2 < mip->mi_word + wlen)
1721 mip->mi_end2 = mip->mi_word + wlen;
1722 }
1723 else if (mip->mi_result > res)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001724 {
1725 mip->mi_result = res;
1726 mip->mi_end = mip->mi_word + wlen;
1727 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001728 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001729 mip->mi_end = mip->mi_word + wlen;
1730
Bram Moolenaar78622822005-08-23 21:00:13 +00001731 if (mip->mi_result == SP_OK)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001732 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001733 }
1734
    /* An OK result cannot be improved by a shorter ending. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001735 if (mip->mi_result == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001736 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001737 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001738}
1739
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001740/*
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001741 * Return TRUE if there is a match between the word ptr[wlen] and
1742 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1743 * word.
1744 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1745 * end of ptr[wlen] and the second part matches after it.
1746 */
1747 static int
1748match_checkcompoundpattern(ptr, wlen, gap)
1749 char_u *ptr;
1750 int wlen;
1751 garray_T *gap; /* &sl_comppat */
1752{
1753 int i;
1754 char_u *p;
1755 int len;
1756
1757 for (i = 0; i + 1 < gap->ga_len; i += 2)
1758 {
1759 p = ((char_u **)gap->ga_data)[i + 1];
1760 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
1761 {
1762 /* Second part matches at start of following compound word, now
1763 * check if first part matches at end of previous word. */
1764 p = ((char_u **)gap->ga_data)[i];
Bram Moolenaar19c9c762008-12-09 21:34:39 +00001765 len = (int)STRLEN(p);
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001766 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
1767 return TRUE;
1768 }
1769 }
1770 return FALSE;
1771}
1772
1773/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001774 * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1775 * does not have too many syllables.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001776 */
1777 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00001778can_compound(slang, word, flags)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001779 slang_T *slang;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001780 char_u *word;
1781 char_u *flags;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001782{
Bram Moolenaar5195e452005-08-19 20:32:47 +00001783 regmatch_T regmatch;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001784#ifdef FEAT_MBYTE
1785 char_u uflags[MAXWLEN * 2];
1786 int i;
1787#endif
1788 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001789
1790 if (slang->sl_compprog == NULL)
1791 return FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001792#ifdef FEAT_MBYTE
1793 if (enc_utf8)
1794 {
1795 /* Need to convert the single byte flags to utf8 characters. */
1796 p = uflags;
1797 for (i = 0; flags[i] != NUL; ++i)
1798 p += mb_char2bytes(flags[i], p);
1799 *p = NUL;
1800 p = uflags;
1801 }
1802 else
1803#endif
1804 p = flags;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001805 regmatch.regprog = slang->sl_compprog;
1806 regmatch.rm_ic = FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001807 if (!vim_regexec(&regmatch, p, 0))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001808 return FALSE;
1809
Bram Moolenaare52325c2005-08-22 22:54:29 +00001810 /* Count the number of syllables. This may be slow, do it last. If there
1811 * are too many syllables AND the number of compound words is above
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001812 * COMPOUNDWORDMAX then compounding is not allowed. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001813 if (slang->sl_compsylmax < MAXWLEN
1814 && count_syllables(slang, word) > slang->sl_compsylmax)
Bram Moolenaar6de68532005-08-24 22:08:48 +00001815 return (int)STRLEN(flags) < slang->sl_compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001816 return TRUE;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001817}
1818
1819/*
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001820 * Return TRUE when the sequence of flags in "compflags" plus "flag" can
1821 * possibly form a valid compounded word. This also checks the COMPOUNDRULE
1822 * lines if they don't contain wildcards.
1823 */
1824 static int
1825can_be_compound(sp, slang, compflags, flag)
1826 trystate_T *sp;
1827 slang_T *slang;
1828 char_u *compflags;
1829 int flag;
1830{
1831 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags
1832 * then it can't possibly compound. */
1833 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1834 ? slang->sl_compstartflags : slang->sl_compallflags, flag))
1835 return FALSE;
1836
1837 /* If there are no wildcards, we can check if the flags collected so far
1838 * possibly can form a match with COMPOUNDRULE patterns. This only
1839 * makes sense when we have two or more words. */
1840 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
1841 {
1842 int v;
1843
1844 compflags[sp->ts_complen] = flag;
1845 compflags[sp->ts_complen + 1] = NUL;
1846 v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1847 compflags[sp->ts_complen] = NUL;
1848 return v;
1849 }
1850
1851 return TRUE;
1852}
1853
1854
1855/*
1856 * Return TRUE if the compound flags in compflags[] match the start of any
1857 * compound rule. This is used to stop trying a compound if the flags
1858 * collected so far can't possibly match any compound rule.
1859 * Caller must check that slang->sl_comprules is not NULL.
1860 */
1861 static int
1862match_compoundrule(slang, compflags)
1863 slang_T *slang;
1864 char_u *compflags;
1865{
1866 char_u *p;
1867 int i;
1868 int c;
1869
1870 /* loop over all the COMPOUNDRULE entries */
1871 for (p = slang->sl_comprules; *p != NUL; ++p)
1872 {
1873 /* loop over the flags in the compound word we have made, match
1874 * them against the current rule entry */
1875 for (i = 0; ; ++i)
1876 {
1877 c = compflags[i];
1878 if (c == NUL)
1879 /* found a rule that matches for the flags we have so far */
1880 return TRUE;
1881 if (*p == '/' || *p == NUL)
1882 break; /* end of rule, it's too short */
1883 if (*p == '[')
1884 {
1885 int match = FALSE;
1886
1887 /* compare against all the flags in [] */
1888 ++p;
1889 while (*p != ']' && *p != NUL)
1890 if (*p++ == c)
1891 match = TRUE;
1892 if (!match)
1893 break; /* none matches */
1894 }
1895 else if (*p != c)
1896 break; /* flag of word doesn't match flag in pattern */
1897 ++p;
1898 }
1899
1900 /* Skip to the next "/", where the next pattern starts. */
1901 p = vim_strchr(p, '/');
1902 if (p == NULL)
1903 break;
1904 }
1905
1906 /* Checked all the rules and none of them match the flags, so there
1907 * can't possibly be a compound starting with these flags. */
1908 return FALSE;
1909}
1910
1911/*
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001912 * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1913 * ID in "flags" for the word "word".
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001914 * The WF_RAREPFX flag is included in the return value for a rare prefix.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001915 */
1916 static int
Bram Moolenaar53805d12005-08-01 07:08:33 +00001917valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001918 int totprefcnt; /* nr of prefix IDs */
1919 int arridx; /* idx in sl_pidxs[] */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001920 int flags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001921 char_u *word;
1922 slang_T *slang;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001923 int cond_req; /* only use prefixes with a condition */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001924{
1925 int prefcnt;
1926 int pidx;
1927 regprog_T *rp;
1928 regmatch_T regmatch;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001929 int prefid;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001930
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001931 prefid = (unsigned)flags >> 24;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001932 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1933 {
1934 pidx = slang->sl_pidxs[arridx + prefcnt];
1935
1936 /* Check the prefix ID. */
1937 if (prefid != (pidx & 0xff))
1938 continue;
1939
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001940 /* Check if the prefix doesn't combine and the word already has a
1941 * suffix. */
1942 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1943 continue;
1944
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001945 /* Check the condition, if there is one. The condition index is
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001946 * stored in the two bytes above the prefix ID byte. */
1947 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001948 if (rp != NULL)
1949 {
1950 regmatch.regprog = rp;
1951 regmatch.rm_ic = FALSE;
1952 if (!vim_regexec(&regmatch, word, 0))
1953 continue;
1954 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00001955 else if (cond_req)
1956 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001957
Bram Moolenaar53805d12005-08-01 07:08:33 +00001958 /* It's a match! Return the WF_ flags. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001959 return pidx;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001960 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001961 return 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001962}
1963
1964/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001965 * Check if the word at "mip->mi_word" has a matching prefix.
1966 * If it does, then check the following word.
1967 *
Bram Moolenaard12a1322005-08-21 22:08:24 +00001968 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1969 * prefix in a compound word.
1970 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001971 * For a match mip->mi_result is updated.
1972 */
1973 static void
Bram Moolenaard12a1322005-08-21 22:08:24 +00001974find_prefix(mip, mode)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001975 matchinf_T *mip;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001976 int mode;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001977{
1978 idx_T arridx = 0;
1979 int len;
1980 int wlen = 0;
1981 int flen;
1982 int c;
1983 char_u *ptr;
1984 idx_T lo, hi, m;
1985 slang_T *slang = mip->mi_lp->lp_slang;
1986 char_u *byts;
1987 idx_T *idxs;
1988
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001989 byts = slang->sl_pbyts;
1990 if (byts == NULL)
1991 return; /* array is empty */
1992
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001993 /* We use the case-folded word here, since prefixes are always
1994 * case-folded. */
1995 ptr = mip->mi_fword;
1996 flen = mip->mi_fwordlen; /* available case-folded bytes */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001997 if (mode == FIND_COMPOUND)
1998 {
1999 /* Skip over the previously found word(s). */
2000 ptr += mip->mi_compoff;
2001 flen -= mip->mi_compoff;
2002 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002003 idxs = slang->sl_pidxs;
2004
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002005 /*
2006 * Repeat advancing in the tree until:
2007 * - there is a byte that doesn't match,
2008 * - we reach the end of the tree,
2009 * - or we reach the end of the line.
2010 */
2011 for (;;)
2012 {
2013 if (flen == 0 && *mip->mi_fend != NUL)
2014 flen = fold_more(mip);
2015
2016 len = byts[arridx++];
2017
2018 /* If the first possible byte is a zero the prefix could end here.
2019 * Check if the following word matches and supports the prefix. */
2020 if (byts[arridx] == 0)
2021 {
2022 /* There can be several prefixes with different conditions. We
2023 * try them all, since we don't know which one will give the
2024 * longest match. The word is the same each time, pass the list
2025 * of possible prefixes to find_word(). */
2026 mip->mi_prefarridx = arridx;
2027 mip->mi_prefcnt = len;
2028 while (len > 0 && byts[arridx] == 0)
2029 {
2030 ++arridx;
2031 --len;
2032 }
2033 mip->mi_prefcnt -= len;
2034
2035 /* Find the word that comes after the prefix. */
2036 mip->mi_prefixlen = wlen;
Bram Moolenaard12a1322005-08-21 22:08:24 +00002037 if (mode == FIND_COMPOUND)
2038 /* Skip over the previously found word(s). */
2039 mip->mi_prefixlen += mip->mi_compoff;
2040
Bram Moolenaar53805d12005-08-01 07:08:33 +00002041#ifdef FEAT_MBYTE
2042 if (has_mbyte)
2043 {
2044 /* Case-folded length may differ from original length. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00002045 mip->mi_cprefixlen = nofold_len(mip->mi_fword,
2046 mip->mi_prefixlen, mip->mi_word);
Bram Moolenaar53805d12005-08-01 07:08:33 +00002047 }
2048 else
Bram Moolenaard12a1322005-08-21 22:08:24 +00002049 mip->mi_cprefixlen = mip->mi_prefixlen;
Bram Moolenaar53805d12005-08-01 07:08:33 +00002050#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002051 find_word(mip, FIND_PREFIX);
2052
2053
2054 if (len == 0)
2055 break; /* no children, word must end here */
2056 }
2057
2058 /* Stop looking at end of the line. */
2059 if (ptr[wlen] == NUL)
2060 break;
2061
2062 /* Perform a binary search in the list of accepted bytes. */
2063 c = ptr[wlen];
2064 lo = arridx;
2065 hi = arridx + len - 1;
2066 while (lo < hi)
2067 {
2068 m = (lo + hi) / 2;
2069 if (byts[m] > c)
2070 hi = m - 1;
2071 else if (byts[m] < c)
2072 lo = m + 1;
2073 else
2074 {
2075 lo = hi = m;
2076 break;
2077 }
2078 }
2079
2080 /* Stop if there is no matching byte. */
2081 if (hi < lo || byts[lo] != c)
2082 break;
2083
2084 /* Continue at the child (if there is one). */
2085 arridx = idxs[lo];
2086 ++wlen;
2087 --flen;
2088 }
2089}
2090
2091/*
2092 * Need to fold at least one more character. Do until next non-word character
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002093 * for efficiency. Include the non-word character too.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002094 * Return the length of the folded chars in bytes.
2095 */
2096 static int
2097fold_more(mip)
2098 matchinf_T *mip;
2099{
2100 int flen;
2101 char_u *p;
2102
2103 p = mip->mi_fend;
2104 do
2105 {
2106 mb_ptr_adv(mip->mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002107 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002108
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002109 /* Include the non-word character so that we can check for the word end. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002110 if (*mip->mi_fend != NUL)
2111 mb_ptr_adv(mip->mi_fend);
2112
2113 (void)spell_casefold(p, (int)(mip->mi_fend - p),
2114 mip->mi_fword + mip->mi_fwordlen,
2115 MAXWLEN - mip->mi_fwordlen);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002116 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002117 mip->mi_fwordlen += flen;
2118 return flen;
2119}
2120
2121/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002122 * Check case flags for a word. Return TRUE if the word has the requested
2123 * case.
2124 */
2125 static int
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002126spell_valid_case(wordflags, treeflags)
2127 int wordflags; /* flags for the checked word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002128 int treeflags; /* flags for the word in the spell tree */
2129{
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002130 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002131 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002132 && ((treeflags & WF_ONECAP) == 0
2133 || (wordflags & WF_ONECAP) != 0)));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002134}
2135
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002136/*
2137 * Return TRUE if spell checking is not enabled.
2138 */
2139 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00002140no_spell_checking(wp)
2141 win_T *wp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002142{
Bram Moolenaara226a6d2006-02-26 23:59:20 +00002143 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL
2144 || wp->w_buffer->b_langp.ga_len == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002145 {
2146 EMSG(_("E756: Spell checking is not enabled"));
2147 return TRUE;
2148 }
2149 return FALSE;
2150}
Bram Moolenaar51485f02005-06-04 21:55:20 +00002151
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002152/*
2153 * Move to next spell error.
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002154 * "curline" is FALSE for "[s", "]s", "[S" and "]S".
2155 * "curline" is TRUE to find word under/after cursor in the same line.
Bram Moolenaar5195e452005-08-19 20:32:47 +00002156 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
2157 * to after badly spelled word before the cursor.
Bram Moolenaar6de68532005-08-24 22:08:48 +00002158 * Return 0 if not found, length of the badly spelled word otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002159 */
2160 int
Bram Moolenaar95529562005-08-25 21:21:38 +00002161spell_move_to(wp, dir, allwords, curline, attrp)
2162 win_T *wp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002163 int dir; /* FORWARD or BACKWARD */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002164 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002165 int curline;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002166 hlf_T *attrp; /* return: attributes of bad word or NULL
2167 (only when "dir" is FORWARD) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002168{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002169 linenr_T lnum;
2170 pos_T found_pos;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002171 int found_len = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002172 char_u *line;
2173 char_u *p;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002174 char_u *endp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002175 hlf_T attr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002176 int len;
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002177# ifdef FEAT_SYN_HL
Bram Moolenaar95529562005-08-25 21:21:38 +00002178 int has_syntax = syntax_present(wp->w_buffer);
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002179# endif
Bram Moolenaar89d40322006-08-29 15:30:07 +00002180 int col;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002181 int can_spell;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002182 char_u *buf = NULL;
2183 int buflen = 0;
2184 int skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002185 int capcol = -1;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002186 int found_one = FALSE;
2187 int wrapped = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002188
Bram Moolenaar95529562005-08-25 21:21:38 +00002189 if (no_spell_checking(wp))
Bram Moolenaar6de68532005-08-24 22:08:48 +00002190 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002191
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002192 /*
2193 * Start looking for bad word at the start of the line, because we can't
Bram Moolenaar86ca6e32006-03-29 21:06:37 +00002194 * start halfway a word, we don't know where it starts or ends.
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002195 *
2196 * When searching backwards, we continue in the line to find the last
2197 * bad word (in the cursor line: before the cursor).
Bram Moolenaar0c405862005-06-22 22:26:26 +00002198 *
2199 * We concatenate the start of the next line, so that wrapped words work
2200 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
2201 * though...
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002202 */
Bram Moolenaar95529562005-08-25 21:21:38 +00002203 lnum = wp->w_cursor.lnum;
Bram Moolenaare1438bb2006-03-01 22:01:55 +00002204 clearpos(&found_pos);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002205
2206 while (!got_int)
2207 {
Bram Moolenaar95529562005-08-25 21:21:38 +00002208 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002209
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002210 len = (int)STRLEN(line);
Bram Moolenaar0c405862005-06-22 22:26:26 +00002211 if (buflen < len + MAXWLEN + 2)
2212 {
2213 vim_free(buf);
2214 buflen = len + MAXWLEN + 2;
2215 buf = alloc(buflen);
2216 if (buf == NULL)
2217 break;
2218 }
2219
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002220 /* In first line check first word for Capital. */
2221 if (lnum == 1)
2222 capcol = 0;
2223
2224 /* For checking first word with a capital skip white space. */
2225 if (capcol == 0)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002226 capcol = (int)(skipwhite(line) - line);
2227 else if (curline && wp == curwin)
2228 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002229 /* For spellbadword(): check if first word needs a capital. */
Bram Moolenaar89d40322006-08-29 15:30:07 +00002230 col = (int)(skipwhite(line) - line);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002231 if (check_need_cap(lnum, col))
2232 capcol = col;
2233
2234 /* Need to get the line again, may have looked at the previous
2235 * one. */
2236 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
2237 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002238
Bram Moolenaar0c405862005-06-22 22:26:26 +00002239 /* Copy the line into "buf" and append the start of the next line if
2240 * possible. */
2241 STRCPY(buf, line);
Bram Moolenaar95529562005-08-25 21:21:38 +00002242 if (lnum < wp->w_buffer->b_ml.ml_line_count)
Bram Moolenaar5dd95a12006-05-13 12:09:24 +00002243 spell_cat_line(buf + STRLEN(buf),
2244 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +00002245
2246 p = buf + skip;
2247 endp = buf + len;
2248 while (p < endp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002249 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002250 /* When searching backward don't search after the cursor. Unless
2251 * we wrapped around the end of the buffer. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002252 if (dir == BACKWARD
Bram Moolenaar95529562005-08-25 21:21:38 +00002253 && lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002254 && !wrapped
Bram Moolenaar95529562005-08-25 21:21:38 +00002255 && (colnr_T)(p - buf) >= wp->w_cursor.col)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002256 break;
2257
2258 /* start of word */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002259 attr = HLF_COUNT;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002260 len = spell_check(wp, p, &attr, &capcol, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002261
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002262 if (attr != HLF_COUNT)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002263 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002264 /* We found a bad word. Check the attribute. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002265 if (allwords || attr == HLF_SPB)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002266 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002267 /* When searching forward only accept a bad word after
2268 * the cursor. */
2269 if (dir == BACKWARD
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002270 || lnum != wp->w_cursor.lnum
Bram Moolenaar95529562005-08-25 21:21:38 +00002271 || (lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002272 && (wrapped
2273 || (colnr_T)(curline ? p - buf + len
Bram Moolenaar0c405862005-06-22 22:26:26 +00002274 : p - buf)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002275 > wp->w_cursor.col)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002276 {
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002277# ifdef FEAT_SYN_HL
Bram Moolenaar51485f02005-06-04 21:55:20 +00002278 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002279 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002280 col = (int)(p - buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00002281 (void)syn_get_id(wp, lnum, (colnr_T)col,
Bram Moolenaar56cefaf2008-01-12 15:47:10 +00002282 FALSE, &can_spell, FALSE);
Bram Moolenaard68071d2006-05-02 22:08:30 +00002283 if (!can_spell)
2284 attr = HLF_COUNT;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002285 }
2286 else
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002287#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002288 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002289
Bram Moolenaar51485f02005-06-04 21:55:20 +00002290 if (can_spell)
2291 {
Bram Moolenaard68071d2006-05-02 22:08:30 +00002292 found_one = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002293 found_pos.lnum = lnum;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002294 found_pos.col = (int)(p - buf);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002295#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +00002296 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002297#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002298 if (dir == FORWARD)
2299 {
2300 /* No need to search further. */
Bram Moolenaar95529562005-08-25 21:21:38 +00002301 wp->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002302 vim_free(buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00002303 if (attrp != NULL)
2304 *attrp = attr;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002305 return len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002306 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002307 else if (curline)
2308 /* Insert mode completion: put cursor after
2309 * the bad word. */
2310 found_pos.col += len;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002311 found_len = len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002312 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002313 }
Bram Moolenaard68071d2006-05-02 22:08:30 +00002314 else
2315 found_one = TRUE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002316 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002317 }
2318
Bram Moolenaar51485f02005-06-04 21:55:20 +00002319 /* advance to character after the word */
2320 p += len;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002321 capcol -= len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002322 }
2323
Bram Moolenaar5195e452005-08-19 20:32:47 +00002324 if (dir == BACKWARD && found_pos.lnum != 0)
2325 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002326 /* Use the last match in the line (before the cursor). */
Bram Moolenaar95529562005-08-25 21:21:38 +00002327 wp->w_cursor = found_pos;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002328 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002329 return found_len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002330 }
2331
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002332 if (curline)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002333 break; /* only check cursor line */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002334
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002335 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002336 if (dir == BACKWARD)
2337 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002338 /* If we are back at the starting line and searched it again there
2339 * is no match, give up. */
2340 if (lnum == wp->w_cursor.lnum && wrapped)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002341 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002342
2343 if (lnum > 1)
2344 --lnum;
2345 else if (!p_ws)
2346 break; /* at first line and 'nowrapscan' */
2347 else
2348 {
2349 /* Wrap around to the end of the buffer. May search the
2350 * starting line again and accept the last match. */
2351 lnum = wp->w_buffer->b_ml.ml_line_count;
2352 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002353 if (!shortmess(SHM_SEARCH))
2354 give_warning((char_u *)_(top_bot_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002355 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002356 capcol = -1;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002357 }
2358 else
2359 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002360 if (lnum < wp->w_buffer->b_ml.ml_line_count)
2361 ++lnum;
2362 else if (!p_ws)
2363 break; /* at first line and 'nowrapscan' */
2364 else
2365 {
2366 /* Wrap around to the start of the buffer. May search the
2367 * starting line again and accept the first match. */
2368 lnum = 1;
2369 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002370 if (!shortmess(SHM_SEARCH))
2371 give_warning((char_u *)_(bot_top_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002372 }
2373
2374 /* If we are back at the starting line and there is no match then
2375 * give up. */
Bram Moolenaar6ae167a2009-02-11 16:58:49 +00002376 if (lnum == wp->w_cursor.lnum && (!found_one || wrapped))
Bram Moolenaar0c405862005-06-22 22:26:26 +00002377 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002378
2379 /* Skip the characters at the start of the next line that were
2380 * included in a match crossing line boundaries. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002381 if (attr == HLF_COUNT)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002382 skip = (int)(p - endp);
Bram Moolenaar0c405862005-06-22 22:26:26 +00002383 else
2384 skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002385
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002386 /* Capcol skips over the inserted space. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002387 --capcol;
2388
2389 /* But after empty line check first word in next line */
2390 if (*skipwhite(line) == NUL)
2391 capcol = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002392 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002393
2394 line_breakcheck();
2395 }
2396
Bram Moolenaar0c405862005-06-22 22:26:26 +00002397 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002398 return 0;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002399}
2400
2401/*
2402 * For spell checking: concatenate the start of the following line "line" into
2403 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
Bram Moolenaar6a5d2ac2008-04-01 15:14:36 +00002404 * Keep the blanks at the start of the next line, this is used in win_line()
2405 * to skip those bytes if the word was OK.
Bram Moolenaar0c405862005-06-22 22:26:26 +00002406 */
2407 void
2408spell_cat_line(buf, line, maxlen)
2409 char_u *buf;
2410 char_u *line;
2411 int maxlen;
2412{
2413 char_u *p;
2414 int n;
2415
2416 p = skipwhite(line);
2417 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2418 p = skipwhite(p + 1);
2419
2420 if (*p != NUL)
2421 {
Bram Moolenaar6a5d2ac2008-04-01 15:14:36 +00002422 /* Only worth concatenating if there is something else than spaces to
2423 * concatenate. */
2424 n = (int)(p - line) + 1;
2425 if (n < maxlen - 1)
2426 {
2427 vim_memset(buf, ' ', n);
2428 vim_strncpy(buf + n, p, maxlen - 1 - n);
2429 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00002430 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002431}
2432
/*
 * Structure used for the cookie argument of do_in_runtimepath().
 * spell_load_lang() fills it in and passes it on, so that spell_load_cb()
 * gets it for every spell file that is found.
 */
typedef struct spelload_S
{
    char_u  sl_lang[MAXWLEN + 1];       /* language name, copied from the
                                           "lang" argument of
                                           spell_load_lang(); truncated when
                                           an error is detected */
    slang_T *sl_slang;                  /* resulting slang_T struct; set to
                                           NULL before loading starts */
    int     sl_nobreak;                 /* NOBREAK language found; set to
                                           FALSE before loading starts */
} spelload_T;
2442
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002443/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002444 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00002445 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002446 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002447 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002448spell_load_lang(lang)
2449 char_u *lang;
2450{
Bram Moolenaarb765d632005-06-07 21:00:02 +00002451 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002452 int r;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002453 spelload_T sl;
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002454#ifdef FEAT_AUTOCMD
2455 int round;
2456#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002457
Bram Moolenaarb765d632005-06-07 21:00:02 +00002458 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002459 * It's truncated when an error is detected. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002460 STRCPY(sl.sl_lang, lang);
2461 sl.sl_slang = NULL;
2462 sl.sl_nobreak = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002463
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002464#ifdef FEAT_AUTOCMD
2465 /* We may retry when no spell file is found for the language, an
2466 * autocommand may load it then. */
2467 for (round = 1; round <= 2; ++round)
2468#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002469 {
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002470 /*
2471 * Find the first spell file for "lang" in 'runtimepath' and load it.
2472 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002473 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002474 "spell/%s.%s.spl", lang, spell_enc());
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002475 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002476
2477 if (r == FAIL && *sl.sl_lang != NUL)
2478 {
2479 /* Try loading the ASCII version. */
2480 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
2481 "spell/%s.ascii.spl", lang);
2482 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
2483
2484#ifdef FEAT_AUTOCMD
2485 if (r == FAIL && *sl.sl_lang != NUL && round == 1
2486 && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
2487 curbuf->b_fname, FALSE, curbuf))
2488 continue;
2489 break;
2490#endif
2491 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002492#ifdef FEAT_AUTOCMD
2493 break;
2494#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002495 }
2496
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002497 if (r == FAIL)
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002498 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002499 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
2500 lang, spell_enc(), lang);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002501 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002502 else if (sl.sl_slang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002503 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002504 /* At least one file was loaded, now load ALL the additions. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002505 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002506 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002507 }
2508}
2509
2510/*
2511 * Return the encoding used for spell checking: Use 'encoding', except that we
2512 * use "latin1" for "latin9". And limit to 60 characters (just in case).
2513 */
2514 static char_u *
2515spell_enc()
2516{
2517
2518#ifdef FEAT_MBYTE
2519 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2520 return p_enc;
2521#endif
2522 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002523}
2524
2525/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002526 * Get the name of the .spl file for the internal wordlist into
2527 * "fname[MAXPATHL]".
2528 */
2529 static void
2530int_wordlist_spl(fname)
2531 char_u *fname;
2532{
2533 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2534 int_wordlist, spell_enc());
2535}
2536
2537/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002538 * Allocate a new slang_T for language "lang". "lang" can be NULL.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002539 * Caller must fill "sl_next".
2540 */
2541 static slang_T *
2542slang_alloc(lang)
2543 char_u *lang;
2544{
2545 slang_T *lp;
2546
Bram Moolenaar51485f02005-06-04 21:55:20 +00002547 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002548 if (lp != NULL)
2549 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002550 if (lang != NULL)
2551 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002552 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
Bram Moolenaar4770d092006-01-12 23:22:24 +00002553 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002554 lp->sl_compmax = MAXWLEN;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002555 lp->sl_compsylmax = MAXWLEN;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002556 hash_init(&lp->sl_wordcount);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002557 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002558
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002559 return lp;
2560}
2561
2562/*
2563 * Free the contents of an slang_T and the structure itself.
2564 */
2565 static void
2566slang_free(lp)
2567 slang_T *lp;
2568{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002569 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002570 vim_free(lp->sl_fname);
2571 slang_clear(lp);
2572 vim_free(lp);
2573}
2574
2575/*
2576 * Clear an slang_T so that the file can be reloaded.
2577 */
2578 static void
2579slang_clear(lp)
2580 slang_T *lp;
2581{
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002582 garray_T *gap;
2583 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002584 salitem_T *smp;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002585 int i;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002586 int round;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002587
Bram Moolenaar51485f02005-06-04 21:55:20 +00002588 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002589 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002590 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002591 lp->sl_kbyts = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002592 vim_free(lp->sl_pbyts);
2593 lp->sl_pbyts = NULL;
2594
Bram Moolenaar51485f02005-06-04 21:55:20 +00002595 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002596 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002597 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002598 lp->sl_kidxs = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002599 vim_free(lp->sl_pidxs);
2600 lp->sl_pidxs = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002601
Bram Moolenaar4770d092006-01-12 23:22:24 +00002602 for (round = 1; round <= 2; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002603 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002604 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
2605 while (gap->ga_len > 0)
2606 {
2607 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2608 vim_free(ftp->ft_from);
2609 vim_free(ftp->ft_to);
2610 }
2611 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002612 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002613
2614 gap = &lp->sl_sal;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002615 if (lp->sl_sofo)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002616 {
2617 /* "ga_len" is set to 1 without adding an item for latin1 */
2618 if (gap->ga_data != NULL)
2619 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2620 for (i = 0; i < gap->ga_len; ++i)
2621 vim_free(((int **)gap->ga_data)[i]);
2622 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002623 else
2624 /* SAL items: free salitem_T items */
2625 while (gap->ga_len > 0)
2626 {
2627 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2628 vim_free(smp->sm_lead);
2629 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2630 vim_free(smp->sm_to);
2631#ifdef FEAT_MBYTE
2632 vim_free(smp->sm_lead_w);
2633 vim_free(smp->sm_oneof_w);
2634 vim_free(smp->sm_to_w);
2635#endif
2636 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002637 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002638
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002639 for (i = 0; i < lp->sl_prefixcnt; ++i)
2640 vim_free(lp->sl_prefprog[i]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002641 lp->sl_prefixcnt = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002642 vim_free(lp->sl_prefprog);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002643 lp->sl_prefprog = NULL;
2644
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002645 vim_free(lp->sl_info);
2646 lp->sl_info = NULL;
2647
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002648 vim_free(lp->sl_midword);
2649 lp->sl_midword = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002650
Bram Moolenaar5195e452005-08-19 20:32:47 +00002651 vim_free(lp->sl_compprog);
Bram Moolenaar9f94b052008-11-30 20:12:46 +00002652 vim_free(lp->sl_comprules);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002653 vim_free(lp->sl_compstartflags);
Bram Moolenaard12a1322005-08-21 22:08:24 +00002654 vim_free(lp->sl_compallflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002655 lp->sl_compprog = NULL;
Bram Moolenaar9f94b052008-11-30 20:12:46 +00002656 lp->sl_comprules = NULL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002657 lp->sl_compstartflags = NULL;
Bram Moolenaard12a1322005-08-21 22:08:24 +00002658 lp->sl_compallflags = NULL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002659
2660 vim_free(lp->sl_syllable);
2661 lp->sl_syllable = NULL;
2662 ga_clear(&lp->sl_syl_items);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002663
Bram Moolenaar899dddf2006-03-26 21:06:50 +00002664 ga_clear_strings(&lp->sl_comppat);
2665
Bram Moolenaar4770d092006-01-12 23:22:24 +00002666 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2667 hash_init(&lp->sl_wordcount);
Bram Moolenaarea424162005-06-16 21:51:00 +00002668
Bram Moolenaar4770d092006-01-12 23:22:24 +00002669#ifdef FEAT_MBYTE
2670 hash_clear_all(&lp->sl_map_hash, 0);
Bram Moolenaarea424162005-06-16 21:51:00 +00002671#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00002672
Bram Moolenaar4770d092006-01-12 23:22:24 +00002673 /* Clear info from .sug file. */
2674 slang_clear_sug(lp);
2675
Bram Moolenaar5195e452005-08-19 20:32:47 +00002676 lp->sl_compmax = MAXWLEN;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002677 lp->sl_compminlen = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002678 lp->sl_compsylmax = MAXWLEN;
2679 lp->sl_regions[0] = NUL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002680}
2681
2682/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002683 * Clear the info from the .sug file in "lp".
2684 */
2685 static void
2686slang_clear_sug(lp)
2687 slang_T *lp;
2688{
2689 vim_free(lp->sl_sbyts);
2690 lp->sl_sbyts = NULL;
2691 vim_free(lp->sl_sidxs);
2692 lp->sl_sidxs = NULL;
2693 close_spellbuf(lp->sl_sugbuf);
2694 lp->sl_sugbuf = NULL;
2695 lp->sl_sugloaded = FALSE;
2696 lp->sl_sugtime = 0;
2697}
2698
2699/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002700 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002701 * Invoked through do_in_runtimepath().
2702 */
2703 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00002704spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002705 char_u *fname;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002706 void *cookie;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002707{
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002708 spelload_T *slp = (spelload_T *)cookie;
2709 slang_T *slang;
2710
2711 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2712 if (slang != NULL)
2713 {
2714 /* When a previously loaded file has NOBREAK also use it for the
2715 * ".add" files. */
2716 if (slp->sl_nobreak && slang->sl_add)
2717 slang->sl_nobreak = TRUE;
2718 else if (slang->sl_nobreak)
2719 slp->sl_nobreak = TRUE;
2720
2721 slp->sl_slang = slang;
2722 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002723}
2724
2725/*
2726 * Load one spell file and store the info into a slang_T.
2727 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00002728 * This is invoked in three ways:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002729 * - From spell_load_cb() to load a spell file for the first time. "lang" is
2730 * the language name, "old_lp" is NULL. Will allocate an slang_T.
2731 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
2732 * points to the existing slang_T.
Bram Moolenaar4770d092006-01-12 23:22:24 +00002733 * - Just after writing a .spl file; it's read back to produce the .sug file.
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002734 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
2735 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002736 * Returns the slang_T the spell file was loaded into. NULL for error.
Bram Moolenaarb765d632005-06-07 21:00:02 +00002737 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002738 static slang_T *
2739spell_load_file(fname, lang, old_lp, silent)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002740 char_u *fname;
2741 char_u *lang;
2742 slang_T *old_lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002743 int silent; /* no error if file doesn't exist */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002744{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002745 FILE *fd;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002746 char_u buf[VIMSPELLMAGICL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002747 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002748 int i;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002749 int n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002750 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002751 char_u *save_sourcing_name = sourcing_name;
2752 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002753 slang_T *lp = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002754 int c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002755 int res;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002756
Bram Moolenaarb765d632005-06-07 21:00:02 +00002757 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002758 if (fd == NULL)
2759 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002760 if (!silent)
2761 EMSG2(_(e_notopen), fname);
2762 else if (p_verbose > 2)
2763 {
2764 verbose_enter();
2765 smsg((char_u *)e_notopen, fname);
2766 verbose_leave();
2767 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002768 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002769 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002770 if (p_verbose > 2)
2771 {
2772 verbose_enter();
2773 smsg((char_u *)_("Reading spell file \"%s\""), fname);
2774 verbose_leave();
2775 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002776
Bram Moolenaarb765d632005-06-07 21:00:02 +00002777 if (old_lp == NULL)
2778 {
2779 lp = slang_alloc(lang);
2780 if (lp == NULL)
2781 goto endFAIL;
2782
2783 /* Remember the file name, used to reload the file when it's updated. */
2784 lp->sl_fname = vim_strsave(fname);
2785 if (lp->sl_fname == NULL)
2786 goto endFAIL;
2787
2788 /* Check for .add.spl. */
2789 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
2790 }
2791 else
2792 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002793
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002794 /* Set sourcing_name, so that error messages mention the file name. */
2795 sourcing_name = fname;
2796 sourcing_lnum = 0;
2797
Bram Moolenaar4770d092006-01-12 23:22:24 +00002798 /*
2799 * <HEADER>: <fileID>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002800 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002801 for (i = 0; i < VIMSPELLMAGICL; ++i)
2802 buf[i] = getc(fd); /* <fileID> */
2803 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2804 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002805 EMSG(_("E757: This does not look like a spell file"));
2806 goto endFAIL;
2807 }
2808 c = getc(fd); /* <versionnr> */
2809 if (c < VIMSPELLVERSION)
2810 {
2811 EMSG(_("E771: Old spell file, needs to be updated"));
2812 goto endFAIL;
2813 }
2814 else if (c > VIMSPELLVERSION)
2815 {
2816 EMSG(_("E772: Spell file is for newer version of Vim"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002817 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002818 }
2819
Bram Moolenaar5195e452005-08-19 20:32:47 +00002820
2821 /*
2822 * <SECTIONS>: <section> ... <sectionend>
2823 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2824 */
2825 for (;;)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002826 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002827 n = getc(fd); /* <sectionID> or <sectionend> */
2828 if (n == SN_END)
2829 break;
2830 c = getc(fd); /* <sectionflags> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00002831 len = get4c(fd); /* <sectionlen> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00002832 if (len < 0)
2833 goto truncerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002834
Bram Moolenaar5195e452005-08-19 20:32:47 +00002835 res = 0;
2836 switch (n)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002837 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002838 case SN_INFO:
2839 lp->sl_info = read_string(fd, len); /* <infotext> */
2840 if (lp->sl_info == NULL)
2841 goto endFAIL;
2842 break;
2843
Bram Moolenaar5195e452005-08-19 20:32:47 +00002844 case SN_REGION:
2845 res = read_region_section(fd, lp, len);
2846 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002847
Bram Moolenaar5195e452005-08-19 20:32:47 +00002848 case SN_CHARFLAGS:
2849 res = read_charflags_section(fd);
2850 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002851
Bram Moolenaar5195e452005-08-19 20:32:47 +00002852 case SN_MIDWORD:
2853 lp->sl_midword = read_string(fd, len); /* <midword> */
2854 if (lp->sl_midword == NULL)
2855 goto endFAIL;
2856 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002857
Bram Moolenaar5195e452005-08-19 20:32:47 +00002858 case SN_PREFCOND:
2859 res = read_prefcond_section(fd, lp);
2860 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002861
Bram Moolenaar5195e452005-08-19 20:32:47 +00002862 case SN_REP:
Bram Moolenaar4770d092006-01-12 23:22:24 +00002863 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
2864 break;
2865
2866 case SN_REPSAL:
2867 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002868 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002869
Bram Moolenaar5195e452005-08-19 20:32:47 +00002870 case SN_SAL:
2871 res = read_sal_section(fd, lp);
2872 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002873
Bram Moolenaar5195e452005-08-19 20:32:47 +00002874 case SN_SOFO:
2875 res = read_sofo_section(fd, lp);
2876 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002877
Bram Moolenaar5195e452005-08-19 20:32:47 +00002878 case SN_MAP:
2879 p = read_string(fd, len); /* <mapstr> */
2880 if (p == NULL)
2881 goto endFAIL;
2882 set_map_str(lp, p);
2883 vim_free(p);
2884 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002885
Bram Moolenaar4770d092006-01-12 23:22:24 +00002886 case SN_WORDS:
2887 res = read_words_section(fd, lp, len);
2888 break;
2889
2890 case SN_SUGFILE:
Bram Moolenaarb388adb2006-02-28 23:50:17 +00002891 lp->sl_sugtime = get8c(fd); /* <timestamp> */
Bram Moolenaar4770d092006-01-12 23:22:24 +00002892 break;
2893
Bram Moolenaare1438bb2006-03-01 22:01:55 +00002894 case SN_NOSPLITSUGS:
2895 lp->sl_nosplitsugs = TRUE; /* <timestamp> */
2896 break;
2897
Bram Moolenaar5195e452005-08-19 20:32:47 +00002898 case SN_COMPOUND:
2899 res = read_compound(fd, lp, len);
2900 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002901
Bram Moolenaar78622822005-08-23 21:00:13 +00002902 case SN_NOBREAK:
2903 lp->sl_nobreak = TRUE;
2904 break;
2905
Bram Moolenaar5195e452005-08-19 20:32:47 +00002906 case SN_SYLLABLE:
2907 lp->sl_syllable = read_string(fd, len); /* <syllable> */
2908 if (lp->sl_syllable == NULL)
2909 goto endFAIL;
2910 if (init_syl_tab(lp) == FAIL)
2911 goto endFAIL;
2912 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002913
Bram Moolenaar5195e452005-08-19 20:32:47 +00002914 default:
2915 /* Unsupported section. When it's required give an error
2916 * message. When it's not required skip the contents. */
2917 if (c & SNF_REQUIRED)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002918 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002919 EMSG(_("E770: Unsupported section in spell file"));
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002920 goto endFAIL;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002921 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002922 while (--len >= 0)
2923 if (getc(fd) < 0)
2924 goto truncerr;
2925 break;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002926 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002927someerror:
Bram Moolenaar5195e452005-08-19 20:32:47 +00002928 if (res == SP_FORMERROR)
2929 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002930 EMSG(_(e_format));
2931 goto endFAIL;
2932 }
2933 if (res == SP_TRUNCERROR)
2934 {
2935truncerr:
2936 EMSG(_(e_spell_trunc));
2937 goto endFAIL;
2938 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00002939 if (res == SP_OTHERERROR)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002940 goto endFAIL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002941 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002942
Bram Moolenaar4770d092006-01-12 23:22:24 +00002943 /* <LWORDTREE> */
2944 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
2945 if (res != 0)
2946 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002947
Bram Moolenaar4770d092006-01-12 23:22:24 +00002948 /* <KWORDTREE> */
2949 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
2950 if (res != 0)
2951 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002952
Bram Moolenaar4770d092006-01-12 23:22:24 +00002953 /* <PREFIXTREE> */
2954 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
2955 lp->sl_prefixcnt);
2956 if (res != 0)
2957 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002958
Bram Moolenaarb765d632005-06-07 21:00:02 +00002959 /* For a new file link it in the list of spell files. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002960 if (old_lp == NULL && lang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002961 {
2962 lp->sl_next = first_lang;
2963 first_lang = lp;
2964 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002965
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002966 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002967
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002968endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002969 if (lang != NULL)
2970 /* truncating the name signals the error to spell_load_lang() */
2971 *lang = NUL;
2972 if (lp != NULL && old_lp == NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002973 slang_free(lp);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002974 lp = NULL;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002975
2976endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002977 if (fd != NULL)
2978 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002979 sourcing_name = save_sourcing_name;
2980 sourcing_lnum = save_sourcing_lnum;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002981
2982 return lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002983}
2984
/*
 * Read 2 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file or a read error is encountered before
 * both bytes were read.  A valid result is never negative.
 */
    int
get2c(fd)
    FILE        *fd;
{
    int         hi;
    int         lo;

    /* Check every byte: adding EOF to the value would give a wrong, possibly
     * positive number and the truncation would go unnoticed. */
    hi = getc(fd);
    if (hi == EOF)
        return -1;
    lo = getc(fd);
    if (lo == EOF)
        return -1;
    return (hi << 8) + lo;
}
2998
/*
 * Read 3 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file or a read error is encountered before
 * all three bytes were read.  A valid result is never negative.
 */
    int
get3c(fd)
    FILE        *fd;
{
    int         n = 0;
    int         i;
    int         c;

    for (i = 0; i < 3; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return -1;      /* truncated, don't mix EOF into the value */
        n = (n << 8) + c;
    }
    return n;
}
3013
/*
 * Read 4 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file or a read error is encountered before
 * all four bytes were read.
 * The value is built up as an unsigned number, so that shifting a bit into
 * the top position is well defined; it is converted to int only for the
 * result.
 */
    int
get4c(fd)
    FILE        *fd;
{
    unsigned    n = 0;
    int         i;
    int         c;

    for (i = 0; i < 4; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return -1;      /* truncated, don't mix EOF into the value */
        n = (n << 8) + (unsigned)c;
    }
    return (int)n;
}
3029
/*
 * Read 8 bytes from "fd" and turn them into a time_t, MSB first.
 * Returns (time_t)-1 when the end of the file or a read error is encountered
 * before all eight bytes were read.
 */
    static time_t
get8c(fd)
    FILE        *fd;
{
    time_t      n = 0;
    int         i;
    int         c;

    for (i = 0; i < 8; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return (time_t)-1;  /* truncated, don't mix EOF into the value */
        n = (n << 8) + c;
    }
    return n;
}
3044
3045/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003046 * Read a length field from "fd" in "cnt_bytes" bytes.
Bram Moolenaar7887d882005-07-01 22:33:52 +00003047 * Allocate memory, read the string into it and add a NUL at the end.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003048 * Returns NULL when the count is zero.
Bram Moolenaar5195e452005-08-19 20:32:47 +00003049 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
3050 * otherwise.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003051 */
3052 static char_u *
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00003053read_cnt_string(fd, cnt_bytes, cntp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003054 FILE *fd;
3055 int cnt_bytes;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00003056 int *cntp;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003057{
3058 int cnt = 0;
3059 int i;
3060 char_u *str;
3061
3062 /* read the length bytes, MSB first */
3063 for (i = 0; i < cnt_bytes; ++i)
3064 cnt = (cnt << 8) + getc(fd);
3065 if (cnt < 0)
3066 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00003067 *cntp = SP_TRUNCERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003068 return NULL;
3069 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00003070 *cntp = cnt;
3071 if (cnt == 0)
3072 return NULL; /* nothing to read, return NULL */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003073
Bram Moolenaar5195e452005-08-19 20:32:47 +00003074 str = read_string(fd, cnt);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003075 if (str == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003076 *cntp = SP_OTHERERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003077 return str;
3078}
3079
Bram Moolenaar7887d882005-07-01 22:33:52 +00003080/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00003081 * Read a string of length "cnt" from "fd" into allocated memory.
Bram Moolenaara7241f52008-06-24 20:39:31 +00003082 * Returns NULL when out of memory or unable to read that many bytes.
Bram Moolenaar5195e452005-08-19 20:32:47 +00003083 */
3084 static char_u *
3085read_string(fd, cnt)
3086 FILE *fd;
3087 int cnt;
3088{
3089 char_u *str;
3090 int i;
Bram Moolenaara7241f52008-06-24 20:39:31 +00003091 int c;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003092
3093 /* allocate memory */
3094 str = alloc((unsigned)cnt + 1);
3095 if (str != NULL)
3096 {
Bram Moolenaara7241f52008-06-24 20:39:31 +00003097 /* Read the string. Quit when running into the EOF. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003098 for (i = 0; i < cnt; ++i)
Bram Moolenaara7241f52008-06-24 20:39:31 +00003099 {
3100 c = getc(fd);
3101 if (c == EOF)
3102 {
3103 vim_free(str);
3104 return NULL;
3105 }
3106 str[i] = c;
3107 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00003108 str[i] = NUL;
3109 }
3110 return str;
3111}
3112
3113/*
3114 * Read SN_REGION: <regionname> ...
3115 * Return SP_*ERROR flags.
3116 */
3117 static int
3118read_region_section(fd, lp, len)
3119 FILE *fd;
3120 slang_T *lp;
3121 int len;
3122{
3123 int i;
3124
3125 if (len > 16)
3126 return SP_FORMERROR;
3127 for (i = 0; i < len; ++i)
3128 lp->sl_regions[i] = getc(fd); /* <regionname> */
3129 lp->sl_regions[len] = NUL;
3130 return 0;
3131}
3132
3133/*
3134 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
3135 * <folcharslen> <folchars>
3136 * Return SP_*ERROR flags.
3137 */
3138 static int
3139read_charflags_section(fd)
3140 FILE *fd;
3141{
3142 char_u *flags;
3143 char_u *fol;
3144 int flagslen, follen;
3145
3146 /* <charflagslen> <charflags> */
3147 flags = read_cnt_string(fd, 1, &flagslen);
3148 if (flagslen < 0)
3149 return flagslen;
3150
3151 /* <folcharslen> <folchars> */
3152 fol = read_cnt_string(fd, 2, &follen);
3153 if (follen < 0)
3154 {
3155 vim_free(flags);
3156 return follen;
3157 }
3158
3159 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
3160 if (flags != NULL && fol != NULL)
3161 set_spell_charflags(flags, flagslen, fol);
3162
3163 vim_free(flags);
3164 vim_free(fol);
3165
3166 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
3167 if ((flags == NULL) != (fol == NULL))
3168 return SP_FORMERROR;
3169 return 0;
3170}
3171
3172/*
3173 * Read SN_PREFCOND section.
3174 * Return SP_*ERROR flags.
3175 */
3176 static int
3177read_prefcond_section(fd, lp)
3178 FILE *fd;
3179 slang_T *lp;
3180{
3181 int cnt;
3182 int i;
3183 int n;
3184 char_u *p;
3185 char_u buf[MAXWLEN + 1];
3186
3187 /* <prefcondcnt> <prefcond> ... */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003188 cnt = get2c(fd); /* <prefcondcnt> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003189 if (cnt <= 0)
3190 return SP_FORMERROR;
3191
3192 lp->sl_prefprog = (regprog_T **)alloc_clear(
3193 (unsigned)sizeof(regprog_T *) * cnt);
3194 if (lp->sl_prefprog == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003195 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003196 lp->sl_prefixcnt = cnt;
3197
3198 for (i = 0; i < cnt; ++i)
3199 {
3200 /* <prefcond> : <condlen> <condstr> */
3201 n = getc(fd); /* <condlen> */
3202 if (n < 0 || n >= MAXWLEN)
3203 return SP_FORMERROR;
3204
3205 /* When <condlen> is zero we have an empty condition. Otherwise
3206 * compile the regexp program used to check for the condition. */
3207 if (n > 0)
3208 {
3209 buf[0] = '^'; /* always match at one position only */
3210 p = buf + 1;
3211 while (n-- > 0)
3212 *p++ = getc(fd); /* <condstr> */
3213 *p = NUL;
3214 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
3215 }
3216 }
3217 return 0;
3218}
3219
3220/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003221 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
Bram Moolenaar5195e452005-08-19 20:32:47 +00003222 * Return SP_*ERROR flags.
3223 */
3224 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +00003225read_rep_section(fd, gap, first)
Bram Moolenaar5195e452005-08-19 20:32:47 +00003226 FILE *fd;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003227 garray_T *gap;
3228 short *first;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003229{
3230 int cnt;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003231 fromto_T *ftp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003232 int i;
3233
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003234 cnt = get2c(fd); /* <repcount> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003235 if (cnt < 0)
3236 return SP_TRUNCERROR;
3237
Bram Moolenaar5195e452005-08-19 20:32:47 +00003238 if (ga_grow(gap, cnt) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003239 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003240
3241 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3242 for (; gap->ga_len < cnt; ++gap->ga_len)
3243 {
3244 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
3245 ftp->ft_from = read_cnt_string(fd, 1, &i);
3246 if (i < 0)
3247 return i;
3248 if (i == 0)
3249 return SP_FORMERROR;
3250 ftp->ft_to = read_cnt_string(fd, 1, &i);
3251 if (i <= 0)
3252 {
3253 vim_free(ftp->ft_from);
3254 if (i < 0)
3255 return i;
3256 return SP_FORMERROR;
3257 }
3258 }
3259
3260 /* Fill the first-index table. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003261 for (i = 0; i < 256; ++i)
3262 first[i] = -1;
3263 for (i = 0; i < gap->ga_len; ++i)
3264 {
3265 ftp = &((fromto_T *)gap->ga_data)[i];
3266 if (first[*ftp->ft_from] == -1)
3267 first[*ftp->ft_from] = i;
3268 }
3269 return 0;
3270}
3271
3272/*
3273 * Read SN_SAL section: <salflags> <salcount> <sal> ...
3274 * Return SP_*ERROR flags.
3275 */
3276 static int
3277read_sal_section(fd, slang)
3278 FILE *fd;
3279 slang_T *slang;
3280{
3281 int i;
3282 int cnt;
3283 garray_T *gap;
3284 salitem_T *smp;
3285 int ccnt;
3286 char_u *p;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003287 int c = NUL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003288
3289 slang->sl_sofo = FALSE;
3290
3291 i = getc(fd); /* <salflags> */
3292 if (i & SAL_F0LLOWUP)
3293 slang->sl_followup = TRUE;
3294 if (i & SAL_COLLAPSE)
3295 slang->sl_collapse = TRUE;
3296 if (i & SAL_REM_ACCENTS)
3297 slang->sl_rem_accents = TRUE;
3298
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003299 cnt = get2c(fd); /* <salcount> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003300 if (cnt < 0)
3301 return SP_TRUNCERROR;
3302
3303 gap = &slang->sl_sal;
3304 ga_init2(gap, sizeof(salitem_T), 10);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003305 if (ga_grow(gap, cnt + 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003306 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003307
3308 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3309 for (; gap->ga_len < cnt; ++gap->ga_len)
3310 {
3311 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3312 ccnt = getc(fd); /* <salfromlen> */
3313 if (ccnt < 0)
3314 return SP_TRUNCERROR;
3315 if ((p = alloc(ccnt + 2)) == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003316 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003317 smp->sm_lead = p;
3318
3319 /* Read up to the first special char into sm_lead. */
3320 for (i = 0; i < ccnt; ++i)
3321 {
3322 c = getc(fd); /* <salfrom> */
3323 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
3324 break;
3325 *p++ = c;
3326 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003327 smp->sm_leadlen = (int)(p - smp->sm_lead);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003328 *p++ = NUL;
3329
3330 /* Put (abc) chars in sm_oneof, if any. */
3331 if (c == '(')
3332 {
3333 smp->sm_oneof = p;
3334 for (++i; i < ccnt; ++i)
3335 {
3336 c = getc(fd); /* <salfrom> */
3337 if (c == ')')
3338 break;
3339 *p++ = c;
3340 }
3341 *p++ = NUL;
3342 if (++i < ccnt)
3343 c = getc(fd);
3344 }
3345 else
3346 smp->sm_oneof = NULL;
3347
3348 /* Any following chars go in sm_rules. */
3349 smp->sm_rules = p;
3350 if (i < ccnt)
3351 /* store the char we got while checking for end of sm_lead */
3352 *p++ = c;
3353 for (++i; i < ccnt; ++i)
3354 *p++ = getc(fd); /* <salfrom> */
3355 *p++ = NUL;
3356
3357 /* <saltolen> <salto> */
3358 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
3359 if (ccnt < 0)
3360 {
3361 vim_free(smp->sm_lead);
3362 return ccnt;
3363 }
3364
3365#ifdef FEAT_MBYTE
3366 if (has_mbyte)
3367 {
3368 /* convert the multi-byte strings to wide char strings */
3369 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3370 smp->sm_leadlen = mb_charlen(smp->sm_lead);
3371 if (smp->sm_oneof == NULL)
3372 smp->sm_oneof_w = NULL;
3373 else
3374 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
3375 if (smp->sm_to == NULL)
3376 smp->sm_to_w = NULL;
3377 else
3378 smp->sm_to_w = mb_str2wide(smp->sm_to);
3379 if (smp->sm_lead_w == NULL
3380 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
3381 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
3382 {
3383 vim_free(smp->sm_lead);
3384 vim_free(smp->sm_to);
3385 vim_free(smp->sm_lead_w);
3386 vim_free(smp->sm_oneof_w);
3387 vim_free(smp->sm_to_w);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003388 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003389 }
3390 }
3391#endif
3392 }
3393
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003394 if (gap->ga_len > 0)
3395 {
3396 /* Add one extra entry to mark the end with an empty sm_lead. Avoids
3397 * that we need to check the index every time. */
3398 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3399 if ((p = alloc(1)) == NULL)
3400 return SP_OTHERERROR;
3401 p[0] = NUL;
3402 smp->sm_lead = p;
3403 smp->sm_leadlen = 0;
3404 smp->sm_oneof = NULL;
3405 smp->sm_rules = p;
3406 smp->sm_to = NULL;
3407#ifdef FEAT_MBYTE
3408 if (has_mbyte)
3409 {
3410 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3411 smp->sm_leadlen = 0;
3412 smp->sm_oneof_w = NULL;
3413 smp->sm_to_w = NULL;
3414 }
3415#endif
3416 ++gap->ga_len;
3417 }
3418
Bram Moolenaar5195e452005-08-19 20:32:47 +00003419 /* Fill the first-index table. */
3420 set_sal_first(slang);
3421
3422 return 0;
3423}
3424
3425/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003426 * Read SN_WORDS: <word> ...
3427 * Return SP_*ERROR flags.
3428 */
3429 static int
3430read_words_section(fd, lp, len)
3431 FILE *fd;
3432 slang_T *lp;
3433 int len;
3434{
3435 int done = 0;
3436 int i;
Bram Moolenaara7241f52008-06-24 20:39:31 +00003437 int c;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003438 char_u word[MAXWLEN];
3439
3440 while (done < len)
3441 {
3442 /* Read one word at a time. */
3443 for (i = 0; ; ++i)
3444 {
Bram Moolenaara7241f52008-06-24 20:39:31 +00003445 c = getc(fd);
3446 if (c == EOF)
3447 return SP_TRUNCERROR;
3448 word[i] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003449 if (word[i] == NUL)
3450 break;
3451 if (i == MAXWLEN - 1)
3452 return SP_FORMERROR;
3453 }
3454
3455 /* Init the count to 10. */
3456 count_common_word(lp, word, -1, 10);
3457 done += i + 1;
3458 }
3459 return 0;
3460}
3461
3462/*
3463 * Add a word to the hashtable of common words.
3464 * If it's already there then the counter is increased.
3465 */
3466 static void
3467count_common_word(lp, word, len, count)
3468 slang_T *lp;
3469 char_u *word;
3470 int len; /* word length, -1 for upto NUL */
3471 int count; /* 1 to count once, 10 to init */
3472{
3473 hash_T hash;
3474 hashitem_T *hi;
3475 wordcount_T *wc;
3476 char_u buf[MAXWLEN];
3477 char_u *p;
3478
3479 if (len == -1)
3480 p = word;
3481 else
3482 {
3483 vim_strncpy(buf, word, len);
3484 p = buf;
3485 }
3486
3487 hash = hash_hash(p);
3488 hi = hash_lookup(&lp->sl_wordcount, p, hash);
3489 if (HASHITEM_EMPTY(hi))
3490 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003491 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p)));
Bram Moolenaar4770d092006-01-12 23:22:24 +00003492 if (wc == NULL)
3493 return;
3494 STRCPY(wc->wc_word, p);
3495 wc->wc_count = count;
3496 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
3497 }
3498 else
3499 {
3500 wc = HI2WC(hi);
3501 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */
3502 wc->wc_count = MAXWORDCOUNT;
3503 }
3504}
3505
3506/*
3507 * Adjust the score of common words.
3508 */
3509 static int
3510score_wordcount_adj(slang, score, word, split)
3511 slang_T *slang;
3512 int score;
3513 char_u *word;
3514 int split; /* word was split, less bonus */
3515{
3516 hashitem_T *hi;
3517 wordcount_T *wc;
3518 int bonus;
3519 int newscore;
3520
3521 hi = hash_find(&slang->sl_wordcount, word);
3522 if (!HASHITEM_EMPTY(hi))
3523 {
3524 wc = HI2WC(hi);
3525 if (wc->wc_count < SCORE_THRES2)
3526 bonus = SCORE_COMMON1;
3527 else if (wc->wc_count < SCORE_THRES3)
3528 bonus = SCORE_COMMON2;
3529 else
3530 bonus = SCORE_COMMON3;
3531 if (split)
3532 newscore = score - bonus / 2;
3533 else
3534 newscore = score - bonus;
3535 if (newscore < 0)
3536 return 0;
3537 return newscore;
3538 }
3539 return score;
3540}
3541
3542/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00003543 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3544 * Return SP_*ERROR flags.
3545 */
3546 static int
3547read_sofo_section(fd, slang)
3548 FILE *fd;
3549 slang_T *slang;
3550{
3551 int cnt;
3552 char_u *from, *to;
3553 int res;
3554
3555 slang->sl_sofo = TRUE;
3556
3557 /* <sofofromlen> <sofofrom> */
3558 from = read_cnt_string(fd, 2, &cnt);
3559 if (cnt < 0)
3560 return cnt;
3561
3562 /* <sofotolen> <sofoto> */
3563 to = read_cnt_string(fd, 2, &cnt);
3564 if (cnt < 0)
3565 {
3566 vim_free(from);
3567 return cnt;
3568 }
3569
3570 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3571 if (from != NULL && to != NULL)
3572 res = set_sofo(slang, from, to);
3573 else if (from != NULL || to != NULL)
3574 res = SP_FORMERROR; /* only one of two strings is an error */
3575 else
3576 res = 0;
3577
3578 vim_free(from);
3579 vim_free(to);
3580 return res;
3581}
3582
3583/*
3584 * Read the compound section from the .spl file:
Bram Moolenaar899dddf2006-03-26 21:06:50 +00003585 * <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
Bram Moolenaar5195e452005-08-19 20:32:47 +00003586 * Returns SP_*ERROR flags.
3587 */
3588 static int
3589read_compound(fd, slang, len)
3590 FILE *fd;
3591 slang_T *slang;
3592 int len;
3593{
3594 int todo = len;
3595 int c;
3596 int atstart;
3597 char_u *pat;
3598 char_u *pp;
3599 char_u *cp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003600 char_u *ap;
Bram Moolenaar9f94b052008-11-30 20:12:46 +00003601 char_u *crp;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00003602 int cnt;
3603 garray_T *gap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003604
3605 if (todo < 2)
3606 return SP_FORMERROR; /* need at least two bytes */
3607
3608 --todo;
3609 c = getc(fd); /* <compmax> */
3610 if (c < 2)
3611 c = MAXWLEN;
3612 slang->sl_compmax = c;
3613
3614 --todo;
3615 c = getc(fd); /* <compminlen> */
3616 if (c < 1)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00003617 c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003618 slang->sl_compminlen = c;
3619
3620 --todo;
3621 c = getc(fd); /* <compsylmax> */
3622 if (c < 1)
3623 c = MAXWLEN;
3624 slang->sl_compsylmax = c;
3625
Bram Moolenaar899dddf2006-03-26 21:06:50 +00003626 c = getc(fd); /* <compoptions> */
3627 if (c != 0)
3628 ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
3629 else
3630 {
3631 --todo;
3632 c = getc(fd); /* only use the lower byte for now */
3633 --todo;
3634 slang->sl_compoptions = c;
3635
3636 gap = &slang->sl_comppat;
3637 c = get2c(fd); /* <comppatcount> */
3638 todo -= 2;
3639 ga_init2(gap, sizeof(char_u *), c);
3640 if (ga_grow(gap, c) == OK)
3641 while (--c >= 0)
3642 {
3643 ((char_u **)(gap->ga_data))[gap->ga_len++] =
3644 read_cnt_string(fd, 1, &cnt);
3645 /* <comppatlen> <comppattext> */
3646 if (cnt < 0)
3647 return cnt;
Bram Moolenaar5555acc2006-04-07 21:33:12 +00003648 todo -= cnt + 1;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00003649 }
3650 }
Bram Moolenaar5555acc2006-04-07 21:33:12 +00003651 if (todo < 0)
3652 return SP_FORMERROR;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00003653
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003654 /* Turn the COMPOUNDRULE items into a regexp pattern:
Bram Moolenaar5195e452005-08-19 20:32:47 +00003655 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003656 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3657 * Conversion to utf-8 may double the size. */
3658 c = todo * 2 + 7;
3659#ifdef FEAT_MBYTE
3660 if (enc_utf8)
3661 c += todo * 2;
3662#endif
3663 pat = alloc((unsigned)c);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003664 if (pat == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003665 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003666
Bram Moolenaard12a1322005-08-21 22:08:24 +00003667 /* We also need a list of all flags that can appear at the start and one
3668 * for all flags. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003669 cp = alloc(todo + 1);
3670 if (cp == NULL)
3671 {
3672 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003673 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003674 }
3675 slang->sl_compstartflags = cp;
3676 *cp = NUL;
3677
Bram Moolenaard12a1322005-08-21 22:08:24 +00003678 ap = alloc(todo + 1);
3679 if (ap == NULL)
3680 {
3681 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003682 return SP_OTHERERROR;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003683 }
3684 slang->sl_compallflags = ap;
3685 *ap = NUL;
3686
Bram Moolenaar9f94b052008-11-30 20:12:46 +00003687 /* And a list of all patterns in their original form, for checking whether
3688 * compounding may work in match_compoundrule(). This is freed when we
3689 * encounter a wildcard, the check doesn't work then. */
3690 crp = alloc(todo + 1);
3691 slang->sl_comprules = crp;
3692
Bram Moolenaar5195e452005-08-19 20:32:47 +00003693 pp = pat;
3694 *pp++ = '^';
3695 *pp++ = '\\';
3696 *pp++ = '(';
3697
3698 atstart = 1;
3699 while (todo-- > 0)
3700 {
3701 c = getc(fd); /* <compflags> */
Bram Moolenaara7241f52008-06-24 20:39:31 +00003702 if (c == EOF)
3703 {
3704 vim_free(pat);
3705 return SP_TRUNCERROR;
3706 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00003707
3708 /* Add all flags to "sl_compallflags". */
3709 if (vim_strchr((char_u *)"+*[]/", c) == NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00003710 && !byte_in_str(slang->sl_compallflags, c))
Bram Moolenaard12a1322005-08-21 22:08:24 +00003711 {
3712 *ap++ = c;
3713 *ap = NUL;
3714 }
3715
Bram Moolenaar5195e452005-08-19 20:32:47 +00003716 if (atstart != 0)
3717 {
3718 /* At start of item: copy flags to "sl_compstartflags". For a
3719 * [abc] item set "atstart" to 2 and copy up to the ']'. */
3720 if (c == '[')
3721 atstart = 2;
3722 else if (c == ']')
3723 atstart = 0;
3724 else
3725 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00003726 if (!byte_in_str(slang->sl_compstartflags, c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003727 {
3728 *cp++ = c;
3729 *cp = NUL;
3730 }
3731 if (atstart == 1)
3732 atstart = 0;
3733 }
3734 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00003735
3736 /* Copy flag to "sl_comprules", unless we run into a wildcard. */
3737 if (crp != NULL)
3738 {
3739 if (c == '+' || c == '*')
3740 {
3741 vim_free(slang->sl_comprules);
3742 slang->sl_comprules = NULL;
3743 crp = NULL;
3744 }
3745 else
3746 *crp++ = c;
3747 }
3748
Bram Moolenaar5195e452005-08-19 20:32:47 +00003749 if (c == '/') /* slash separates two items */
3750 {
3751 *pp++ = '\\';
3752 *pp++ = '|';
3753 atstart = 1;
3754 }
3755 else /* normal char, "[abc]" and '*' are copied as-is */
3756 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00003757 if (c == '+' || c == '~')
Bram Moolenaar5195e452005-08-19 20:32:47 +00003758 *pp++ = '\\'; /* "a+" becomes "a\+" */
Bram Moolenaar6de68532005-08-24 22:08:48 +00003759#ifdef FEAT_MBYTE
3760 if (enc_utf8)
3761 pp += mb_char2bytes(c, pp);
3762 else
3763#endif
3764 *pp++ = c;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003765 }
3766 }
3767
3768 *pp++ = '\\';
3769 *pp++ = ')';
3770 *pp++ = '$';
3771 *pp = NUL;
3772
Bram Moolenaar9f94b052008-11-30 20:12:46 +00003773 if (crp != NULL)
3774 *crp = NUL;
3775
Bram Moolenaar5195e452005-08-19 20:32:47 +00003776 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3777 vim_free(pat);
3778 if (slang->sl_compprog == NULL)
3779 return SP_FORMERROR;
3780
3781 return 0;
3782}
3783
Bram Moolenaar6de68532005-08-24 22:08:48 +00003784/*
Bram Moolenaar95529562005-08-25 21:21:38 +00003785 * Return TRUE if byte "n" appears in "str".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003786 * Like strchr() but independent of locale.
3787 */
3788 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00003789byte_in_str(str, n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003790 char_u *str;
Bram Moolenaar95529562005-08-25 21:21:38 +00003791 int n;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003792{
3793 char_u *p;
3794
3795 for (p = str; *p != NUL; ++p)
Bram Moolenaar95529562005-08-25 21:21:38 +00003796 if (*p == n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003797 return TRUE;
3798 return FALSE;
3799}
3800
Bram Moolenaar5195e452005-08-19 20:32:47 +00003801#define SY_MAXLEN 30
3802typedef struct syl_item_S
3803{
3804 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */
3805 int sy_len;
3806} syl_item_T;
3807
3808/*
3809 * Truncate "slang->sl_syllable" at the first slash and put the following items
3810 * in "slang->sl_syl_items".
3811 */
3812 static int
3813init_syl_tab(slang)
3814 slang_T *slang;
3815{
3816 char_u *p;
3817 char_u *s;
3818 int l;
3819 syl_item_T *syl;
3820
3821 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3822 p = vim_strchr(slang->sl_syllable, '/');
3823 while (p != NULL)
3824 {
3825 *p++ = NUL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003826 if (*p == NUL) /* trailing slash */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003827 break;
3828 s = p;
3829 p = vim_strchr(p, '/');
3830 if (p == NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003831 l = (int)STRLEN(s);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003832 else
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003833 l = (int)(p - s);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003834 if (l >= SY_MAXLEN)
3835 return SP_FORMERROR;
3836 if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003837 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003838 syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3839 + slang->sl_syl_items.ga_len++;
3840 vim_strncpy(syl->sy_chars, s, l);
3841 syl->sy_len = l;
3842 }
3843 return OK;
3844}
3845
3846/*
3847 * Count the number of syllables in "word".
3848 * When "word" contains spaces the syllables after the last space are counted.
3849 * Returns zero if syllables are not defines.
3850 */
3851 static int
3852count_syllables(slang, word)
3853 slang_T *slang;
3854 char_u *word;
3855{
3856 int cnt = 0;
3857 int skip = FALSE;
3858 char_u *p;
3859 int len;
3860 int i;
3861 syl_item_T *syl;
3862 int c;
3863
3864 if (slang->sl_syllable == NULL)
3865 return 0;
3866
3867 for (p = word; *p != NUL; p += len)
3868 {
3869 /* When running into a space reset counter. */
3870 if (*p == ' ')
3871 {
3872 len = 1;
3873 cnt = 0;
3874 continue;
3875 }
3876
3877 /* Find longest match of syllable items. */
3878 len = 0;
3879 for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3880 {
3881 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3882 if (syl->sy_len > len
3883 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3884 len = syl->sy_len;
3885 }
3886 if (len != 0) /* found a match, count syllable */
3887 {
3888 ++cnt;
3889 skip = FALSE;
3890 }
3891 else
3892 {
3893 /* No recognized syllable item, at least a syllable char then? */
3894#ifdef FEAT_MBYTE
3895 c = mb_ptr2char(p);
3896 len = (*mb_ptr2len)(p);
3897#else
3898 c = *p;
3899 len = 1;
3900#endif
3901 if (vim_strchr(slang->sl_syllable, c) == NULL)
3902 skip = FALSE; /* No, search for next syllable */
3903 else if (!skip)
3904 {
3905 ++cnt; /* Yes, count it */
3906 skip = TRUE; /* don't count following syllable chars */
3907 }
3908 }
3909 }
3910 return cnt;
3911}
3912
3913/*
Bram Moolenaar7887d882005-07-01 22:33:52 +00003914 * Set the SOFOFROM and SOFOTO items in language "lp".
Bram Moolenaar5195e452005-08-19 20:32:47 +00003915 * Returns SP_*ERROR flags when there is something wrong.
Bram Moolenaar7887d882005-07-01 22:33:52 +00003916 */
3917 static int
3918set_sofo(lp, from, to)
3919 slang_T *lp;
3920 char_u *from;
3921 char_u *to;
3922{
3923 int i;
3924
3925#ifdef FEAT_MBYTE
3926 garray_T *gap;
3927 char_u *s;
3928 char_u *p;
3929 int c;
3930 int *inp;
3931
3932 if (has_mbyte)
3933 {
3934 /* Use "sl_sal" as an array with 256 pointers to a list of wide
3935 * characters. The index is the low byte of the character.
3936 * The list contains from-to pairs with a terminating NUL.
3937 * sl_sal_first[] is used for latin1 "from" characters. */
3938 gap = &lp->sl_sal;
3939 ga_init2(gap, sizeof(int *), 1);
3940 if (ga_grow(gap, 256) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003941 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003942 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3943 gap->ga_len = 256;
3944
3945 /* First count the number of items for each list. Temporarily use
3946 * sl_sal_first[] for this. */
3947 for (p = from, s = to; *p != NUL && *s != NUL; )
3948 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003949 c = mb_cptr2char_adv(&p);
3950 mb_cptr_adv(s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003951 if (c >= 256)
3952 ++lp->sl_sal_first[c & 0xff];
3953 }
3954 if (*p != NUL || *s != NUL) /* lengths differ */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003955 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003956
3957 /* Allocate the lists. */
3958 for (i = 0; i < 256; ++i)
3959 if (lp->sl_sal_first[i] > 0)
3960 {
3961 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3962 if (p == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003963 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003964 ((int **)gap->ga_data)[i] = (int *)p;
3965 *(int *)p = 0;
3966 }
3967
3968 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3969 * list. */
3970 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3971 for (p = from, s = to; *p != NUL && *s != NUL; )
3972 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003973 c = mb_cptr2char_adv(&p);
3974 i = mb_cptr2char_adv(&s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003975 if (c >= 256)
3976 {
3977 /* Append the from-to chars at the end of the list with
3978 * the low byte. */
3979 inp = ((int **)gap->ga_data)[c & 0xff];
3980 while (*inp != 0)
3981 ++inp;
3982 *inp++ = c; /* from char */
3983 *inp++ = i; /* to char */
3984 *inp++ = NUL; /* NUL at the end */
3985 }
3986 else
3987 /* mapping byte to char is done in sl_sal_first[] */
3988 lp->sl_sal_first[c] = i;
3989 }
3990 }
3991 else
3992#endif
3993 {
3994 /* mapping bytes to bytes is done in sl_sal_first[] */
3995 if (STRLEN(from) != STRLEN(to))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003996 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003997
3998 for (i = 0; to[i] != NUL; ++i)
3999 lp->sl_sal_first[from[i]] = to[i];
4000 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
4001 }
4002
Bram Moolenaar5195e452005-08-19 20:32:47 +00004003 return 0;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004004}
4005
4006/*
4007 * Fill the first-index table for "lp".
4008 */
4009 static void
4010set_sal_first(lp)
4011 slang_T *lp;
4012{
4013 salfirst_T *sfirst;
4014 int i;
4015 salitem_T *smp;
4016 int c;
4017 garray_T *gap = &lp->sl_sal;
4018
4019 sfirst = lp->sl_sal_first;
4020 for (i = 0; i < 256; ++i)
4021 sfirst[i] = -1;
4022 smp = (salitem_T *)gap->ga_data;
4023 for (i = 0; i < gap->ga_len; ++i)
4024 {
4025#ifdef FEAT_MBYTE
4026 if (has_mbyte)
4027 /* Use the lowest byte of the first character. For latin1 it's
4028 * the character, for other encodings it should differ for most
4029 * characters. */
4030 c = *smp[i].sm_lead_w & 0xff;
4031 else
4032#endif
4033 c = *smp[i].sm_lead;
4034 if (sfirst[c] == -1)
4035 {
4036 sfirst[c] = i;
4037#ifdef FEAT_MBYTE
4038 if (has_mbyte)
4039 {
4040 int n;
4041
4042 /* Make sure all entries with this byte are following each
4043 * other. Move the ones that are in the wrong position. Do
4044 * keep the same ordering! */
4045 while (i + 1 < gap->ga_len
4046 && (*smp[i + 1].sm_lead_w & 0xff) == c)
4047 /* Skip over entry with same index byte. */
4048 ++i;
4049
4050 for (n = 1; i + n < gap->ga_len; ++n)
4051 if ((*smp[i + n].sm_lead_w & 0xff) == c)
4052 {
4053 salitem_T tsal;
4054
4055 /* Move entry with same index byte after the entries
4056 * we already found. */
4057 ++i;
4058 --n;
4059 tsal = smp[i + n];
4060 mch_memmove(smp + i + 1, smp + i,
4061 sizeof(salitem_T) * n);
4062 smp[i] = tsal;
4063 }
4064 }
4065#endif
4066 }
4067 }
4068}
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004069
#ifdef FEAT_MBYTE
/*
 * Turn the multi-byte string "s" into a wide character string: an array of
 * int with one entry per character and a terminating NUL entry.
 * Return it in allocated memory (NULL for out-of-memory)
 */
    static int *
mb_str2wide(s)
    char_u      *s;
{
    int         *wide;
    int         *wp;
    char_u      *p = s;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
        return NULL;

    wp = wide;
    while (*p != NUL)
        *wp++ = mb_ptr2char_adv(&p);
    *wp = NUL;
    return wide;
}
#endif
4093
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004094/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00004095 * Read a tree from the .spl or .sug file.
4096 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
4097 * This is skipped when the tree has zero length.
4098 * Returns zero when OK, SP_ value for an error.
4099 */
4100 static int
4101spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt)
4102 FILE *fd;
4103 char_u **bytsp;
4104 idx_T **idxsp;
4105 int prefixtree; /* TRUE for the prefix tree */
4106 int prefixcnt; /* when "prefixtree" is TRUE: prefix count */
4107{
4108 int len;
4109 int idx;
4110 char_u *bp;
4111 idx_T *ip;
4112
4113 /* The tree size was computed when writing the file, so that we can
4114 * allocate it as one long block. <nodecount> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00004115 len = get4c(fd);
Bram Moolenaar4770d092006-01-12 23:22:24 +00004116 if (len < 0)
4117 return SP_TRUNCERROR;
4118 if (len > 0)
4119 {
4120 /* Allocate the byte array. */
4121 bp = lalloc((long_u)len, TRUE);
4122 if (bp == NULL)
4123 return SP_OTHERERROR;
4124 *bytsp = bp;
4125
4126 /* Allocate the index array. */
4127 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
4128 if (ip == NULL)
4129 return SP_OTHERERROR;
4130 *idxsp = ip;
4131
4132 /* Recursively read the tree and store it in the array. */
4133 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
4134 if (idx < 0)
4135 return idx;
4136 }
4137 return 0;
4138}
4139
4140/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004141 * Read one row of siblings from the spell file and store it in the byte array
4142 * "byts" and index array "idxs". Recursively read the children.
4143 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00004144 * NOTE: The code here must match put_node()!
Bram Moolenaar51485f02005-06-04 21:55:20 +00004145 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00004146 * Returns the index (>= 0) following the siblings.
4147 * Returns SP_TRUNCERROR if the file is shorter than expected.
4148 * Returns SP_FORMERROR if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004149 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004150 static idx_T
Bram Moolenaar4770d092006-01-12 23:22:24 +00004151read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004152 FILE *fd;
4153 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004154 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004155 int maxidx; /* size of arrays */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004156 idx_T startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004157 int prefixtree; /* TRUE for reading PREFIXTREE */
4158 int maxprefcondnr; /* maximum for <prefcondnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004159{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004160 int len;
4161 int i;
4162 int n;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004163 idx_T idx = startidx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004164 int c;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004165 int c2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004166#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004167
Bram Moolenaar51485f02005-06-04 21:55:20 +00004168 len = getc(fd); /* <siblingcount> */
4169 if (len <= 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004170 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004171
4172 if (startidx + len >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004173 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004174 byts[idx++] = len;
4175
4176 /* Read the byte values, flag/region bytes and shared indexes. */
4177 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004178 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004179 c = getc(fd); /* <byte> */
4180 if (c < 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004181 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004182 if (c <= BY_SPECIAL)
4183 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004184 if (c == BY_NOFLAGS && !prefixtree)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004185 {
4186 /* No flags, all regions. */
4187 idxs[idx] = 0;
4188 c = 0;
4189 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004190 else if (c != BY_INDEX)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004191 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004192 if (prefixtree)
4193 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00004194 /* Read the optional pflags byte, the prefix ID and the
4195 * condition nr. In idxs[] store the prefix ID in the low
4196 * byte, the condition index shifted up 8 bits, the flags
4197 * shifted up 24 bits. */
4198 if (c == BY_FLAGS)
4199 c = getc(fd) << 24; /* <pflags> */
4200 else
4201 c = 0;
4202
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004203 c |= getc(fd); /* <affixID> */
Bram Moolenaar53805d12005-08-01 07:08:33 +00004204
Bram Moolenaarb388adb2006-02-28 23:50:17 +00004205 n = get2c(fd); /* <prefcondnr> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004206 if (n >= maxprefcondnr)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004207 return SP_FORMERROR;
Bram Moolenaar53805d12005-08-01 07:08:33 +00004208 c |= (n << 8);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004209 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004210 else /* c must be BY_FLAGS or BY_FLAGS2 */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004211 {
4212 /* Read flags and optional region and prefix ID. In
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004213 * idxs[] the flags go in the low two bytes, region above
4214 * that and prefix ID above the region. */
4215 c2 = c;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004216 c = getc(fd); /* <flags> */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004217 if (c2 == BY_FLAGS2)
4218 c = (getc(fd) << 8) + c; /* <flags2> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004219 if (c & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004220 c = (getc(fd) << 16) + c; /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004221 if (c & WF_AFX)
4222 c = (getc(fd) << 24) + c; /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004223 }
4224
Bram Moolenaar51485f02005-06-04 21:55:20 +00004225 idxs[idx] = c;
4226 c = 0;
4227 }
4228 else /* c == BY_INDEX */
4229 {
4230 /* <nodeidx> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00004231 n = get3c(fd);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004232 if (n < 0 || n >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004233 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004234 idxs[idx] = n + SHARED_MASK;
4235 c = getc(fd); /* <xbyte> */
4236 }
4237 }
4238 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004239 }
4240
Bram Moolenaar51485f02005-06-04 21:55:20 +00004241 /* Recursively read the children for non-shared siblings.
4242 * Skip the end-of-word ones (zero byte value) and the shared ones (and
4243 * remove SHARED_MASK) */
4244 for (i = 1; i <= len; ++i)
4245 if (byts[startidx + i] != 0)
4246 {
4247 if (idxs[startidx + i] & SHARED_MASK)
4248 idxs[startidx + i] &= ~SHARED_MASK;
4249 else
4250 {
4251 idxs[startidx + i] = idx;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004252 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004253 prefixtree, maxprefcondnr);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004254 if (idx < 0)
4255 break;
4256 }
4257 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004258
Bram Moolenaar51485f02005-06-04 21:55:20 +00004259 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004260}
4261
4262/*
4263 * Parse 'spelllang' and set buf->b_langp accordingly.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004264 * Returns NULL if it's OK, an error message otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004265 */
4266 char_u *
4267did_set_spelllang(buf)
4268 buf_T *buf;
4269{
4270 garray_T ga;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004271 char_u *splp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004272 char_u *region;
Bram Moolenaarb6356332005-07-18 21:40:44 +00004273 char_u region_cp[3];
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004274 int filename;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004275 int region_mask;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004276 slang_T *slang;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004277 int c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004278 char_u lang[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004279 char_u spf_name[MAXPATHL];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004280 int len;
4281 char_u *p;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004282 int round;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004283 char_u *spf;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004284 char_u *use_region = NULL;
4285 int dont_use_region = FALSE;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004286 int nobreak = FALSE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004287 int i, j;
4288 langp_T *lp, *lp2;
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004289 static int recursive = FALSE;
4290 char_u *ret_msg = NULL;
4291 char_u *spl_copy;
4292
4293 /* We don't want to do this recursively. May happen when a language is
4294 * not available and the SpellFileMissing autocommand opens a new buffer
4295 * in which 'spell' is set. */
4296 if (recursive)
4297 return NULL;
4298 recursive = TRUE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004299
4300 ga_init2(&ga, sizeof(langp_T), 2);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004301 clear_midword(buf);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004302
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004303 /* Make a copy of 'spellang', the SpellFileMissing autocommands may change
4304 * it under our fingers. */
4305 spl_copy = vim_strsave(buf->b_p_spl);
4306 if (spl_copy == NULL)
4307 goto theend;
4308
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004309 /* loop over comma separated language names. */
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004310 for (splp = spl_copy; *splp != NUL; )
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004311 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004312 /* Get one language name. */
4313 copy_option_part(&splp, lang, MAXWLEN, ",");
4314
Bram Moolenaar5482f332005-04-17 20:18:43 +00004315 region = NULL;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004316 len = (int)STRLEN(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004317
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004318 /* If the name ends in ".spl" use it as the name of the spell file.
4319 * If there is a region name let "region" point to it and remove it
4320 * from the name. */
4321 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
4322 {
4323 filename = TRUE;
4324
Bram Moolenaarb6356332005-07-18 21:40:44 +00004325 /* Locate a region and remove it from the file name. */
4326 p = vim_strchr(gettail(lang), '_');
4327 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
4328 && !ASCII_ISALPHA(p[3]))
4329 {
4330 vim_strncpy(region_cp, p + 1, 2);
4331 mch_memmove(p, p + 3, len - (p - lang) - 2);
4332 len -= 3;
4333 region = region_cp;
4334 }
4335 else
4336 dont_use_region = TRUE;
4337
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004338 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004339 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4340 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004341 break;
4342 }
4343 else
4344 {
4345 filename = FALSE;
4346 if (len > 3 && lang[len - 3] == '_')
4347 {
4348 region = lang + len - 2;
4349 len -= 3;
4350 lang[len] = NUL;
4351 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004352 else
4353 dont_use_region = TRUE;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004354
4355 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004356 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4357 if (STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004358 break;
4359 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004360
Bram Moolenaarb6356332005-07-18 21:40:44 +00004361 if (region != NULL)
4362 {
4363 /* If the region differs from what was used before then don't
4364 * use it for 'spellfile'. */
4365 if (use_region != NULL && STRCMP(region, use_region) != 0)
4366 dont_use_region = TRUE;
4367 use_region = region;
4368 }
4369
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004370 /* If not found try loading the language now. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004371 if (slang == NULL)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004372 {
4373 if (filename)
4374 (void)spell_load_file(lang, lang, NULL, FALSE);
4375 else
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004376 {
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004377 spell_load_lang(lang);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004378#ifdef FEAT_AUTOCMD
4379 /* SpellFileMissing autocommands may do anything, including
4380 * destroying the buffer we are using... */
4381 if (!buf_valid(buf))
4382 {
4383 ret_msg = (char_u *)"E797: SpellFileMissing autocommand deleted buffer";
4384 goto theend;
4385 }
4386#endif
4387 }
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004388 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004389
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004390 /*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004391 * Loop over the languages, there can be several files for "lang".
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004392 */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004393 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4394 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
4395 : STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004396 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00004397 region_mask = REGION_ALL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004398 if (!filename && region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004399 {
4400 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004401 c = find_region(slang->sl_regions, region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004402 if (c == REGION_ALL)
4403 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004404 if (slang->sl_add)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004405 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004406 if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004407 /* This addition file is for other regions. */
4408 region_mask = 0;
4409 }
4410 else
4411 /* This is probably an error. Give a warning and
4412 * accept the words anyway. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004413 smsg((char_u *)
4414 _("Warning: region %s not supported"),
4415 region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004416 }
4417 else
4418 region_mask = 1 << c;
4419 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004420
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004421 if (region_mask != 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004422 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004423 if (ga_grow(&ga, 1) == FAIL)
4424 {
4425 ga_clear(&ga);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004426 ret_msg = e_outofmem;
4427 goto theend;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004428 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004429 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004430 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4431 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004432 use_midword(slang, buf);
4433 if (slang->sl_nobreak)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004434 nobreak = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004435 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004436 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004437 }
4438
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004439 /* round 0: load int_wordlist, if possible.
4440 * round 1: load first name in 'spellfile'.
4441 * round 2: load second name in 'spellfile.
4442 * etc. */
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004443 spf = buf->b_p_spf;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004444 for (round = 0; round == 0 || *spf != NUL; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004445 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004446 if (round == 0)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004447 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004448 /* Internal wordlist, if there is one. */
4449 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004450 continue;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004451 int_wordlist_spl(spf_name);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004452 }
4453 else
4454 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004455 /* One entry in 'spellfile'. */
4456 copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
4457 STRCAT(spf_name, ".spl");
4458
4459 /* If it was already found above then skip it. */
4460 for (c = 0; c < ga.ga_len; ++c)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004461 {
4462 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
4463 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004464 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004465 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004466 if (c < ga.ga_len)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004467 continue;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004468 }
4469
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004470 /* Check if it was loaded already. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004471 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4472 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004473 break;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004474 if (slang == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004475 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004476 /* Not loaded, try loading it now. The language name includes the
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004477 * region name, the region is ignored otherwise. for int_wordlist
4478 * use an arbitrary name. */
4479 if (round == 0)
4480 STRCPY(lang, "internal wordlist");
4481 else
Bram Moolenaar7887d882005-07-01 22:33:52 +00004482 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004483 vim_strncpy(lang, gettail(spf_name), MAXWLEN);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004484 p = vim_strchr(lang, '.');
4485 if (p != NULL)
4486 *p = NUL; /* truncate at ".encoding.add" */
4487 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004488 slang = spell_load_file(spf_name, lang, NULL, TRUE);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004489
4490 /* If one of the languages has NOBREAK we assume the addition
4491 * files also have this. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004492 if (slang != NULL && nobreak)
4493 slang->sl_nobreak = TRUE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004494 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004495 if (slang != NULL && ga_grow(&ga, 1) == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004496 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004497 region_mask = REGION_ALL;
4498 if (use_region != NULL && !dont_use_region)
4499 {
4500 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004501 c = find_region(slang->sl_regions, use_region);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004502 if (c != REGION_ALL)
4503 region_mask = 1 << c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004504 else if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004505 /* This spell file is for other regions. */
4506 region_mask = 0;
4507 }
4508
4509 if (region_mask != 0)
4510 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004511 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4512 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
4513 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004514 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4515 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004516 use_midword(slang, buf);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004517 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004518 }
4519 }
4520
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004521 /* Everything is fine, store the new b_langp value. */
4522 ga_clear(&buf->b_langp);
4523 buf->b_langp = ga;
4524
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004525 /* For each language figure out what language to use for sound folding and
4526 * REP items. If the language doesn't support it itself use another one
4527 * with the same name. E.g. for "en-math" use "en". */
4528 for (i = 0; i < ga.ga_len; ++i)
4529 {
4530 lp = LANGP_ENTRY(ga, i);
4531
4532 /* sound folding */
4533 if (lp->lp_slang->sl_sal.ga_len > 0)
4534 /* language does sound folding itself */
4535 lp->lp_sallang = lp->lp_slang;
4536 else
4537 /* find first similar language that does sound folding */
4538 for (j = 0; j < ga.ga_len; ++j)
4539 {
4540 lp2 = LANGP_ENTRY(ga, j);
4541 if (lp2->lp_slang->sl_sal.ga_len > 0
4542 && STRNCMP(lp->lp_slang->sl_name,
4543 lp2->lp_slang->sl_name, 2) == 0)
4544 {
4545 lp->lp_sallang = lp2->lp_slang;
4546 break;
4547 }
4548 }
4549
4550 /* REP items */
4551 if (lp->lp_slang->sl_rep.ga_len > 0)
4552 /* language has REP items itself */
4553 lp->lp_replang = lp->lp_slang;
4554 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00004555 /* find first similar language that has REP items */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004556 for (j = 0; j < ga.ga_len; ++j)
4557 {
4558 lp2 = LANGP_ENTRY(ga, j);
4559 if (lp2->lp_slang->sl_rep.ga_len > 0
4560 && STRNCMP(lp->lp_slang->sl_name,
4561 lp2->lp_slang->sl_name, 2) == 0)
4562 {
4563 lp->lp_replang = lp2->lp_slang;
4564 break;
4565 }
4566 }
4567 }
4568
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00004569theend:
4570 vim_free(spl_copy);
4571 recursive = FALSE;
4572 return ret_msg;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004573}
4574
4575/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004576 * Clear the midword characters for buffer "buf".
4577 */
4578 static void
4579clear_midword(buf)
4580 buf_T *buf;
4581{
4582 vim_memset(buf->b_spell_ismw, 0, 256);
4583#ifdef FEAT_MBYTE
4584 vim_free(buf->b_spell_ismw_mb);
4585 buf->b_spell_ismw_mb = NULL;
4586#endif
4587}
4588
4589/*
4590 * Use the "sl_midword" field of language "lp" for buffer "buf".
4591 * They add up to any currently used midword characters.
4592 */
4593 static void
4594use_midword(lp, buf)
4595 slang_T *lp;
4596 buf_T *buf;
4597{
4598 char_u *p;
4599
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004600 if (lp->sl_midword == NULL) /* there aren't any */
4601 return;
4602
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004603 for (p = lp->sl_midword; *p != NUL; )
4604#ifdef FEAT_MBYTE
4605 if (has_mbyte)
4606 {
4607 int c, l, n;
4608 char_u *bp;
4609
4610 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004611 l = (*mb_ptr2len)(p);
4612 if (c < 256 && l <= 2)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004613 buf->b_spell_ismw[c] = TRUE;
4614 else if (buf->b_spell_ismw_mb == NULL)
4615 /* First multi-byte char in "b_spell_ismw_mb". */
4616 buf->b_spell_ismw_mb = vim_strnsave(p, l);
4617 else
4618 {
4619 /* Append multi-byte chars to "b_spell_ismw_mb". */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004620 n = (int)STRLEN(buf->b_spell_ismw_mb);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004621 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
4622 if (bp != NULL)
4623 {
4624 vim_free(buf->b_spell_ismw_mb);
4625 buf->b_spell_ismw_mb = bp;
4626 vim_strncpy(bp + n, p, l);
4627 }
4628 }
4629 p += l;
4630 }
4631 else
4632#endif
4633 buf->b_spell_ismw[*p++] = TRUE;
4634}
4635
4636/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004637 * Find the region "region[2]" in "rp" (points to "sl_regions").
4638 * Each region is simply stored as the two characters of it's name.
Bram Moolenaar7887d882005-07-01 22:33:52 +00004639 * Returns the index if found (first is 0), REGION_ALL if not found.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004640 */
4641 static int
4642find_region(rp, region)
4643 char_u *rp;
4644 char_u *region;
4645{
4646 int i;
4647
4648 for (i = 0; ; i += 2)
4649 {
4650 if (rp[i] == NUL)
4651 return REGION_ALL;
4652 if (rp[i] == region[0] && rp[i + 1] == region[1])
4653 break;
4654 }
4655 return i / 2;
4656}
4657
4658/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004659 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004660 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00004661 * Word WF_ONECAP
4662 * W WORD WF_ALLCAP
4663 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004664 */
4665 static int
4666captype(word, end)
4667 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004668 char_u *end; /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004669{
4670 char_u *p;
4671 int c;
4672 int firstcap;
4673 int allcap;
4674 int past_second = FALSE; /* past second word char */
4675
4676 /* find first letter */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004677 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004678 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004679 return 0; /* only non-word characters, illegal word */
4680#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00004681 if (has_mbyte)
4682 c = mb_ptr2char_adv(&p);
4683 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004684#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00004685 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004686 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004687
4688 /*
4689 * Need to check all letters to find a word with mixed upper/lower.
4690 * But a word with an upper char only at start is a ONECAP.
4691 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004692 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004693 if (spell_iswordp_nmw(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004694 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00004695 c = PTR2CHAR(p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004696 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004697 {
4698 /* UUl -> KEEPCAP */
4699 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004700 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004701 allcap = FALSE;
4702 }
4703 else if (!allcap)
4704 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004705 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004706 past_second = TRUE;
4707 }
4708
4709 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004710 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004711 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004712 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004713 return 0;
4714}
4715
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004716/*
4717 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
4718 * capital. So that make_case_word() can turn WOrd into Word.
4719 * Add ALLCAP for "WOrD".
4720 */
4721 static int
4722badword_captype(word, end)
4723 char_u *word;
4724 char_u *end;
4725{
4726 int flags = captype(word, end);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004727 int c;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004728 int l, u;
4729 int first;
4730 char_u *p;
4731
4732 if (flags & WF_KEEPCAP)
4733 {
4734 /* Count the number of UPPER and lower case letters. */
4735 l = u = 0;
4736 first = FALSE;
4737 for (p = word; p < end; mb_ptr_adv(p))
4738 {
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004739 c = PTR2CHAR(p);
4740 if (SPELL_ISUPPER(c))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004741 {
4742 ++u;
4743 if (p == word)
4744 first = TRUE;
4745 }
4746 else
4747 ++l;
4748 }
4749
4750 /* If there are more UPPER than lower case letters suggest an
4751 * ALLCAP word. Otherwise, if the first letter is UPPER then
4752 * suggest ONECAP. Exception: "ALl" most likely should be "All",
4753 * require three upper case letters. */
4754 if (u > l && u > 2)
4755 flags |= WF_ALLCAP;
4756 else if (first)
4757 flags |= WF_ONECAP;
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00004758
4759 if (u >= 2 && l >= 2) /* maCARONI maCAroni */
4760 flags |= WF_MIXCAP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004761 }
4762 return flags;
4763}
4764
# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all languages.
 * Also empties "b_langp" of every buffer, deletes the internal wordlist with
 * its .spl file, re-initializes the spell character table and frees
 * "repl_to" and "repl_from".
 */
    void
spell_free_all()
{
    slang_T     *slang;
    buf_T       *buf;
    char_u      fname[MAXPATHL];

    /* Go through all buffers and drop the list of languages in use. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
        ga_clear(&buf->b_langp);

    /* Unlink each language from the list before freeing it. */
    for (slang = first_lang; slang != NULL; slang = first_lang)
    {
        first_lang = slang->sl_next;
        slang_free(slang);
    }

    if (int_wordlist != NULL)
    {
        /* Delete the internal wordlist and its .spl file */
        mch_remove(int_wordlist);
        int_wordlist_spl(fname);
        mch_remove(fname);
        vim_free(int_wordlist);
        int_wordlist = NULL;
    }

    init_spell_chartab();

    vim_free(repl_to);
    repl_to = NULL;
    vim_free(repl_from);
    repl_from = NULL;
}
# endif
4805
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    buf_T       *buf;
    win_T       *wp;

    /* Initialize the table for spell_iswordp(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    spell_free_all();

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
        /* Nothing to load when 'spelllang' is empty. */
        if (*buf->b_p_spl == NUL)
            continue;

        /* Only load the wordlists when there is a window for this buffer
         * in which 'spell' is set.  One such window is enough. */
        FOR_ALL_WINDOWS(wp)
            if (wp->w_buffer == buf && wp->w_p_spell)
            {
                (void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
                break;
# endif
            }
    }
}
# endif
4842
Bram Moolenaarb765d632005-06-07 21:00:02 +00004843/*
4844 * Reload the spell file "fname" if it's loaded.
4845 */
4846 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004847spell_reload_one(fname, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004848 char_u *fname;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004849 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004850{
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004851 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004852 int didit = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004853
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004854 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004855 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004856 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004857 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004858 slang_clear(slang);
4859 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004860 /* reloading failed, clear the language */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004861 slang_clear(slang);
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00004862 redraw_all_later(SOME_VALID);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004863 didit = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004864 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004865 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004866
4867 /* When "zg" was used and the file wasn't loaded yet, should redo
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00004868 * 'spelllang' to load it now. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004869 if (added_word && !didit)
4870 did_set_spelllang(curbuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004871}
4872
4873
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004874/*
4875 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004876 */
4877
Bram Moolenaar51485f02005-06-04 21:55:20 +00004878#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004879 and .dic file. */
4880/*
4881 * Main structure to store the contents of a ".aff" file.
4882 */
4883typedef struct afffile_S
4884{
4885 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
Bram Moolenaar95529562005-08-25 21:21:38 +00004886 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
Bram Moolenaar371baa92005-12-29 22:43:53 +00004887 unsigned af_rare; /* RARE ID for rare word */
4888 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004889 unsigned af_bad; /* BAD ID for banned word */
4890 unsigned af_needaffix; /* NEEDAFFIX ID */
Bram Moolenaar8dff8182006-04-06 20:18:50 +00004891 unsigned af_circumfix; /* CIRCUMFIX ID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004892 unsigned af_needcomp; /* NEEDCOMPOUND ID */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00004893 unsigned af_comproot; /* COMPOUNDROOT ID */
4894 unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */
4895 unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */
Bram Moolenaare1438bb2006-03-01 22:01:55 +00004896 unsigned af_nosuggest; /* NOSUGGEST ID */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00004897 int af_pfxpostpone; /* postpone prefixes without chop string and
4898 without flags */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004899 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
4900 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004901 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004902} afffile_T;
4903
Bram Moolenaar6de68532005-08-24 22:08:48 +00004904#define AFT_CHAR 0 /* flags are one character */
Bram Moolenaar95529562005-08-25 21:21:38 +00004905#define AFT_LONG 1 /* flags are two characters */
4906#define AFT_CAPLONG 2 /* flags are one or two characters */
4907#define AFT_NUM 3 /* flags are numbers, comma separated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004908
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004909typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004910/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
4911struct affentry_S
4912{
4913 affentry_T *ae_next; /* next affix with same name/number */
4914 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
4915 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00004916 char_u *ae_flags; /* flags on the affix (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004917 char_u *ae_cond; /* condition (NULL for ".") */
4918 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar5555acc2006-04-07 21:33:12 +00004919 char ae_compforbid; /* COMPOUNDFORBIDFLAG found */
4920 char ae_comppermit; /* COMPOUNDPERMITFLAG found */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004921};
4922
Bram Moolenaar6de68532005-08-24 22:08:48 +00004923#ifdef FEAT_MBYTE
4924# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
4925#else
Bram Moolenaar95529562005-08-25 21:21:38 +00004926# define AH_KEY_LEN 7 /* 6 digits + NUL */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004927#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +00004928
Bram Moolenaar51485f02005-06-04 21:55:20 +00004929/* Affix header from ".aff" file. Used for af_pref and af_suff. */
4930typedef struct affheader_S
4931{
Bram Moolenaar6de68532005-08-24 22:08:48 +00004932 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4933 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
4934 int ah_newID; /* prefix ID after renumbering; 0 if not used */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004935 int ah_combine; /* suffix may combine with prefix */
Bram Moolenaar95529562005-08-25 21:21:38 +00004936 int ah_follows; /* another affix block should be following */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004937 affentry_T *ah_first; /* first affix entry */
4938} affheader_T;
4939
4940#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
4941
Bram Moolenaar6de68532005-08-24 22:08:48 +00004942/* Flag used in compound items. */
4943typedef struct compitem_S
4944{
4945 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4946 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
4947 int ci_newID; /* affix ID after renumbering. */
4948} compitem_T;
4949
4950#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
4951
Bram Moolenaar51485f02005-06-04 21:55:20 +00004952/*
4953 * Structure that is used to store the items in the word tree. This avoids
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004954 * the need to keep track of each allocated thing, everything is freed all at
4955 * once after ":mkspell" is done.
Bram Moolenaar6ae167a2009-02-11 16:58:49 +00004956 * Note: "sb_next" must be just before "sb_data" to make sure the alignment of
4957 * "sb_data" is correct for systems where pointers must be aligned on
4958 * pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
Bram Moolenaar51485f02005-06-04 21:55:20 +00004959 */
4960#define SBLOCKSIZE 16000 /* size of sb_data */
4961typedef struct sblock_S sblock_T;
4962struct sblock_S
4963{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004964 int sb_used; /* nr of bytes already in use */
Bram Moolenaar6ae167a2009-02-11 16:58:49 +00004965 sblock_T *sb_next; /* next block in list */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004966 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004967};
4968
4969/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004970 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004971 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004972typedef struct wordnode_S wordnode_T;
4973struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004974{
Bram Moolenaar0c405862005-06-22 22:26:26 +00004975 union /* shared to save space */
4976 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004977 char_u hashkey[6]; /* the hash key, only used while compressing */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004978 int index; /* index in written nodes (valid after first
4979 round) */
4980 } wn_u1;
4981 union /* shared to save space */
4982 {
4983 wordnode_T *next; /* next node with same hash key */
4984 wordnode_T *wnode; /* parent node that will write this node */
4985 } wn_u2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004986 wordnode_T *wn_child; /* child (next byte in word) */
4987 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
4988 always sorted) */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004989 int wn_refs; /* Nr. of references to this node. Only
4990 relevant for first node in a list of
4991 siblings, in following siblings it is
4992 always one. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004993 char_u wn_byte; /* Byte for this node. NUL for word end */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004994
4995 /* Info for when "wn_byte" is NUL.
4996 * In PREFIXTREE "wn_region" is used for the prefcondnr.
4997 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4998 * "wn_region" the LSW of the wordnr. */
4999 char_u wn_affixID; /* supported/required prefix ID or 0 */
5000 short_u wn_flags; /* WF_ flags */
5001 short wn_region; /* region mask */
5002
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00005003#ifdef SPELL_PRINTTREE
5004 int wn_nr; /* sequence nr for printing */
5005#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005006};
5007
#define WN_MASK	 0xffff		/* mask relevant bits of "wn_flags" */

/*
 * Get the wordnode_T that the key pointer of hashtable item "hi" refers to.
 * The whole expansion is parenthesized, so that in an expression such as
 * HI2WN(hi)->wn_child the "->" applies to the result of the cast and not to
 * "hi_key" itself.
 */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005011
Bram Moolenaar51485f02005-06-04 21:55:20 +00005012/*
5013 * Info used while reading the spell files.
5014 */
5015typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005016{
Bram Moolenaar51485f02005-06-04 21:55:20 +00005017 wordnode_T *si_foldroot; /* tree with case-folded words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00005018 long si_foldwcount; /* nr of words in si_foldroot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005019
Bram Moolenaar51485f02005-06-04 21:55:20 +00005020 wordnode_T *si_keeproot; /* tree with keep-case words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00005021 long si_keepwcount; /* nr of words in si_keeproot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005022
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005023 wordnode_T *si_prefroot; /* tree with postponed prefixes */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005024
Bram Moolenaar4770d092006-01-12 23:22:24 +00005025 long si_sugtree; /* creating the soundfolding trie */
5026
Bram Moolenaar51485f02005-06-04 21:55:20 +00005027 sblock_T *si_blocks; /* memory blocks used */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005028 long si_blocks_cnt; /* memory blocks allocated */
5029 long si_compress_cnt; /* words to add before lowering
5030 compression limit */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005031 wordnode_T *si_first_free; /* List of nodes that have been freed during
5032 compression, linked by "wn_child" field. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005033 long si_free_count; /* number of nodes in si_first_free */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005034#ifdef SPELL_PRINTTREE
5035 int si_wordnode_nr; /* sequence nr for nodes */
5036#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00005037 buf_T *si_spellbuf; /* buffer used to store soundfold word table */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005038
Bram Moolenaar51485f02005-06-04 21:55:20 +00005039 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005040 int si_add; /* addition file */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005041 int si_clear_chartab; /* when TRUE clear char tables */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005042 int si_region; /* region mask */
5043 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00005044 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005045 int si_verbose; /* verbose messages */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005046 int si_msg_count; /* number of words added since last message */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005047 char_u *si_info; /* info text chars or NULL */
Bram Moolenaar3982c542005-06-08 21:56:31 +00005048 int si_region_count; /* number of regions supported (1 when there
5049 are no regions) */
Bram Moolenaar5195e452005-08-19 20:32:47 +00005050 char_u si_region_name[16]; /* region names; used only if
5051 * si_region_count > 1) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005052
5053 garray_T si_rep; /* list of fromto_T entries from REP lines */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005054 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005055 garray_T si_sal; /* list of fromto_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005056 char_u *si_sofofr; /* SOFOFROM text */
5057 char_u *si_sofoto; /* SOFOTO text */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005058 int si_nosugfile; /* NOSUGFILE item found */
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005059 int si_nosplitsugs; /* NOSPLITSUGS item found */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005060 int si_followup; /* soundsalike: ? */
5061 int si_collapse; /* soundsalike: ? */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005062 hashtab_T si_commonwords; /* hashtable for common words */
5063 time_t si_sugtime; /* timestamp for .sug file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005064 int si_rem_accents; /* soundsalike: remove accents */
5065 garray_T si_map; /* MAP info concatenated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005066 char_u *si_midword; /* MIDWORD chars or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00005067 int si_compmax; /* max nr of words for compounding */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005068 int si_compminlen; /* minimal length for compounding */
Bram Moolenaar5195e452005-08-19 20:32:47 +00005069 int si_compsylmax; /* max nr of syllables for compounding */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005070 int si_compoptions; /* COMP_ flags */
5071 garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as
5072 a string */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005073 char_u *si_compflags; /* flags used for compounding */
Bram Moolenaar78622822005-08-23 21:00:13 +00005074 char_u si_nobreak; /* NOBREAK */
Bram Moolenaar5195e452005-08-19 20:32:47 +00005075 char_u *si_syllable; /* syllable string */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005076 garray_T si_prefcond; /* table with conditions for postponed
5077 * prefixes, each stored as a string */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005078 int si_newprefID; /* current value for ah_newID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005079 int si_newcompID; /* current value for compound ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005080} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005081
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005082static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005083static int is_aff_rule __ARGS((char_u **items, int itemcnt, char *rulename, int mincount));
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005084static void aff_process_flags __ARGS((afffile_T *affile, affentry_T *entry));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005085static int spell_info_item __ARGS((char_u *s));
Bram Moolenaar6de68532005-08-24 22:08:48 +00005086static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum));
5087static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
5088static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005089static void check_renumber __ARGS((spellinfo_T *spin));
Bram Moolenaar6de68532005-08-24 22:08:48 +00005090static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
5091static void aff_check_number __ARGS((int spinval, int affval, char *name));
5092static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005093static int str_equal __ARGS((char_u *s1, char_u *s2));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005094static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
5095static int sal_to_bool __ARGS((char_u *s));
Bram Moolenaar5482f332005-04-17 20:18:43 +00005096static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00005097static void spell_free_aff __ARGS((afffile_T *aff));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005098static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
Bram Moolenaar8dff8182006-04-06 20:18:50 +00005099static int get_affix_flags __ARGS((afffile_T *affile, char_u *afflist));
Bram Moolenaar5195e452005-08-19 20:32:47 +00005100static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar6de68532005-08-24 22:08:48 +00005101static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar8dff8182006-04-06 20:18:50 +00005102static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005103static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
5104static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
5105static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00005106static void free_blocks __ARGS((sblock_T *bl));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005107static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
Bram Moolenaar5195e452005-08-19 20:32:47 +00005108static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005109static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005110static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
Bram Moolenaar4770d092006-01-12 23:22:24 +00005111static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005112static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
5113static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
5114static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
Bram Moolenaar51485f02005-06-04 21:55:20 +00005115static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
Bram Moolenaar4770d092006-01-12 23:22:24 +00005116static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005117static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar0c405862005-06-22 22:26:26 +00005118static void clear_node __ARGS((wordnode_T *node));
Bram Moolenaarfe86f2d2008-11-28 20:29:07 +00005119static int put_node __ARGS((FILE *fd, wordnode_T *node, int idx, int regionmask, int prefixtree));
Bram Moolenaar4770d092006-01-12 23:22:24 +00005120static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname));
5121static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang));
5122static int sug_maketable __ARGS((spellinfo_T *spin));
5123static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap));
5124static int offset2bytes __ARGS((int nr, char_u *buf));
5125static int bytes2offset __ARGS((char_u **pp));
5126static void sug_write __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005127static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
Bram Moolenaar4770d092006-01-12 23:22:24 +00005128static void spell_message __ARGS((spellinfo_T *spin, char_u *str));
Bram Moolenaarb765d632005-06-07 21:00:02 +00005129static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005130
/* In the tree with postponed prefixes "wn_flags" holds the WFP_ flags.  To
 * tell tree_add_word() that it is dealing with the prefix tree the value
 * passed must be negative: a negative number with the lower 8 bits zero. */
#define PFX_FLAGS	(-256)

/* Bit values for the "condit" argument of store_aff_word(). */
#define CONDIT_COMB	1	/* affix must combine */
#define CONDIT_CFIX	2	/* affix must have CIRCUMFIX flag */
#define CONDIT_SUF	4	/* add a suffix for matching flags */
#define CONDIT_AFF	8	/* word already has an affix */
5141
/*
 * Parameters that tune when the tree gets compressed.  See 'mkspellmem'.
 */
static long compress_start = 30000L;	/* memory / SBLOCKSIZE */
static long compress_inc = 100L;	/* memory / SBLOCKSIZE */
static long compress_added = 500000L;	/* word count */
5148
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00005149#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE	1000	/* size of each of the three line buffers */
#define PRINTWIDTH	6	/* number of columns used per tree level */

/*
 * Format "fmt" with arguments "a1" and "a2" into line buffer "l", starting at
 * the column that belongs to tree level "depth".
 * All macro arguments are parenthesized, so that an expression like
 * "depth + 1" gives the intended offset.  When "depth" is so large that the
 * column lies outside of the buffer nothing is written and the result is
 * zero; this avoids passing a negative size and writing past the buffer.
 * Note that "depth" is evaluated more than once.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    ((depth) * PRINTWIDTH < PRINTLINESIZE \
	? vim_snprintf((l) + (depth) * PRINTWIDTH, \
		(size_t)(PRINTLINESIZE - PRINTWIDTH * (depth)), fmt, a1, a2) \
	: 0)

/* The three lines that together display one row of the printed tree. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
5165
5166 static void
5167spell_clear_flags(wordnode_T *node)
5168{
5169 wordnode_T *np;
5170
5171 for (np = node; np != NULL; np = np->wn_sibling)
5172 {
5173 np->wn_u1.index = FALSE;
5174 spell_clear_flags(np->wn_child);
5175 }
5176}
5177
5178 static void
5179spell_print_node(wordnode_T *node, int depth)
5180{
5181 if (node->wn_u1.index)
5182 {
5183 /* Done this node before, print the reference. */
5184 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
5185 PRINTSOME(line2, depth, " ", 0, 0);
5186 PRINTSOME(line3, depth, " ", 0, 0);
5187 msg(line1);
5188 msg(line2);
5189 msg(line3);
5190 }
5191 else
5192 {
5193 node->wn_u1.index = TRUE;
5194
5195 if (node->wn_byte != NUL)
5196 {
5197 if (node->wn_child != NULL)
5198 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
5199 else
5200 /* Cannot happen? */
5201 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
5202 }
5203 else
5204 PRINTSOME(line1, depth, " $ ", 0, 0);
5205
5206 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
5207
5208 if (node->wn_sibling != NULL)
5209 PRINTSOME(line3, depth, " | ", 0, 0);
5210 else
5211 PRINTSOME(line3, depth, " ", 0, 0);
5212
5213 if (node->wn_byte == NUL)
5214 {
5215 msg(line1);
5216 msg(line2);
5217 msg(line3);
5218 }
5219
5220 /* do the children */
5221 if (node->wn_byte != NUL && node->wn_child != NULL)
5222 spell_print_node(node->wn_child, depth + 1);
5223
5224 /* do the siblings */
5225 if (node->wn_sibling != NULL)
5226 {
5227 /* get rid of all parent details except | */
5228 STRCPY(line1, line3);
5229 STRCPY(line2, line3);
5230 spell_print_node(node->wn_sibling, depth);
5231 }
5232 }
5233}
5234
5235 static void
5236spell_print_tree(wordnode_T *root)
5237{
5238 if (root != NULL)
5239 {
5240 /* Clear the "wn_u1.index" fields, used to remember what has been
5241 * done. */
5242 spell_clear_flags(root);
5243
5244 /* Recursively print the tree. */
5245 spell_print_node(root, 0);
5246 }
5247}
5248#endif /* SPELL_PRINTTREE */
5249
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005250/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005251 * Read the affix file "fname".
Bram Moolenaar3982c542005-06-08 21:56:31 +00005252 * Returns an afffile_T, NULL for complete failure.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005253 */
5254 static afffile_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005255spell_read_aff(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00005256 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005257 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005258{
5259 FILE *fd;
5260 afffile_T *aff;
5261 char_u rline[MAXLINELEN];
5262 char_u *line;
5263 char_u *pc = NULL;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005264#define MAXITEMCNT 30
Bram Moolenaar8db73182005-06-17 21:51:16 +00005265 char_u *(items[MAXITEMCNT]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005266 int itemcnt;
5267 char_u *p;
5268 int lnum = 0;
5269 affheader_T *cur_aff = NULL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005270 int did_postpone_prefix = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005271 int aff_todo = 0;
5272 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005273 char_u *low = NULL;
5274 char_u *fol = NULL;
5275 char_u *upp = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005276 int do_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005277 int do_repsal;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005278 int do_sal;
Bram Moolenaar89d40322006-08-29 15:30:07 +00005279 int do_mapline;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005280 int found_map = FALSE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005281 hashitem_T *hi;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005282 int l;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005283 int compminlen = 0; /* COMPOUNDMIN value */
5284 int compsylmax = 0; /* COMPOUNDSYLMAX value */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005285 int compoptions = 0; /* COMP_ flags */
5286 int compmax = 0; /* COMPOUNDWORDMAX value */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005287 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE
Bram Moolenaar6de68532005-08-24 22:08:48 +00005288 concatenated */
5289 char_u *midword = NULL; /* MIDWORD value */
5290 char_u *syllable = NULL; /* SYLLABLE value */
5291 char_u *sofofrom = NULL; /* SOFOFROM value */
5292 char_u *sofoto = NULL; /* SOFOTO value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005293
Bram Moolenaar51485f02005-06-04 21:55:20 +00005294 /*
5295 * Open the file.
5296 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005297 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005298 if (fd == NULL)
5299 {
5300 EMSG2(_(e_notopen), fname);
5301 return NULL;
5302 }
5303
Bram Moolenaar4770d092006-01-12 23:22:24 +00005304 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
5305 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005306
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005307 /* Only do REP lines when not done in another .aff file already. */
5308 do_rep = spin->si_rep.ga_len == 0;
5309
Bram Moolenaar4770d092006-01-12 23:22:24 +00005310 /* Only do REPSAL lines when not done in another .aff file already. */
5311 do_repsal = spin->si_repsal.ga_len == 0;
5312
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005313 /* Only do SAL lines when not done in another .aff file already. */
5314 do_sal = spin->si_sal.ga_len == 0;
5315
5316 /* Only do MAP lines when not done in another .aff file already. */
Bram Moolenaar89d40322006-08-29 15:30:07 +00005317 do_mapline = spin->si_map.ga_len == 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005318
Bram Moolenaar51485f02005-06-04 21:55:20 +00005319 /*
5320 * Allocate and init the afffile_T structure.
5321 */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005322 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005323 if (aff == NULL)
Bram Moolenaareb3593b2006-04-22 22:33:57 +00005324 {
5325 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005326 return NULL;
Bram Moolenaareb3593b2006-04-22 22:33:57 +00005327 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005328 hash_init(&aff->af_pref);
5329 hash_init(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005330 hash_init(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005331
5332 /*
5333 * Read all the lines in the file one by one.
5334 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005335 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005336 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005337 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005338 ++lnum;
5339
5340 /* Skip comment lines. */
5341 if (*rline == '#')
5342 continue;
5343
5344 /* Convert from "SET" to 'encoding' when needed. */
5345 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005346#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00005347 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005348 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00005349 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005350 if (pc == NULL)
5351 {
5352 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5353 fname, lnum, rline);
5354 continue;
5355 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005356 line = pc;
5357 }
5358 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00005359#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005360 {
5361 pc = NULL;
5362 line = rline;
5363 }
5364
5365 /* Split the line up in white separated items. Put a NUL after each
5366 * item. */
5367 itemcnt = 0;
5368 for (p = line; ; )
5369 {
5370 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
5371 ++p;
5372 if (*p == NUL)
5373 break;
Bram Moolenaar8db73182005-06-17 21:51:16 +00005374 if (itemcnt == MAXITEMCNT) /* too many items */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005375 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005376 items[itemcnt++] = p;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005377 /* A few items have arbitrary text argument, don't split them. */
5378 if (itemcnt == 2 && spell_info_item(items[0]))
5379 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */
5380 ++p;
5381 else
5382 while (*p > ' ') /* skip until white space or CR/NL */
5383 ++p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005384 if (*p == NUL)
5385 break;
5386 *p++ = NUL;
5387 }
5388
5389 /* Handle non-empty lines. */
5390 if (itemcnt > 0)
5391 {
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005392 if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005393 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00005394#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00005395 /* Setup for conversion from "ENC" to 'encoding'. */
5396 aff->af_enc = enc_canonize(items[1]);
5397 if (aff->af_enc != NULL && !spin->si_ascii
5398 && convert_setup(&spin->si_conv, aff->af_enc,
5399 p_enc) == FAIL)
5400 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
5401 fname, aff->af_enc, p_enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005402 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00005403#else
5404 smsg((char_u *)_("Conversion in %s not supported"), fname);
5405#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005406 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005407 else if (is_aff_rule(items, itemcnt, "FLAG", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005408 && aff->af_flagtype == AFT_CHAR)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005409 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005410 if (STRCMP(items[1], "long") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00005411 aff->af_flagtype = AFT_LONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005412 else if (STRCMP(items[1], "num") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00005413 aff->af_flagtype = AFT_NUM;
5414 else if (STRCMP(items[1], "caplong") == 0)
5415 aff->af_flagtype = AFT_CAPLONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005416 else
5417 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
5418 fname, lnum, items[1]);
Bram Moolenaar371baa92005-12-29 22:43:53 +00005419 if (aff->af_rare != 0
5420 || aff->af_keepcase != 0
5421 || aff->af_bad != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005422 || aff->af_needaffix != 0
Bram Moolenaar8dff8182006-04-06 20:18:50 +00005423 || aff->af_circumfix != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005424 || aff->af_needcomp != 0
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005425 || aff->af_comproot != 0
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005426 || aff->af_nosuggest != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005427 || compflags != NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00005428 || aff->af_suff.ht_used > 0
5429 || aff->af_pref.ht_used > 0)
5430 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
5431 fname, lnum, items[1]);
5432 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005433 else if (spell_info_item(items[0]))
5434 {
5435 p = (char_u *)getroom(spin,
5436 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
5437 + STRLEN(items[0])
5438 + STRLEN(items[1]) + 3, FALSE);
5439 if (p != NULL)
5440 {
5441 if (spin->si_info != NULL)
5442 {
5443 STRCPY(p, spin->si_info);
5444 STRCAT(p, "\n");
5445 }
5446 STRCAT(p, items[0]);
5447 STRCAT(p, " ");
5448 STRCAT(p, items[1]);
5449 spin->si_info = p;
5450 }
5451 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005452 else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005453 && midword == NULL)
5454 {
5455 midword = getroom_save(spin, items[1]);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005456 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005457 else if (is_aff_rule(items, itemcnt, "TRY", 2))
Bram Moolenaar51485f02005-06-04 21:55:20 +00005458 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005459 /* ignored, we look in the tree for what chars may appear */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005460 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005461 /* TODO: remove "RAR" later */
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005462 else if ((is_aff_rule(items, itemcnt, "RAR", 2)
5463 || is_aff_rule(items, itemcnt, "RARE", 2))
5464 && aff->af_rare == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005465 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005466 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005467 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005468 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005469 /* TODO: remove "KEP" later */
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005470 else if ((is_aff_rule(items, itemcnt, "KEP", 2)
5471 || is_aff_rule(items, itemcnt, "KEEPCASE", 2))
Bram Moolenaar371baa92005-12-29 22:43:53 +00005472 && aff->af_keepcase == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005473 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005474 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005475 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005476 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005477 else if ((is_aff_rule(items, itemcnt, "BAD", 2)
5478 || is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
5479 && aff->af_bad == 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +00005480 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005481 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5482 fname, lnum);
Bram Moolenaar0c405862005-06-22 22:26:26 +00005483 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005484 else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005485 && aff->af_needaffix == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005486 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005487 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5488 fname, lnum);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005489 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005490 else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
Bram Moolenaar8dff8182006-04-06 20:18:50 +00005491 && aff->af_circumfix == 0)
5492 {
5493 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
5494 fname, lnum);
5495 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005496 else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005497 && aff->af_nosuggest == 0)
5498 {
5499 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
5500 fname, lnum);
5501 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005502 else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
5503 || is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005504 && aff->af_needcomp == 0)
5505 {
5506 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5507 fname, lnum);
5508 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005509 else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005510 && aff->af_comproot == 0)
5511 {
5512 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
5513 fname, lnum);
5514 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005515 else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
5516 && aff->af_compforbid == 0)
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005517 {
5518 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
5519 fname, lnum);
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005520 if (aff->af_pref.ht_used > 0)
5521 smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
5522 fname, lnum);
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005523 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005524 else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
5525 && aff->af_comppermit == 0)
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005526 {
5527 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
5528 fname, lnum);
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005529 if (aff->af_pref.ht_used > 0)
5530 smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
5531 fname, lnum);
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005532 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005533 else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005534 && compflags == NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005535 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005536 /* Turn flag "c" into COMPOUNDRULE compatible string "c+",
Bram Moolenaar6de68532005-08-24 22:08:48 +00005537 * "Na" into "Na+", "1234" into "1234+". */
5538 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005539 if (p != NULL)
5540 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005541 STRCPY(p, items[1]);
5542 STRCAT(p, "+");
5543 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005544 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005545 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005546 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
5547 {
5548 /* We don't use the count, but do check that it's a number and
5549 * not COMPOUNDRULE mistyped. */
5550 if (atoi((char *)items[1]) == 0)
5551 smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"),
5552 fname, lnum, items[1]);
5553 }
5554 else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
Bram Moolenaar5195e452005-08-19 20:32:47 +00005555 {
5556 /* Concatenate this string to previously defined ones, using a
5557 * slash to separate them. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00005558 l = (int)STRLEN(items[1]) + 1;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005559 if (compflags != NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00005560 l += (int)STRLEN(compflags) + 1;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005561 p = getroom(spin, l, FALSE);
5562 if (p != NULL)
5563 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005564 if (compflags != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005565 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005566 STRCPY(p, compflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005567 STRCAT(p, "/");
5568 }
5569 STRCAT(p, items[1]);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005570 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005571 }
5572 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005573 else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005574 && compmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005575 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005576 compmax = atoi((char *)items[1]);
5577 if (compmax == 0)
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005578 smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
Bram Moolenaar5195e452005-08-19 20:32:47 +00005579 fname, lnum, items[1]);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005580 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005581 else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005582 && compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005583 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005584 compminlen = atoi((char *)items[1]);
5585 if (compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005586 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5587 fname, lnum, items[1]);
5588 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005589 else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005590 && compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005591 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005592 compsylmax = atoi((char *)items[1]);
5593 if (compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005594 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5595 fname, lnum, items[1]);
5596 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005597 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005598 {
5599 compoptions |= COMP_CHECKDUP;
5600 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005601 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005602 {
5603 compoptions |= COMP_CHECKREP;
5604 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005605 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005606 {
5607 compoptions |= COMP_CHECKCASE;
5608 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005609 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005610 {
5611 compoptions |= COMP_CHECKTRIPLE;
5612 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005613 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005614 {
5615 if (atoi((char *)items[1]) == 0)
5616 smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
5617 fname, lnum, items[1]);
5618 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005619 else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005620 {
5621 garray_T *gap = &spin->si_comppat;
5622 int i;
5623
5624 /* Only add the couple if it isn't already there. */
5625 for (i = 0; i < gap->ga_len - 1; i += 2)
5626 if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
5627 && STRCMP(((char_u **)(gap->ga_data))[i + 1],
5628 items[2]) == 0)
5629 break;
5630 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
5631 {
5632 ((char_u **)(gap->ga_data))[gap->ga_len++]
5633 = getroom_save(spin, items[1]);
5634 ((char_u **)(gap->ga_data))[gap->ga_len++]
5635 = getroom_save(spin, items[2]);
5636 }
5637 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005638 else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005639 && syllable == NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005640 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005641 syllable = getroom_save(spin, items[1]);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005642 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005643 else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
Bram Moolenaar78622822005-08-23 21:00:13 +00005644 {
5645 spin->si_nobreak = TRUE;
5646 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005647 else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005648 {
5649 spin->si_nosplitsugs = TRUE;
5650 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005651 else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005652 {
5653 spin->si_nosugfile = TRUE;
5654 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005655 else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005656 {
5657 aff->af_pfxpostpone = TRUE;
5658 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005659 else if ((STRCMP(items[0], "PFX") == 0
5660 || STRCMP(items[0], "SFX") == 0)
5661 && aff_todo == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005662 && itemcnt >= 4)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005663 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005664 int lasti = 4;
5665 char_u key[AH_KEY_LEN];
5666
5667 if (*items[0] == 'P')
5668 tp = &aff->af_pref;
5669 else
5670 tp = &aff->af_suff;
5671
5672 /* Myspell allows the same affix name to be used multiple
5673 * times. The affix files that do this have an undocumented
5674 * "S" flag on all but the last block, thus we check for that
5675 * and store it in ah_follows. */
5676 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5677 hi = hash_find(tp, key);
5678 if (!HASHITEM_EMPTY(hi))
5679 {
5680 cur_aff = HI2AH(hi);
5681 if (cur_aff->ah_combine != (*items[2] == 'Y'))
5682 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5683 fname, lnum, items[1]);
5684 if (!cur_aff->ah_follows)
5685 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5686 fname, lnum, items[1]);
5687 }
5688 else
5689 {
5690 /* New affix letter. */
5691 cur_aff = (affheader_T *)getroom(spin,
5692 sizeof(affheader_T), TRUE);
5693 if (cur_aff == NULL)
5694 break;
5695 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5696 fname, lnum);
5697 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5698 break;
5699 if (cur_aff->ah_flag == aff->af_bad
Bram Moolenaar371baa92005-12-29 22:43:53 +00005700 || cur_aff->ah_flag == aff->af_rare
5701 || cur_aff->ah_flag == aff->af_keepcase
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005702 || cur_aff->ah_flag == aff->af_needaffix
Bram Moolenaar8dff8182006-04-06 20:18:50 +00005703 || cur_aff->ah_flag == aff->af_circumfix
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005704 || cur_aff->ah_flag == aff->af_nosuggest
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005705 || cur_aff->ah_flag == aff->af_needcomp
5706 || cur_aff->ah_flag == aff->af_comproot)
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005707 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar95529562005-08-25 21:21:38 +00005708 fname, lnum, items[1]);
5709 STRCPY(cur_aff->ah_key, items[1]);
5710 hash_add(tp, cur_aff->ah_key);
5711
5712 cur_aff->ah_combine = (*items[2] == 'Y');
5713 }
5714
5715 /* Check for the "S" flag, which apparently means that another
5716 * block with the same affix name is following. */
5717 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5718 {
5719 ++lasti;
5720 cur_aff->ah_follows = TRUE;
5721 }
5722 else
5723 cur_aff->ah_follows = FALSE;
5724
Bram Moolenaar8db73182005-06-17 21:51:16 +00005725 /* Myspell allows extra text after the item, but that might
5726 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaar95529562005-08-25 21:21:38 +00005727 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005728 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar8db73182005-06-17 21:51:16 +00005729
Bram Moolenaar95529562005-08-25 21:21:38 +00005730 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005731 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5732 fname, lnum, items[2]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005733
Bram Moolenaar95529562005-08-25 21:21:38 +00005734 if (*items[0] == 'P' && aff->af_pfxpostpone)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005735 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005736 if (cur_aff->ah_newID == 0)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005737 {
5738 /* Use a new number in the .spl file later, to be able
5739 * to handle multiple .aff files. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005740 check_renumber(spin);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005741 cur_aff->ah_newID = ++spin->si_newprefID;
5742
5743 /* We only really use ah_newID if the prefix is
5744 * postponed. We know that only after handling all
5745 * the items. */
5746 did_postpone_prefix = FALSE;
5747 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005748 else
5749 /* Did use the ID in a previous block. */
5750 did_postpone_prefix = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005751 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005752
Bram Moolenaar51485f02005-06-04 21:55:20 +00005753 aff_todo = atoi((char *)items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005754 }
5755 else if ((STRCMP(items[0], "PFX") == 0
5756 || STRCMP(items[0], "SFX") == 0)
5757 && aff_todo > 0
5758 && STRCMP(cur_aff->ah_key, items[1]) == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005759 && itemcnt >= 5)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005760 {
5761 affentry_T *aff_entry;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005762 int upper = FALSE;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005763 int lasti = 5;
5764
Bram Moolenaar8db73182005-06-17 21:51:16 +00005765 /* Myspell allows extra text after the item, but that might
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005766 * mean mistakes go unnoticed. Require a comment-starter.
5767 * Hunspell uses a "-" item. */
5768 if (itemcnt > lasti && *items[lasti] != '#'
5769 && (STRCMP(items[lasti], "-") != 0
5770 || itemcnt != lasti + 1))
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005771 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar8db73182005-06-17 21:51:16 +00005772
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005773 /* New item for an affix letter. */
5774 --aff_todo;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005775 aff_entry = (affentry_T *)getroom(spin,
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00005776 sizeof(affentry_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005777 if (aff_entry == NULL)
5778 break;
Bram Moolenaar5482f332005-04-17 20:18:43 +00005779
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005780 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005781 aff_entry->ae_chop = getroom_save(spin, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005782 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005783 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005784 aff_entry->ae_add = getroom_save(spin, items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005785
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005786 /* Recognize flags on the affix: abcd/XYZ */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005787 aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
5788 if (aff_entry->ae_flags != NULL)
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005789 {
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005790 *aff_entry->ae_flags++ = NUL;
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005791 aff_process_flags(aff, aff_entry);
5792 }
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005793 }
5794
Bram Moolenaar51485f02005-06-04 21:55:20 +00005795 /* Don't use an affix entry with non-ASCII characters when
5796 * "spin->si_ascii" is TRUE. */
5797 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00005798 || has_non_ascii(aff_entry->ae_add)))
5799 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00005800 aff_entry->ae_next = cur_aff->ah_first;
5801 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00005802
5803 if (STRCMP(items[4], ".") != 0)
5804 {
5805 char_u buf[MAXLINELEN];
5806
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005807 aff_entry->ae_cond = getroom_save(spin, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005808 if (*items[0] == 'P')
5809 sprintf((char *)buf, "^%s", items[4]);
5810 else
5811 sprintf((char *)buf, "%s$", items[4]);
5812 aff_entry->ae_prog = vim_regcomp(buf,
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005813 RE_MAGIC + RE_STRING + RE_STRICT);
5814 if (aff_entry->ae_prog == NULL)
5815 smsg((char_u *)_("Broken condition in %s line %d: %s"),
5816 fname, lnum, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005817 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005818
5819 /* For postponed prefixes we need an entry in si_prefcond
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005820 * for the condition. Use an existing one if possible.
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005821 * Can't be done for an affix with flags, ignoring
5822 * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005823 if (*items[0] == 'P' && aff->af_pfxpostpone
5824 && aff_entry->ae_flags == NULL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005825 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005826 /* When the chop string is one lower-case letter and
5827 * the add string ends in the upper-case letter we set
5828 * the "upper" flag, clear "ae_chop" and remove the
5829 * letters from "ae_add". The condition must either
5830 * be empty or start with the same letter. */
5831 if (aff_entry->ae_chop != NULL
5832 && aff_entry->ae_add != NULL
5833#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005834 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar53805d12005-08-01 07:08:33 +00005835 aff_entry->ae_chop)] == NUL
5836#else
5837 && aff_entry->ae_chop[1] == NUL
5838#endif
5839 )
5840 {
5841 int c, c_up;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005842
Bram Moolenaar53805d12005-08-01 07:08:33 +00005843 c = PTR2CHAR(aff_entry->ae_chop);
5844 c_up = SPELL_TOUPPER(c);
5845 if (c_up != c
5846 && (aff_entry->ae_cond == NULL
5847 || PTR2CHAR(aff_entry->ae_cond) == c))
5848 {
5849 p = aff_entry->ae_add
5850 + STRLEN(aff_entry->ae_add);
5851 mb_ptr_back(aff_entry->ae_add, p);
5852 if (PTR2CHAR(p) == c_up)
5853 {
5854 upper = TRUE;
5855 aff_entry->ae_chop = NULL;
5856 *p = NUL;
5857
5858 /* The condition is matched with the
5859 * actual word, thus must check for the
5860 * upper-case letter. */
5861 if (aff_entry->ae_cond != NULL)
5862 {
5863 char_u buf[MAXLINELEN];
5864#ifdef FEAT_MBYTE
5865 if (has_mbyte)
5866 {
5867 onecap_copy(items[4], buf, TRUE);
5868 aff_entry->ae_cond = getroom_save(
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005869 spin, buf);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005870 }
5871 else
5872#endif
5873 *aff_entry->ae_cond = c_up;
5874 if (aff_entry->ae_cond != NULL)
5875 {
5876 sprintf((char *)buf, "^%s",
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005877 aff_entry->ae_cond);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005878 vim_free(aff_entry->ae_prog);
5879 aff_entry->ae_prog = vim_regcomp(
5880 buf, RE_MAGIC + RE_STRING);
5881 }
5882 }
5883 }
5884 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005885 }
5886
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005887 if (aff_entry->ae_chop == NULL
5888 && aff_entry->ae_flags == NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005889 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005890 int idx;
5891 char_u **pp;
5892 int n;
5893
Bram Moolenaar6de68532005-08-24 22:08:48 +00005894 /* Find a previously used condition. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00005895 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5896 --idx)
5897 {
5898 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5899 if (str_equal(p, aff_entry->ae_cond))
5900 break;
5901 }
5902 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5903 {
5904 /* Not found, add a new condition. */
5905 idx = spin->si_prefcond.ga_len++;
5906 pp = ((char_u **)spin->si_prefcond.ga_data)
5907 + idx;
5908 if (aff_entry->ae_cond == NULL)
5909 *pp = NULL;
5910 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005911 *pp = getroom_save(spin,
Bram Moolenaar53805d12005-08-01 07:08:33 +00005912 aff_entry->ae_cond);
5913 }
5914
5915 /* Add the prefix to the prefix tree. */
5916 if (aff_entry->ae_add == NULL)
5917 p = (char_u *)"";
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005918 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00005919 p = aff_entry->ae_add;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00005920
Bram Moolenaar53805d12005-08-01 07:08:33 +00005921 /* PFX_FLAGS is a negative number, so that
5922 * tree_add_word() knows this is the prefix tree. */
5923 n = PFX_FLAGS;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005924 if (!cur_aff->ah_combine)
5925 n |= WFP_NC;
5926 if (upper)
5927 n |= WFP_UP;
Bram Moolenaar5555acc2006-04-07 21:33:12 +00005928 if (aff_entry->ae_comppermit)
5929 n |= WFP_COMPPERMIT;
5930 if (aff_entry->ae_compforbid)
5931 n |= WFP_COMPFORBID;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005932 tree_add_word(spin, p, spin->si_prefroot, n,
5933 idx, cur_aff->ah_newID);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005934 did_postpone_prefix = TRUE;
5935 }
5936
5937 /* Didn't actually use ah_newID, backup si_newprefID. */
5938 if (aff_todo == 0 && !did_postpone_prefix)
5939 {
5940 --spin->si_newprefID;
5941 cur_aff->ah_newID = 0;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005942 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005943 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00005944 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005945 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005946 else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005947 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005948 fol = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005949 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005950 else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005951 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005952 low = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005953 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005954 else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005955 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005956 upp = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005957 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005958 else if (is_aff_rule(items, itemcnt, "REP", 2)
5959 || is_aff_rule(items, itemcnt, "REPSAL", 2))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005960 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005961 /* Ignore REP/REPSAL count */;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005962 if (!isdigit(*items[1]))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005963 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005964 fname, lnum);
5965 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005966 else if ((STRCMP(items[0], "REP") == 0
5967 || STRCMP(items[0], "REPSAL") == 0)
5968 && itemcnt >= 3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005969 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005970 /* REP/REPSAL item */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005971 /* Myspell ignores extra arguments, we require it starts with
5972 * # to detect mistakes. */
5973 if (itemcnt > 3 && items[3][0] != '#')
5974 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005975 if (items[0][3] == 'S' ? do_repsal : do_rep)
Bram Moolenaar1e015462005-09-25 22:16:38 +00005976 {
5977 /* Replace underscore with space (can't include a space
5978 * directly). */
5979 for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5980 if (*p == '_')
5981 *p = ' ';
5982 for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5983 if (*p == '_')
5984 *p = ' ';
Bram Moolenaar4770d092006-01-12 23:22:24 +00005985 add_fromto(spin, items[0][3] == 'S'
5986 ? &spin->si_repsal
5987 : &spin->si_rep, items[1], items[2]);
Bram Moolenaar1e015462005-09-25 22:16:38 +00005988 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005989 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00005990 else if (is_aff_rule(items, itemcnt, "MAP", 2))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005991 {
5992 /* MAP item or count */
5993 if (!found_map)
5994 {
5995 /* First line contains the count. */
5996 found_map = TRUE;
5997 if (!isdigit(*items[1]))
5998 smsg((char_u *)_("Expected MAP count in %s line %d"),
5999 fname, lnum);
6000 }
Bram Moolenaar89d40322006-08-29 15:30:07 +00006001 else if (do_mapline)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006002 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00006003 int c;
6004
6005 /* Check that every character appears only once. */
6006 for (p = items[1]; *p != NUL; )
6007 {
6008#ifdef FEAT_MBYTE
6009 c = mb_ptr2char_adv(&p);
6010#else
6011 c = *p++;
6012#endif
6013 if ((spin->si_map.ga_len > 0
6014 && vim_strchr(spin->si_map.ga_data, c)
6015 != NULL)
6016 || vim_strchr(p, c) != NULL)
6017 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
6018 fname, lnum);
6019 }
6020
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006021 /* We simply concatenate all the MAP strings, separated by
6022 * slashes. */
6023 ga_concat(&spin->si_map, items[1]);
6024 ga_append(&spin->si_map, '/');
6025 }
6026 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00006027 /* Accept "SAL from to" and "SAL from to #comment". */
6028 else if (is_aff_rule(items, itemcnt, "SAL", 3))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006029 {
6030 if (do_sal)
6031 {
6032 /* SAL item (sounds-a-like)
6033 * Either one of the known keys or a from-to pair. */
6034 if (STRCMP(items[1], "followup") == 0)
6035 spin->si_followup = sal_to_bool(items[2]);
6036 else if (STRCMP(items[1], "collapse_result") == 0)
6037 spin->si_collapse = sal_to_bool(items[2]);
6038 else if (STRCMP(items[1], "remove_accents") == 0)
6039 spin->si_rem_accents = sal_to_bool(items[2]);
6040 else
6041 /* when "to" is "_" it means empty */
6042 add_fromto(spin, &spin->si_sal, items[1],
6043 STRCMP(items[2], "_") == 0 ? (char_u *)""
6044 : items[2]);
6045 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006046 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00006047 else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006048 && sofofrom == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006049 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006050 sofofrom = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006051 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +00006052 else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006053 && sofoto == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006054 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006055 sofoto = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006056 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006057 else if (STRCMP(items[0], "COMMON") == 0)
6058 {
6059 int i;
6060
6061 for (i = 1; i < itemcnt; ++i)
6062 {
6063 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
6064 items[i])))
6065 {
6066 p = vim_strsave(items[i]);
6067 if (p == NULL)
6068 break;
6069 hash_add(&spin->si_commonwords, p);
6070 }
6071 }
6072 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006073 else
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006074 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006075 fname, lnum, items[0]);
6076 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006077 }
6078
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006079 if (fol != NULL || low != NULL || upp != NULL)
6080 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006081 if (spin->si_clear_chartab)
6082 {
6083 /* Clear the char type tables, don't want to use any of the
6084 * currently used spell properties. */
6085 init_spell_chartab();
6086 spin->si_clear_chartab = FALSE;
6087 }
6088
Bram Moolenaar3982c542005-06-08 21:56:31 +00006089 /*
6090 * Don't write a word table for an ASCII file, so that we don't check
6091 * for conflicts with a word table that matches 'encoding'.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006092 * Don't write one for utf-8 either, we use utf_*() and
Bram Moolenaar3982c542005-06-08 21:56:31 +00006093 * mb_get_class(), the list of chars in the file will be incomplete.
6094 */
6095 if (!spin->si_ascii
6096#ifdef FEAT_MBYTE
6097 && !enc_utf8
6098#endif
6099 )
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00006100 {
6101 if (fol == NULL || low == NULL || upp == NULL)
6102 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
6103 else
Bram Moolenaar3982c542005-06-08 21:56:31 +00006104 (void)set_spell_chartab(fol, low, upp);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00006105 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006106
6107 vim_free(fol);
6108 vim_free(low);
6109 vim_free(upp);
6110 }
6111
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006112 /* Use compound specifications of the .aff file for the spell info. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006113 if (compmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00006114 {
Bram Moolenaar899dddf2006-03-26 21:06:50 +00006115 aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
Bram Moolenaar6de68532005-08-24 22:08:48 +00006116 spin->si_compmax = compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006117 }
6118
Bram Moolenaar6de68532005-08-24 22:08:48 +00006119 if (compminlen != 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006120 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006121 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
6122 spin->si_compminlen = compminlen;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006123 }
6124
Bram Moolenaar6de68532005-08-24 22:08:48 +00006125 if (compsylmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00006126 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006127 if (syllable == NULL)
6128 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
6129 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
6130 spin->si_compsylmax = compsylmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006131 }
6132
Bram Moolenaar899dddf2006-03-26 21:06:50 +00006133 if (compoptions != 0)
6134 {
6135 aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
6136 spin->si_compoptions |= compoptions;
6137 }
6138
Bram Moolenaar6de68532005-08-24 22:08:48 +00006139 if (compflags != NULL)
6140 process_compflags(spin, aff, compflags);
6141
6142 /* Check that we didn't use too many renumbered flags. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006143 if (spin->si_newcompID < spin->si_newprefID)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006144 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006145 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006146 MSG(_("Too many postponed prefixes"));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006147 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006148 MSG(_("Too many compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006149 else
Bram Moolenaar6949d1d2008-08-25 02:14:05 +00006150 MSG(_("Too many postponed prefixes and/or compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006151 }
6152
Bram Moolenaar6de68532005-08-24 22:08:48 +00006153 if (syllable != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00006154 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006155 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
6156 spin->si_syllable = syllable;
6157 }
6158
6159 if (sofofrom != NULL || sofoto != NULL)
6160 {
6161 if (sofofrom == NULL || sofoto == NULL)
6162 smsg((char_u *)_("Missing SOFO%s line in %s"),
6163 sofofrom == NULL ? "FROM" : "TO", fname);
6164 else if (spin->si_sal.ga_len > 0)
6165 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar5195e452005-08-19 20:32:47 +00006166 else
Bram Moolenaar6de68532005-08-24 22:08:48 +00006167 {
6168 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
6169 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
6170 spin->si_sofofr = sofofrom;
6171 spin->si_sofoto = sofoto;
6172 }
6173 }
6174
6175 if (midword != NULL)
6176 {
6177 aff_check_string(spin->si_midword, midword, "MIDWORD");
6178 spin->si_midword = midword;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006179 }
6180
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006181 vim_free(pc);
6182 fclose(fd);
6183 return aff;
6184}
6185
6186/*
Bram Moolenaar9f94b052008-11-30 20:12:46 +00006187 * Return TRUE when items[0] equals "rulename", there are "mincount" items or
6188 * a comment is following after item "mincount".
6189 */
6190 static int
6191is_aff_rule(items, itemcnt, rulename, mincount)
6192 char_u **items;
6193 int itemcnt;
6194 char *rulename;
6195 int mincount;
6196{
6197 return (STRCMP(items[0], rulename) == 0
6198 && (itemcnt == mincount
6199 || (itemcnt > mincount && items[mincount][0] == '#')));
6200}
6201
6202/*
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006203 * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
6204 * ae_flags to ae_comppermit and ae_compforbid.
6205 */
6206 static void
6207aff_process_flags(affile, entry)
6208 afffile_T *affile;
6209 affentry_T *entry;
6210{
6211 char_u *p;
6212 char_u *prevp;
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006213 unsigned flag;
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006214
6215 if (entry->ae_flags != NULL
6216 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
6217 {
6218 for (p = entry->ae_flags; *p != NUL; )
6219 {
6220 prevp = p;
6221 flag = get_affitem(affile->af_flagtype, &p);
6222 if (flag == affile->af_comppermit || flag == affile->af_compforbid)
6223 {
Bram Moolenaara7241f52008-06-24 20:39:31 +00006224 STRMOVE(prevp, p);
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006225 p = prevp;
6226 if (flag == affile->af_comppermit)
6227 entry->ae_comppermit = TRUE;
6228 else
6229 entry->ae_compforbid = TRUE;
6230 }
6231 if (affile->af_flagtype == AFT_NUM && *p == ',')
6232 ++p;
6233 }
6234 if (*entry->ae_flags == NUL)
6235 entry->ae_flags = NULL; /* nothing left */
6236 }
6237}
6238
6239/*
Bram Moolenaar362e1a32006-03-06 23:29:24 +00006240 * Return TRUE if "s" is the name of an info item in the affix file.
6241 */
6242 static int
6243spell_info_item(s)
6244 char_u *s;
6245{
6246 return STRCMP(s, "NAME") == 0
6247 || STRCMP(s, "HOME") == 0
6248 || STRCMP(s, "VERSION") == 0
6249 || STRCMP(s, "AUTHOR") == 0
6250 || STRCMP(s, "EMAIL") == 0
6251 || STRCMP(s, "COPYRIGHT") == 0;
6252}
6253
6254/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00006255 * Turn an affix flag name into a number, according to the FLAG type.
6256 * returns zero for failure.
6257 */
6258 static unsigned
6259affitem2flag(flagtype, item, fname, lnum)
6260 int flagtype;
6261 char_u *item;
6262 char_u *fname;
6263 int lnum;
6264{
6265 unsigned res;
6266 char_u *p = item;
6267
6268 res = get_affitem(flagtype, &p);
6269 if (res == 0)
6270 {
Bram Moolenaar95529562005-08-25 21:21:38 +00006271 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006272 smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
6273 fname, lnum, item);
6274 else
6275 smsg((char_u *)_("Illegal flag in %s line %d: %s"),
6276 fname, lnum, item);
6277 }
6278 if (*p != NUL)
6279 {
6280 smsg((char_u *)_(e_affname), fname, lnum, item);
6281 return 0;
6282 }
6283
6284 return res;
6285}
6286
6287/*
6288 * Get one affix name from "*pp" and advance the pointer.
6289 * Returns zero for an error, still advances the pointer then.
6290 */
6291 static unsigned
6292get_affitem(flagtype, pp)
6293 int flagtype;
6294 char_u **pp;
6295{
6296 int res;
6297
Bram Moolenaar95529562005-08-25 21:21:38 +00006298 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00006299 {
6300 if (!VIM_ISDIGIT(**pp))
6301 {
Bram Moolenaar95529562005-08-25 21:21:38 +00006302 ++*pp; /* always advance, avoid getting stuck */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006303 return 0;
6304 }
6305 res = getdigits(pp);
6306 }
6307 else
6308 {
6309#ifdef FEAT_MBYTE
6310 res = mb_ptr2char_adv(pp);
6311#else
6312 res = *(*pp)++;
6313#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00006314 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
Bram Moolenaar6de68532005-08-24 22:08:48 +00006315 && res >= 'A' && res <= 'Z'))
6316 {
6317 if (**pp == NUL)
6318 return 0;
6319#ifdef FEAT_MBYTE
6320 res = mb_ptr2char_adv(pp) + (res << 16);
6321#else
6322 res = *(*pp)++ + (res << 16);
6323#endif
6324 }
6325 }
6326 return res;
6327}
6328
6329/*
6330 * Process the "compflags" string used in an affix file and append it to
6331 * spin->si_compflags.
6332 * The processing involves changing the affix names to ID numbers, so that
6333 * they fit in one byte.
6334 */
6335 static void
6336process_compflags(spin, aff, compflags)
6337 spellinfo_T *spin;
6338 afffile_T *aff;
6339 char_u *compflags;
6340{
6341 char_u *p;
6342 char_u *prevp;
6343 unsigned flag;
6344 compitem_T *ci;
6345 int id;
6346 int len;
6347 char_u *tp;
6348 char_u key[AH_KEY_LEN];
6349 hashitem_T *hi;
6350
6351 /* Make room for the old and the new compflags, concatenated with a / in
6352 * between. Processing it makes it shorter, but we don't know by how
6353 * much, thus allocate the maximum. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006354 len = (int)STRLEN(compflags) + 1;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006355 if (spin->si_compflags != NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006356 len += (int)STRLEN(spin->si_compflags) + 1;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006357 p = getroom(spin, len, FALSE);
6358 if (p == NULL)
6359 return;
6360 if (spin->si_compflags != NULL)
6361 {
6362 STRCPY(p, spin->si_compflags);
6363 STRCAT(p, "/");
6364 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00006365 spin->si_compflags = p;
6366 tp = p + STRLEN(p);
6367
6368 for (p = compflags; *p != NUL; )
6369 {
6370 if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
6371 /* Copy non-flag characters directly. */
6372 *tp++ = *p++;
6373 else
6374 {
6375 /* First get the flag number, also checks validity. */
6376 prevp = p;
6377 flag = get_affitem(aff->af_flagtype, &p);
6378 if (flag != 0)
6379 {
6380 /* Find the flag in the hashtable. If it was used before, use
6381 * the existing ID. Otherwise add a new entry. */
6382 vim_strncpy(key, prevp, p - prevp);
6383 hi = hash_find(&aff->af_comp, key);
6384 if (!HASHITEM_EMPTY(hi))
6385 id = HI2CI(hi)->ci_newID;
6386 else
6387 {
6388 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
6389 if (ci == NULL)
6390 break;
6391 STRCPY(ci->ci_key, key);
6392 ci->ci_flag = flag;
6393 /* Avoid using a flag ID that has a special meaning in a
6394 * regexp (also inside []). */
6395 do
6396 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006397 check_renumber(spin);
6398 id = spin->si_newcompID--;
6399 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
Bram Moolenaar6de68532005-08-24 22:08:48 +00006400 ci->ci_newID = id;
6401 hash_add(&aff->af_comp, ci->ci_key);
6402 }
6403 *tp++ = id;
6404 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006405 if (aff->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006406 ++p;
6407 }
6408 }
6409
6410 *tp = NUL;
6411}
6412
6413/*
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006414 * Check that the new IDs for postponed affixes and compounding don't overrun
6415 * each other. We have almost 255 available, but start at 0-127 to avoid
6416 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
6417 * When that is used up an error message is given.
6418 */
6419 static void
6420check_renumber(spin)
6421 spellinfo_T *spin;
6422{
6423 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
6424 {
6425 spin->si_newprefID = 127;
6426 spin->si_newcompID = 255;
6427 }
6428}
6429
6430/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00006431 * Return TRUE if flag "flag" appears in affix list "afflist".
6432 */
6433 static int
6434flag_in_afflist(flagtype, afflist, flag)
6435 int flagtype;
6436 char_u *afflist;
6437 unsigned flag;
6438{
6439 char_u *p;
6440 unsigned n;
6441
6442 switch (flagtype)
6443 {
6444 case AFT_CHAR:
6445 return vim_strchr(afflist, flag) != NULL;
6446
Bram Moolenaar95529562005-08-25 21:21:38 +00006447 case AFT_CAPLONG:
6448 case AFT_LONG:
Bram Moolenaar6de68532005-08-24 22:08:48 +00006449 for (p = afflist; *p != NUL; )
6450 {
6451#ifdef FEAT_MBYTE
6452 n = mb_ptr2char_adv(&p);
6453#else
6454 n = *p++;
6455#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00006456 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
Bram Moolenaar6de68532005-08-24 22:08:48 +00006457 && *p != NUL)
6458#ifdef FEAT_MBYTE
6459 n = mb_ptr2char_adv(&p) + (n << 16);
6460#else
6461 n = *p++ + (n << 16);
6462#endif
6463 if (n == flag)
6464 return TRUE;
6465 }
6466 break;
6467
Bram Moolenaar95529562005-08-25 21:21:38 +00006468 case AFT_NUM:
Bram Moolenaar6de68532005-08-24 22:08:48 +00006469 for (p = afflist; *p != NUL; )
6470 {
6471 n = getdigits(&p);
6472 if (n == flag)
6473 return TRUE;
6474 if (*p != NUL) /* skip over comma */
6475 ++p;
6476 }
6477 break;
6478 }
6479 return FALSE;
6480}
6481
6482/*
6483 * Give a warning when "spinval" and "affval" numbers are set and not the same.
6484 */
6485 static void
6486aff_check_number(spinval, affval, name)
6487 int spinval;
6488 int affval;
6489 char *name;
6490{
6491 if (spinval != 0 && spinval != affval)
6492 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6493}
6494
6495/*
6496 * Give a warning when "spinval" and "affval" strings are set and not the same.
6497 */
6498 static void
6499aff_check_string(spinval, affval, name)
6500 char_u *spinval;
6501 char_u *affval;
6502 char *name;
6503{
6504 if (spinval != NULL && STRCMP(spinval, affval) != 0)
6505 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6506}
6507
6508/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006509 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
6510 * NULL as equal.
6511 */
6512 static int
6513str_equal(s1, s2)
6514 char_u *s1;
6515 char_u *s2;
6516{
6517 if (s1 == NULL || s2 == NULL)
6518 return s1 == s2;
6519 return STRCMP(s1, s2) == 0;
6520}
6521
6522/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006523 * Add a from-to item to "gap". Used for REP and SAL items.
6524 * They are stored case-folded.
6525 */
6526 static void
6527add_fromto(spin, gap, from, to)
6528 spellinfo_T *spin;
6529 garray_T *gap;
6530 char_u *from;
6531 char_u *to;
6532{
6533 fromto_T *ftp;
6534 char_u word[MAXWLEN];
6535
6536 if (ga_grow(gap, 1) == OK)
6537 {
6538 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006539 (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006540 ftp->ft_from = getroom_save(spin, word);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006541 (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006542 ftp->ft_to = getroom_save(spin, word);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006543 ++gap->ga_len;
6544 }
6545}
6546
6547/*
6548 * Convert a boolean argument in a SAL line to TRUE or FALSE;
6549 */
6550 static int
6551sal_to_bool(s)
6552 char_u *s;
6553{
6554 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
6555}
6556
6557/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00006558 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
6559 * When "s" is NULL FALSE is returned.
6560 */
6561 static int
6562has_non_ascii(s)
6563 char_u *s;
6564{
6565 char_u *p;
6566
6567 if (s != NULL)
6568 for (p = s; *p != NUL; ++p)
6569 if (*p >= 128)
6570 return TRUE;
6571 return FALSE;
6572}
6573
6574/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006575 * Free the structure filled by spell_read_aff().
6576 */
6577 static void
6578spell_free_aff(aff)
6579 afffile_T *aff;
6580{
6581 hashtab_T *ht;
6582 hashitem_T *hi;
6583 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006584 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006585 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006586
6587 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006588
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006589 /* All this trouble to free the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006590 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
6591 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006592 todo = (int)ht->ht_used;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006593 for (hi = ht->ht_array; todo > 0; ++hi)
6594 {
6595 if (!HASHITEM_EMPTY(hi))
6596 {
6597 --todo;
6598 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006599 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6600 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006601 }
6602 }
6603 if (ht == &aff->af_suff)
6604 break;
6605 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006606
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006607 hash_clear(&aff->af_pref);
6608 hash_clear(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00006609 hash_clear(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006610}
6611
6612/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006613 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006614 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006615 */
6616 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006617spell_read_dic(spin, fname, affile)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006618 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006619 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006620 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006621{
Bram Moolenaar51485f02005-06-04 21:55:20 +00006622 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006623 char_u line[MAXLINELEN];
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006624 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006625 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006626 char_u store_afflist[MAXWLEN];
6627 int pfxlen;
6628 int need_affix;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006629 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006630 char_u *pc;
6631 char_u *w;
6632 int l;
6633 hash_T hash;
6634 hashitem_T *hi;
6635 FILE *fd;
6636 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006637 int non_ascii = 0;
6638 int retval = OK;
6639 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006640 int flags;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006641 int duplicate = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006642
Bram Moolenaar51485f02005-06-04 21:55:20 +00006643 /*
6644 * Open the file.
6645 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006646 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006647 if (fd == NULL)
6648 {
6649 EMSG2(_(e_notopen), fname);
6650 return FAIL;
6651 }
6652
Bram Moolenaar51485f02005-06-04 21:55:20 +00006653 /* The hashtable is only used to detect duplicated words. */
6654 hash_init(&ht);
6655
Bram Moolenaar4770d092006-01-12 23:22:24 +00006656 vim_snprintf((char *)IObuff, IOSIZE,
6657 _("Reading dictionary file %s ..."), fname);
6658 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006659
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006660 /* start with a message for the first line */
6661 spin->si_msg_count = 999999;
6662
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006663 /* Read and ignore the first line: word count. */
6664 (void)vim_fgets(line, MAXLINELEN, fd);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006665 if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006666 EMSG2(_("E760: No word count in %s"), fname);
6667
6668 /*
6669 * Read all the lines in the file one by one.
6670 * The words are converted to 'encoding' here, before being added to
6671 * the hashtable.
6672 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006673 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006674 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006675 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006676 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +00006677 if (line[0] == '#' || line[0] == '/')
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006678 continue; /* comment line */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006679
Bram Moolenaar51485f02005-06-04 21:55:20 +00006680 /* Remove CR, LF and white space from the end. White space halfway
6681 * the word is kept to allow e.g., "et al.". */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006682 l = (int)STRLEN(line);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006683 while (l > 0 && line[l - 1] <= ' ')
6684 --l;
6685 if (l == 0)
6686 continue; /* empty line */
6687 line[l] = NUL;
6688
Bram Moolenaarb765d632005-06-07 21:00:02 +00006689#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006690 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006691 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006692 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006693 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006694 if (pc == NULL)
6695 {
6696 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6697 fname, lnum, line);
6698 continue;
6699 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006700 w = pc;
6701 }
6702 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00006703#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006704 {
6705 pc = NULL;
6706 w = line;
6707 }
6708
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006709 /* Truncate the word at the "/", set "afflist" to what follows.
6710 * Replace "\/" by "/" and "\\" by "\". */
6711 afflist = NULL;
6712 for (p = w; *p != NUL; mb_ptr_adv(p))
6713 {
6714 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
Bram Moolenaara7241f52008-06-24 20:39:31 +00006715 STRMOVE(p, p + 1);
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006716 else if (*p == '/')
6717 {
6718 *p = NUL;
6719 afflist = p + 1;
6720 break;
6721 }
6722 }
6723
6724 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6725 if (spin->si_ascii && has_non_ascii(w))
6726 {
6727 ++non_ascii;
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006728 vim_free(pc);
Bram Moolenaar5555acc2006-04-07 21:33:12 +00006729 continue;
6730 }
6731
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006732 /* This takes time, print a message every 10000 words. */
6733 if (spin->si_verbose && spin->si_msg_count > 10000)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006734 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006735 spin->si_msg_count = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006736 vim_snprintf((char *)message, sizeof(message),
6737 _("line %6d, word %6d - %s"),
6738 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6739 msg_start();
6740 msg_puts_long_attr(message, 0);
6741 msg_clr_eos();
6742 msg_didout = FALSE;
6743 msg_col = 0;
6744 out_flush();
6745 }
6746
Bram Moolenaar51485f02005-06-04 21:55:20 +00006747 /* Store the word in the hashtable to be able to find duplicates. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006748 dw = (char_u *)getroom_save(spin, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006749 if (dw == NULL)
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006750 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006751 retval = FAIL;
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006752 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006753 break;
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006754 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006755
Bram Moolenaar51485f02005-06-04 21:55:20 +00006756 hash = hash_hash(dw);
6757 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006758 if (!HASHITEM_EMPTY(hi))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006759 {
6760 if (p_verbose > 0)
6761 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006762 fname, lnum, dw);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006763 else if (duplicate == 0)
6764 smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6765 fname, lnum, dw);
6766 ++duplicate;
6767 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006768 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00006769 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006770
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006771 flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006772 store_afflist[0] = NUL;
6773 pfxlen = 0;
6774 need_affix = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006775 if (afflist != NULL)
6776 {
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006777 /* Extract flags from the affix list. */
6778 flags |= get_affix_flags(affile, afflist);
6779
Bram Moolenaar6de68532005-08-24 22:08:48 +00006780 if (affile->af_needaffix != 0 && flag_in_afflist(
6781 affile->af_flagtype, afflist, affile->af_needaffix))
Bram Moolenaar5195e452005-08-19 20:32:47 +00006782 need_affix = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006783
6784 if (affile->af_pfxpostpone)
6785 /* Need to store the list of prefix IDs with the word. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006786 pfxlen = get_pfxlist(affile, afflist, store_afflist);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006787
Bram Moolenaar5195e452005-08-19 20:32:47 +00006788 if (spin->si_compflags != NULL)
6789 /* Need to store the list of compound flags with the word.
6790 * Concatenate them to the list of prefix IDs. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006791 get_compflags(affile, afflist, store_afflist + pfxlen);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006792 }
6793
Bram Moolenaar51485f02005-06-04 21:55:20 +00006794 /* Add the word to the word tree(s). */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006795 if (store_word(spin, dw, flags, spin->si_region,
6796 store_afflist, need_affix) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006797 retval = FAIL;
6798
6799 if (afflist != NULL)
6800 {
6801 /* Find all matching suffixes and add the resulting words.
6802 * Additionally do matching prefixes that combine. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006803 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006804 &affile->af_suff, &affile->af_pref,
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006805 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006806 retval = FAIL;
6807
6808 /* Find all matching prefixes and add the resulting words. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006809 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006810 &affile->af_pref, NULL,
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006811 CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006812 retval = FAIL;
6813 }
Bram Moolenaar779b74b2006-04-10 14:55:34 +00006814
6815 vim_free(pc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006816 }
6817
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006818 if (duplicate > 0)
6819 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006820 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006821 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6822 non_ascii, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006823 hash_clear(&ht);
6824
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006825 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006826 return retval;
6827}
6828
6829/*
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006830 * Check for affix flags in "afflist" that are turned into word flags.
6831 * Return WF_ flags.
6832 */
6833 static int
6834get_affix_flags(affile, afflist)
6835 afffile_T *affile;
6836 char_u *afflist;
6837{
6838 int flags = 0;
6839
6840 if (affile->af_keepcase != 0 && flag_in_afflist(
6841 affile->af_flagtype, afflist, affile->af_keepcase))
6842 flags |= WF_KEEPCAP | WF_FIXCAP;
6843 if (affile->af_rare != 0 && flag_in_afflist(
6844 affile->af_flagtype, afflist, affile->af_rare))
6845 flags |= WF_RARE;
6846 if (affile->af_bad != 0 && flag_in_afflist(
6847 affile->af_flagtype, afflist, affile->af_bad))
6848 flags |= WF_BANNED;
6849 if (affile->af_needcomp != 0 && flag_in_afflist(
6850 affile->af_flagtype, afflist, affile->af_needcomp))
6851 flags |= WF_NEEDCOMP;
6852 if (affile->af_comproot != 0 && flag_in_afflist(
6853 affile->af_flagtype, afflist, affile->af_comproot))
6854 flags |= WF_COMPROOT;
6855 if (affile->af_nosuggest != 0 && flag_in_afflist(
6856 affile->af_flagtype, afflist, affile->af_nosuggest))
6857 flags |= WF_NOSUGGEST;
6858 return flags;
6859}
6860
6861/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006862 * Get the list of prefix IDs from the affix list "afflist".
6863 * Used for PFXPOSTPONE.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006864 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6865 * and return the number of affixes.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006866 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006867 static int
6868get_pfxlist(affile, afflist, store_afflist)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006869 afffile_T *affile;
6870 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006871 char_u *store_afflist;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006872{
6873 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006874 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006875 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006876 int id;
6877 char_u key[AH_KEY_LEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006878 hashitem_T *hi;
6879
Bram Moolenaar6de68532005-08-24 22:08:48 +00006880 for (p = afflist; *p != NUL; )
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006881 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006882 prevp = p;
6883 if (get_affitem(affile->af_flagtype, &p) != 0)
6884 {
6885 /* A flag is a postponed prefix flag if it appears in "af_pref"
6886 * and it's ID is not zero. */
6887 vim_strncpy(key, prevp, p - prevp);
6888 hi = hash_find(&affile->af_pref, key);
6889 if (!HASHITEM_EMPTY(hi))
6890 {
6891 id = HI2AH(hi)->ah_newID;
6892 if (id != 0)
6893 store_afflist[cnt++] = id;
6894 }
6895 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006896 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006897 ++p;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006898 }
6899
Bram Moolenaar5195e452005-08-19 20:32:47 +00006900 store_afflist[cnt] = NUL;
6901 return cnt;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006902}
6903
6904/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00006905 * Get the list of compound IDs from the affix list "afflist" that are used
6906 * for compound words.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006907 * Puts the flags in "store_afflist[]".
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006908 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006909 static void
Bram Moolenaar6de68532005-08-24 22:08:48 +00006910get_compflags(affile, afflist, store_afflist)
6911 afffile_T *affile;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006912 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006913 char_u *store_afflist;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006914{
6915 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006916 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006917 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006918 char_u key[AH_KEY_LEN];
6919 hashitem_T *hi;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006920
Bram Moolenaar6de68532005-08-24 22:08:48 +00006921 for (p = afflist; *p != NUL; )
6922 {
6923 prevp = p;
6924 if (get_affitem(affile->af_flagtype, &p) != 0)
6925 {
6926 /* A flag is a compound flag if it appears in "af_comp". */
6927 vim_strncpy(key, prevp, p - prevp);
6928 hi = hash_find(&affile->af_comp, key);
6929 if (!HASHITEM_EMPTY(hi))
6930 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6931 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006932 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006933 ++p;
6934 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006935
Bram Moolenaar5195e452005-08-19 20:32:47 +00006936 store_afflist[cnt] = NUL;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006937}
6938
6939/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006940 * Apply affixes to a word and store the resulting words.
6941 * "ht" is the hashtable with affentry_T that need to be applied, either
6942 * prefixes or suffixes.
6943 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6944 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006945 *
6946 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006947 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006948 static int
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006949store_aff_word(spin, word, afflist, affile, ht, xht, condit, flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006950 pfxlist, pfxlen)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006951 spellinfo_T *spin; /* spell info */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006952 char_u *word; /* basic word start */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006953 char_u *afflist; /* list of names of supported affixes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006954 afffile_T *affile;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006955 hashtab_T *ht;
6956 hashtab_T *xht;
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006957 int condit; /* CONDIT_SUF et al. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006958 int flags; /* flags for the word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006959 char_u *pfxlist; /* list of prefix IDs */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006960 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest
6961 * is compound flags */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006962{
6963 int todo;
6964 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006965 affheader_T *ah;
6966 affentry_T *ae;
6967 regmatch_T regmatch;
6968 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006969 int retval = OK;
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006970 int i, j;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006971 char_u *p;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006972 int use_flags;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006973 char_u *use_pfxlist;
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006974 int use_pfxlen;
6975 int need_affix;
6976 char_u store_afflist[MAXWLEN];
Bram Moolenaar5195e452005-08-19 20:32:47 +00006977 char_u pfx_pfxlist[MAXWLEN];
Bram Moolenaar5195e452005-08-19 20:32:47 +00006978 size_t wordlen = STRLEN(word);
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006979 int use_condit;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006980
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006981 todo = (int)ht->ht_used;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006982 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006983 {
6984 if (!HASHITEM_EMPTY(hi))
6985 {
6986 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006987 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00006988
Bram Moolenaar51485f02005-06-04 21:55:20 +00006989 /* Check that the affix combines, if required, and that the word
6990 * supports this affix. */
Bram Moolenaar8dff8182006-04-06 20:18:50 +00006991 if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
6992 && flag_in_afflist(affile->af_flagtype, afflist,
6993 ah->ah_flag))
Bram Moolenaar5482f332005-04-17 20:18:43 +00006994 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006995 /* Loop over all affix entries with this name. */
6996 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006997 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006998 /* Check the condition. It's not logical to match case
6999 * here, but it is required for compatibility with
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007000 * Myspell.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007001 * Another requirement from Myspell is that the chop
7002 * string is shorter than the word itself.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007003 * For prefixes, when "PFXPOSTPONE" was used, only do
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007004 * prefixes with a chop string and/or flags.
7005 * When a previously added affix had CIRCUMFIX this one
7006 * must have it too, if it had not then this one must not
7007 * have one either. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007008 regmatch.regprog = ae->ae_prog;
7009 regmatch.rm_ic = FALSE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007010 if ((xht != NULL || !affile->af_pfxpostpone
Bram Moolenaar899dddf2006-03-26 21:06:50 +00007011 || ae->ae_chop != NULL
7012 || ae->ae_flags != NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007013 && (ae->ae_chop == NULL
7014 || STRLEN(ae->ae_chop) < wordlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007015 && (ae->ae_prog == NULL
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007016 || vim_regexec(&regmatch, word, (colnr_T)0))
7017 && (((condit & CONDIT_CFIX) == 0)
7018 == ((condit & CONDIT_AFF) == 0
7019 || ae->ae_flags == NULL
7020 || !flag_in_afflist(affile->af_flagtype,
7021 ae->ae_flags, affile->af_circumfix))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007022 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007023 /* Match. Remove the chop and add the affix. */
7024 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007025 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007026 /* prefix: chop/add at the start of the word */
7027 if (ae->ae_add == NULL)
7028 *newword = NUL;
7029 else
7030 STRCPY(newword, ae->ae_add);
7031 p = word;
7032 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00007033 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007034 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00007035#ifdef FEAT_MBYTE
7036 if (has_mbyte)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007037 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00007038 i = mb_charlen(ae->ae_chop);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007039 for ( ; i > 0; --i)
7040 mb_ptr_adv(p);
7041 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00007042 else
7043#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007044 p += STRLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007045 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007046 STRCAT(newword, p);
7047 }
7048 else
7049 {
7050 /* suffix: chop/add at the end of the word */
7051 STRCPY(newword, word);
7052 if (ae->ae_chop != NULL)
7053 {
7054 /* Remove chop string. */
7055 p = newword + STRLEN(newword);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007056 i = (int)MB_CHARLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007057 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007058 mb_ptr_back(newword, p);
7059 *p = NUL;
7060 }
7061 if (ae->ae_add != NULL)
7062 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007063 }
7064
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007065 use_flags = flags;
7066 use_pfxlist = pfxlist;
7067 use_pfxlen = pfxlen;
7068 need_affix = FALSE;
7069 use_condit = condit | CONDIT_COMB | CONDIT_AFF;
7070 if (ae->ae_flags != NULL)
7071 {
7072 /* Extract flags from the affix list. */
7073 use_flags |= get_affix_flags(affile, ae->ae_flags);
7074
7075 if (affile->af_needaffix != 0 && flag_in_afflist(
7076 affile->af_flagtype, ae->ae_flags,
7077 affile->af_needaffix))
7078 need_affix = TRUE;
7079
7080 /* When there is a CIRCUMFIX flag the other affix
7081 * must also have it and we don't add the word
7082 * with one affix. */
7083 if (affile->af_circumfix != 0 && flag_in_afflist(
7084 affile->af_flagtype, ae->ae_flags,
7085 affile->af_circumfix))
7086 {
7087 use_condit |= CONDIT_CFIX;
7088 if ((condit & CONDIT_CFIX) == 0)
7089 need_affix = TRUE;
7090 }
7091
7092 if (affile->af_pfxpostpone
7093 || spin->si_compflags != NULL)
7094 {
7095 if (affile->af_pfxpostpone)
7096 /* Get prefix IDS from the affix list. */
7097 use_pfxlen = get_pfxlist(affile,
7098 ae->ae_flags, store_afflist);
7099 else
7100 use_pfxlen = 0;
7101 use_pfxlist = store_afflist;
7102
7103 /* Combine the prefix IDs. Avoid adding the
7104 * same ID twice. */
7105 for (i = 0; i < pfxlen; ++i)
7106 {
7107 for (j = 0; j < use_pfxlen; ++j)
7108 if (pfxlist[i] == use_pfxlist[j])
7109 break;
7110 if (j == use_pfxlen)
7111 use_pfxlist[use_pfxlen++] = pfxlist[i];
7112 }
7113
7114 if (spin->si_compflags != NULL)
7115 /* Get compound IDS from the affix list. */
7116 get_compflags(affile, ae->ae_flags,
7117 use_pfxlist + use_pfxlen);
7118
7119 /* Combine the list of compound flags.
7120 * Concatenate them to the prefix IDs list.
7121 * Avoid adding the same ID twice. */
7122 for (i = pfxlen; pfxlist[i] != NUL; ++i)
7123 {
7124 for (j = use_pfxlen;
7125 use_pfxlist[j] != NUL; ++j)
7126 if (pfxlist[i] == use_pfxlist[j])
7127 break;
7128 if (use_pfxlist[j] == NUL)
7129 {
7130 use_pfxlist[j++] = pfxlist[i];
7131 use_pfxlist[j] = NUL;
7132 }
7133 }
7134 }
7135 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00007136
Bram Moolenaar910f66f2006-04-05 20:41:53 +00007137 /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
Bram Moolenaar899dddf2006-03-26 21:06:50 +00007138 * use the compound flags. */
Bram Moolenaar5555acc2006-04-07 21:33:12 +00007139 if (use_pfxlist != NULL && ae->ae_compforbid)
Bram Moolenaar5195e452005-08-19 20:32:47 +00007140 {
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007141 vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007142 use_pfxlist = pfx_pfxlist;
7143 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007144
7145 /* When there are postponed prefixes... */
Bram Moolenaar551f84f2005-07-06 22:29:20 +00007146 if (spin->si_prefroot != NULL
7147 && spin->si_prefroot->wn_sibling != NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007148 {
7149 /* ... add a flag to indicate an affix was used. */
7150 use_flags |= WF_HAS_AFF;
7151
7152 /* ... don't use a prefix list if combining
Bram Moolenaar5195e452005-08-19 20:32:47 +00007153 * affixes is not allowed. But do use the
7154 * compound flags after them. */
Bram Moolenaar18144c82006-04-12 21:52:12 +00007155 if (!ah->ah_combine && use_pfxlist != NULL)
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007156 use_pfxlist += use_pfxlen;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007157 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007158
Bram Moolenaar910f66f2006-04-05 20:41:53 +00007159 /* When compounding is supported and there is no
7160 * "COMPOUNDPERMITFLAG" then forbid compounding on the
7161 * side where the affix is applied. */
Bram Moolenaar5555acc2006-04-07 21:33:12 +00007162 if (spin->si_compflags != NULL && !ae->ae_comppermit)
Bram Moolenaar910f66f2006-04-05 20:41:53 +00007163 {
7164 if (xht != NULL)
7165 use_flags |= WF_NOCOMPAFT;
7166 else
7167 use_flags |= WF_NOCOMPBEF;
7168 }
7169
Bram Moolenaar51485f02005-06-04 21:55:20 +00007170 /* Store the modified word. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007171 if (store_word(spin, newword, use_flags,
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007172 spin->si_region, use_pfxlist,
7173 need_affix) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007174 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007175
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007176 /* When added a prefix or a first suffix and the affix
7177 * has flags may add a(nother) suffix. RECURSIVE! */
7178 if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
7179 if (store_aff_word(spin, newword, ae->ae_flags,
7180 affile, &affile->af_suff, xht,
7181 use_condit & (xht == NULL
7182 ? ~0 : ~CONDIT_SUF),
Bram Moolenaar5195e452005-08-19 20:32:47 +00007183 use_flags, use_pfxlist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007184 retval = FAIL;
Bram Moolenaar8dff8182006-04-06 20:18:50 +00007185
7186 /* When added a suffix and combining is allowed also
7187 * try adding a prefix additionally. Both for the
7188 * word flags and for the affix flags. RECURSIVE! */
7189 if (xht != NULL && ah->ah_combine)
7190 {
7191 if (store_aff_word(spin, newword,
7192 afflist, affile,
7193 xht, NULL, use_condit,
7194 use_flags, use_pfxlist,
7195 pfxlen) == FAIL
7196 || (ae->ae_flags != NULL
7197 && store_aff_word(spin, newword,
7198 ae->ae_flags, affile,
7199 xht, NULL, use_condit,
7200 use_flags, use_pfxlist,
7201 pfxlen) == FAIL))
7202 retval = FAIL;
7203 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007204 }
7205 }
7206 }
7207 }
7208 }
7209
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007210 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007211}
7212
7213/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007214 * Read a file with a list of words.
7215 */
7216 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007217spell_read_wordfile(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007218 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007219 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007220{
7221 FILE *fd;
7222 long lnum = 0;
7223 char_u rline[MAXLINELEN];
7224 char_u *line;
7225 char_u *pc = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +00007226 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007227 int l;
7228 int retval = OK;
7229 int did_word = FALSE;
7230 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007231 int flags;
Bram Moolenaar3982c542005-06-08 21:56:31 +00007232 int regionmask;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007233
7234 /*
7235 * Open the file.
7236 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00007237 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00007238 if (fd == NULL)
7239 {
7240 EMSG2(_(e_notopen), fname);
7241 return FAIL;
7242 }
7243
Bram Moolenaar4770d092006-01-12 23:22:24 +00007244 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
7245 spell_message(spin, IObuff);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007246
7247 /*
7248 * Read all the lines in the file one by one.
7249 */
7250 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
7251 {
7252 line_breakcheck();
7253 ++lnum;
7254
7255 /* Skip comment lines. */
7256 if (*rline == '#')
7257 continue;
7258
7259 /* Remove CR, LF and white space from the end. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007260 l = (int)STRLEN(rline);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007261 while (l > 0 && rline[l - 1] <= ' ')
7262 --l;
7263 if (l == 0)
7264 continue; /* empty or blank line */
7265 rline[l] = NUL;
7266
Bram Moolenaar9c102382006-05-03 21:26:49 +00007267 /* Convert from "/encoding={encoding}" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007268 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007269#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00007270 if (spin->si_conv.vc_type != CONV_NONE)
7271 {
7272 pc = string_convert(&spin->si_conv, rline, NULL);
7273 if (pc == NULL)
7274 {
7275 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
7276 fname, lnum, rline);
7277 continue;
7278 }
7279 line = pc;
7280 }
7281 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00007282#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00007283 {
7284 pc = NULL;
7285 line = rline;
7286 }
7287
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007288 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00007289 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007290 ++line;
7291 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007292 {
7293 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar3982c542005-06-08 21:56:31 +00007294 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
7295 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007296 else if (did_word)
Bram Moolenaar3982c542005-06-08 21:56:31 +00007297 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
7298 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007299 else
7300 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00007301#ifdef FEAT_MBYTE
7302 char_u *enc;
7303
Bram Moolenaar51485f02005-06-04 21:55:20 +00007304 /* Setup for conversion to 'encoding'. */
Bram Moolenaar9c102382006-05-03 21:26:49 +00007305 line += 9;
Bram Moolenaar3982c542005-06-08 21:56:31 +00007306 enc = enc_canonize(line);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007307 if (enc != NULL && !spin->si_ascii
7308 && convert_setup(&spin->si_conv, enc,
7309 p_enc) == FAIL)
7310 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar3982c542005-06-08 21:56:31 +00007311 fname, line, p_enc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007312 vim_free(enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007313 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00007314#else
7315 smsg((char_u *)_("Conversion in %s not supported"), fname);
7316#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00007317 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007318 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007319 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007320
Bram Moolenaar3982c542005-06-08 21:56:31 +00007321 if (STRNCMP(line, "regions=", 8) == 0)
7322 {
7323 if (spin->si_region_count > 1)
7324 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
7325 fname, lnum, line);
7326 else
7327 {
7328 line += 8;
7329 if (STRLEN(line) > 16)
7330 smsg((char_u *)_("Too many regions in %s line %d: %s"),
7331 fname, lnum, line);
7332 else
7333 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007334 spin->si_region_count = (int)STRLEN(line) / 2;
Bram Moolenaar3982c542005-06-08 21:56:31 +00007335 STRCPY(spin->si_region_name, line);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00007336
7337 /* Adjust the mask for a word valid in all regions. */
7338 spin->si_region = (1 << spin->si_region_count) - 1;
Bram Moolenaar3982c542005-06-08 21:56:31 +00007339 }
7340 }
7341 continue;
7342 }
7343
Bram Moolenaar7887d882005-07-01 22:33:52 +00007344 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
7345 fname, lnum, line - 1);
7346 continue;
7347 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007348
Bram Moolenaar7887d882005-07-01 22:33:52 +00007349 flags = 0;
7350 regionmask = spin->si_region;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007351
Bram Moolenaar7887d882005-07-01 22:33:52 +00007352 /* Check for flags and region after a slash. */
7353 p = vim_strchr(line, '/');
7354 if (p != NULL)
7355 {
7356 *p++ = NUL;
7357 while (*p != NUL)
Bram Moolenaar3982c542005-06-08 21:56:31 +00007358 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00007359 if (*p == '=') /* keep-case word */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00007360 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar7887d882005-07-01 22:33:52 +00007361 else if (*p == '!') /* Bad, bad, wicked word. */
7362 flags |= WF_BANNED;
7363 else if (*p == '?') /* Rare word. */
7364 flags |= WF_RARE;
7365 else if (VIM_ISDIGIT(*p)) /* region number(s) */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007366 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00007367 if ((flags & WF_REGION) == 0) /* first one */
7368 regionmask = 0;
7369 flags |= WF_REGION;
7370
7371 l = *p - '0';
Bram Moolenaar3982c542005-06-08 21:56:31 +00007372 if (l > spin->si_region_count)
7373 {
7374 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
Bram Moolenaar7887d882005-07-01 22:33:52 +00007375 fname, lnum, p);
Bram Moolenaar3982c542005-06-08 21:56:31 +00007376 break;
7377 }
7378 regionmask |= 1 << (l - 1);
Bram Moolenaar3982c542005-06-08 21:56:31 +00007379 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00007380 else
7381 {
7382 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
7383 fname, lnum, p);
7384 break;
7385 }
7386 ++p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007387 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007388 }
7389
7390 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
7391 if (spin->si_ascii && has_non_ascii(line))
7392 {
7393 ++non_ascii;
7394 continue;
7395 }
7396
7397 /* Normal word: store it. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007398 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007399 {
7400 retval = FAIL;
7401 break;
7402 }
7403 did_word = TRUE;
7404 }
7405
7406 vim_free(pc);
7407 fclose(fd);
7408
Bram Moolenaar4770d092006-01-12 23:22:24 +00007409 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarb765d632005-06-07 21:00:02 +00007410 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007411 vim_snprintf((char *)IObuff, IOSIZE,
7412 _("Ignored %d words with non-ASCII characters"), non_ascii);
7413 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007414 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007415
Bram Moolenaar51485f02005-06-04 21:55:20 +00007416 return retval;
7417}
7418
7419/*
7420 * Get part of an sblock_T, "len" bytes long.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007421 * This avoids calling free() for every little struct we use (and keeping
7422 * track of them).
Bram Moolenaar51485f02005-06-04 21:55:20 +00007423 * The memory is cleared to all zeros.
7424 * Returns NULL when out of memory.
7425 */
7426 static void *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007427getroom(spin, len, align)
7428 spellinfo_T *spin;
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00007429 size_t len; /* length needed */
7430 int align; /* align for pointer */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007431{
7432 char_u *p;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007433 sblock_T *bl = spin->si_blocks;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007434
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00007435 if (align && bl != NULL)
7436 /* Round size up for alignment. On some systems structures need to be
7437 * aligned to the size of a pointer (e.g., SPARC). */
7438 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
7439 & ~(sizeof(char *) - 1);
7440
Bram Moolenaar51485f02005-06-04 21:55:20 +00007441 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
7442 {
7443 /* Allocate a block of memory. This is not freed until much later. */
7444 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
7445 if (bl == NULL)
7446 return NULL;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007447 bl->sb_next = spin->si_blocks;
7448 spin->si_blocks = bl;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007449 bl->sb_used = 0;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007450 ++spin->si_blocks_cnt;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007451 }
7452
7453 p = bl->sb_data + bl->sb_used;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007454 bl->sb_used += (int)len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007455
7456 return p;
7457}
7458
7459/*
7460 * Make a copy of a string into memory allocated with getroom().
7461 */
7462 static char_u *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007463getroom_save(spin, s)
7464 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007465 char_u *s;
7466{
7467 char_u *sc;
7468
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007469 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007470 if (sc != NULL)
7471 STRCPY(sc, s);
7472 return sc;
7473}
7474
7475
7476/*
7477 * Free the list of allocated sblock_T.
7478 */
7479 static void
7480free_blocks(bl)
7481 sblock_T *bl;
7482{
7483 sblock_T *next;
7484
7485 while (bl != NULL)
7486 {
7487 next = bl->sb_next;
7488 vim_free(bl);
7489 bl = next;
7490 }
7491}
7492
7493/*
7494 * Allocate the root of a word tree.
7495 */
7496 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007497wordtree_alloc(spin)
7498 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007499{
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007500 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007501}
7502
7503/*
7504 * Store a word in the tree(s).
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007505 * Always store it in the case-folded tree. For a keep-case word this is
7506 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
7507 * used to find suggestions.
Bram Moolenaar51485f02005-06-04 21:55:20 +00007508 * For a keep-case word also store it in the keep-case tree.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007509 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
7510 * compound flag.
Bram Moolenaar51485f02005-06-04 21:55:20 +00007511 */
7512 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00007513store_word(spin, word, flags, region, pfxlist, need_affix)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007514 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007515 char_u *word;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007516 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007517 int region; /* supported region(s) */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007518 char_u *pfxlist; /* list of prefix IDs or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007519 int need_affix; /* only store word with affix ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007520{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007521 int len = (int)STRLEN(word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007522 int ct = captype(word, word + len);
7523 char_u foldword[MAXWLEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007524 int res = OK;
7525 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007526
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007527 (void)spell_casefold(word, len, foldword, MAXWLEN);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007528 for (p = pfxlist; res == OK; ++p)
7529 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007530 if (!need_affix || (p != NULL && *p != NUL))
7531 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007532 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007533 if (p == NULL || *p == NUL)
7534 break;
7535 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00007536 ++spin->si_foldwcount;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00007537
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007538 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
Bram Moolenaar8db73182005-06-17 21:51:16 +00007539 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007540 for (p = pfxlist; res == OK; ++p)
7541 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007542 if (!need_affix || (p != NULL && *p != NUL))
7543 res = tree_add_word(spin, word, spin->si_keeproot, flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007544 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007545 if (p == NULL || *p == NUL)
7546 break;
7547 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00007548 ++spin->si_keepwcount;
7549 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007550 return res;
7551}
7552
7553/*
7554 * Add word "word" to a word tree at "root".
Bram Moolenaar4770d092006-01-12 23:22:24 +00007555 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007556 * "rare" and "region" is the condition nr.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007557 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007558 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007559 static int
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007560tree_add_word(spin, word, root, flags, region, affixID)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007561 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007562 char_u *word;
7563 wordnode_T *root;
7564 int flags;
7565 int region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007566 int affixID;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007567{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007568 wordnode_T *node = root;
7569 wordnode_T *np;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007570 wordnode_T *copyp, **copyprev;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007571 wordnode_T **prev = NULL;
7572 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007573
Bram Moolenaar51485f02005-06-04 21:55:20 +00007574 /* Add each byte of the word to the tree, including the NUL at the end. */
7575 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007576 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007577 /* When there is more than one reference to this node we need to make
7578 * a copy, so that we can modify it. Copy the whole list of siblings
7579 * (we don't optimize for a partly shared list of siblings). */
7580 if (node != NULL && node->wn_refs > 1)
7581 {
7582 --node->wn_refs;
7583 copyprev = prev;
7584 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
7585 {
7586 /* Allocate a new node and copy the info. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007587 np = get_wordnode(spin);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007588 if (np == NULL)
7589 return FAIL;
7590 np->wn_child = copyp->wn_child;
7591 if (np->wn_child != NULL)
7592 ++np->wn_child->wn_refs; /* child gets extra ref */
7593 np->wn_byte = copyp->wn_byte;
7594 if (np->wn_byte == NUL)
7595 {
7596 np->wn_flags = copyp->wn_flags;
7597 np->wn_region = copyp->wn_region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007598 np->wn_affixID = copyp->wn_affixID;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007599 }
7600
7601 /* Link the new node in the list, there will be one ref. */
7602 np->wn_refs = 1;
Bram Moolenaareb3593b2006-04-22 22:33:57 +00007603 if (copyprev != NULL)
7604 *copyprev = np;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007605 copyprev = &np->wn_sibling;
7606
7607 /* Let "node" point to the head of the copied list. */
7608 if (copyp == node)
7609 node = np;
7610 }
7611 }
7612
Bram Moolenaar51485f02005-06-04 21:55:20 +00007613 /* Look for the sibling that has the same character. They are sorted
7614 * on byte value, thus stop searching when a sibling is found with a
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007615 * higher byte value. For zero bytes (end of word) the sorting is
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007616 * done on flags and then on affixID. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007617 while (node != NULL
7618 && (node->wn_byte < word[i]
7619 || (node->wn_byte == NUL
7620 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00007621 ? node->wn_affixID < (unsigned)affixID
7622 : (node->wn_flags < (unsigned)(flags & WN_MASK)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007623 || (node->wn_flags == (flags & WN_MASK)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007624 && (spin->si_sugtree
7625 ? (node->wn_region & 0xffff) < region
7626 : node->wn_affixID
7627 < (unsigned)affixID)))))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007628 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007629 prev = &node->wn_sibling;
7630 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007631 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007632 if (node == NULL
7633 || node->wn_byte != word[i]
7634 || (word[i] == NUL
7635 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00007636 || spin->si_sugtree
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007637 || node->wn_flags != (flags & WN_MASK)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007638 || node->wn_affixID != affixID)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007639 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007640 /* Allocate a new node. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007641 np = get_wordnode(spin);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007642 if (np == NULL)
7643 return FAIL;
7644 np->wn_byte = word[i];
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007645
7646 /* If "node" is NULL this is a new child or the end of the sibling
7647 * list: ref count is one. Otherwise use ref count of sibling and
7648 * make ref count of sibling one (matters when inserting in front
7649 * of the list of siblings). */
7650 if (node == NULL)
7651 np->wn_refs = 1;
7652 else
7653 {
7654 np->wn_refs = node->wn_refs;
7655 node->wn_refs = 1;
7656 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007657 *prev = np;
7658 np->wn_sibling = node;
7659 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007660 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007661
Bram Moolenaar51485f02005-06-04 21:55:20 +00007662 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007663 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007664 node->wn_flags = flags;
7665 node->wn_region |= region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007666 node->wn_affixID = affixID;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007667 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00007668 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007669 prev = &node->wn_child;
7670 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007671 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007672#ifdef SPELL_PRINTTREE
7673 smsg("Added \"%s\"", word);
7674 spell_print_tree(root->wn_sibling);
7675#endif
7676
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007677 /* count nr of words added since last message */
7678 ++spin->si_msg_count;
7679
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007680 if (spin->si_compress_cnt > 1)
7681 {
7682 if (--spin->si_compress_cnt == 1)
7683 /* Did enough words to lower the block count limit. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007684 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007685 }
7686
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007687 /*
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007688 * When we have allocated lots of memory we need to compress the word tree
7689 * to free up some room. But compression is slow, and we might actually
7690 * need that room, thus only compress in the following situations:
7691 * 1. When not compressed before (si_compress_cnt == 0): when using
Bram Moolenaar5195e452005-08-19 20:32:47 +00007692 * "compress_start" blocks.
7693 * 2. When compressed before and used "compress_inc" blocks before
7694 * adding "compress_added" words (si_compress_cnt > 1).
7695 * 3. When compressed before, added "compress_added" words
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007696 * (si_compress_cnt == 1) and the number of free nodes drops below the
7697 * maximum word length.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007698 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007699#ifndef SPELL_PRINTTREE
7700 if (spin->si_compress_cnt == 1
7701 ? spin->si_free_count < MAXWLEN
Bram Moolenaar5195e452005-08-19 20:32:47 +00007702 : spin->si_blocks_cnt >= compress_start)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007703#endif
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007704 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007705 /* Decrement the block counter. The effect is that we compress again
Bram Moolenaar5195e452005-08-19 20:32:47 +00007706 * when the freed up room has been used and another "compress_inc"
7707 * blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007708 * been added, then the limit is put back again. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007709 spin->si_blocks_cnt -= compress_inc;
7710 spin->si_compress_cnt = compress_added;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007711
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007712 if (spin->si_verbose)
7713 {
7714 msg_start();
7715 msg_puts((char_u *)_(msg_compressing));
7716 msg_clr_eos();
7717 msg_didout = FALSE;
7718 msg_col = 0;
7719 out_flush();
7720 }
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007721
7722 /* Compress both trees. Either they both have many nodes, which makes
7723 * compression useful, or one of them is small, which means
Bram Moolenaar4770d092006-01-12 23:22:24 +00007724 * compression goes fast. But when filling the souldfold word tree
7725 * there is no keep-case tree. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007726 wordtree_compress(spin, spin->si_foldroot);
Bram Moolenaar4770d092006-01-12 23:22:24 +00007727 if (affixID >= 0)
7728 wordtree_compress(spin, spin->si_keeproot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007729 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007730
7731 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007732}
7733
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007734/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00007735 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
7736 * Sets "sps_flags".
7737 */
7738 int
7739spell_check_msm()
7740{
7741 char_u *p = p_msm;
7742 long start = 0;
Bram Moolenaar89d40322006-08-29 15:30:07 +00007743 long incr = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00007744 long added = 0;
7745
7746 if (!VIM_ISDIGIT(*p))
7747 return FAIL;
7748 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
7749 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
7750 if (*p != ',')
7751 return FAIL;
7752 ++p;
7753 if (!VIM_ISDIGIT(*p))
7754 return FAIL;
Bram Moolenaar89d40322006-08-29 15:30:07 +00007755 incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007756 if (*p != ',')
7757 return FAIL;
7758 ++p;
7759 if (!VIM_ISDIGIT(*p))
7760 return FAIL;
7761 added = getdigits(&p) * 1024;
7762 if (*p != NUL)
7763 return FAIL;
7764
Bram Moolenaar89d40322006-08-29 15:30:07 +00007765 if (start == 0 || incr == 0 || added == 0 || incr > start)
Bram Moolenaar5195e452005-08-19 20:32:47 +00007766 return FAIL;
7767
7768 compress_start = start;
Bram Moolenaar89d40322006-08-29 15:30:07 +00007769 compress_inc = incr;
Bram Moolenaar5195e452005-08-19 20:32:47 +00007770 compress_added = added;
7771 return OK;
7772}
7773
7774
7775/*
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007776 * Get a wordnode_T, either from the list of previously freed nodes or
7777 * allocate a new one.
7778 */
7779 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007780get_wordnode(spin)
7781 spellinfo_T *spin;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007782{
7783 wordnode_T *n;
7784
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007785 if (spin->si_first_free == NULL)
7786 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007787 else
7788 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007789 n = spin->si_first_free;
7790 spin->si_first_free = n->wn_child;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007791 vim_memset(n, 0, sizeof(wordnode_T));
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007792 --spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007793 }
7794#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007795 n->wn_nr = ++spin->si_wordnode_nr;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007796#endif
7797 return n;
7798}
7799
7800/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007801 * Decrement the reference count on a node (which is the head of a list of
7802 * siblings). If the reference count becomes zero free the node and its
7803 * siblings.
Bram Moolenaar4770d092006-01-12 23:22:24 +00007804 * Returns the number of nodes actually freed.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007805 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00007806 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007807deref_wordnode(spin, node)
7808 spellinfo_T *spin;
7809 wordnode_T *node;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007810{
Bram Moolenaar4770d092006-01-12 23:22:24 +00007811 wordnode_T *np;
7812 int cnt = 0;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007813
7814 if (--node->wn_refs == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007815 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007816 for (np = node; np != NULL; np = np->wn_sibling)
7817 {
7818 if (np->wn_child != NULL)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007819 cnt += deref_wordnode(spin, np->wn_child);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007820 free_wordnode(spin, np);
Bram Moolenaar4770d092006-01-12 23:22:24 +00007821 ++cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007822 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007823 ++cnt; /* length field */
7824 }
7825 return cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007826}
7827
7828/*
7829 * Free a wordnode_T for re-use later.
7830 * Only the "wn_child" field becomes invalid.
7831 */
7832 static void
7833free_wordnode(spin, n)
7834 spellinfo_T *spin;
7835 wordnode_T *n;
7836{
7837 n->wn_child = spin->si_first_free;
7838 spin->si_first_free = n;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007839 ++spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007840}
7841
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007842/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007843 * Compress a tree: find tails that are identical and can be shared.
7844 */
7845 static void
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007846wordtree_compress(spin, root)
Bram Moolenaarb765d632005-06-07 21:00:02 +00007847 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007848 wordnode_T *root;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007849{
7850 hashtab_T ht;
7851 int n;
7852 int tot = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007853 int perc;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007854
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007855 /* Skip the root itself, it's not actually used. The first sibling is the
7856 * start of the tree. */
7857 if (root->wn_sibling != NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007858 {
7859 hash_init(&ht);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007860 n = node_compress(spin, root->wn_sibling, &ht, &tot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007861
7862#ifndef SPELL_PRINTTREE
Bram Moolenaarb765d632005-06-07 21:00:02 +00007863 if (spin->si_verbose || p_verbose > 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007864#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00007865 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007866 if (tot > 1000000)
7867 perc = (tot - n) / (tot / 100);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007868 else if (tot == 0)
7869 perc = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007870 else
7871 perc = (tot - n) * 100 / tot;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007872 vim_snprintf((char *)IObuff, IOSIZE,
7873 _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7874 n, tot, tot - n, perc);
7875 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007876 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007877#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007878 spell_print_tree(root->wn_sibling);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007879#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00007880 hash_clear(&ht);
7881 }
7882}
7883
7884/*
7885 * Compress a node, its siblings and its children, depth first.
7886 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007887 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007888 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007889node_compress(spin, node, ht, tot)
7890 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007891 wordnode_T *node;
7892 hashtab_T *ht;
7893 int *tot; /* total count of nodes before compressing,
7894 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007895{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007896 wordnode_T *np;
7897 wordnode_T *tp;
7898 wordnode_T *child;
7899 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007900 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007901 int len = 0;
7902 unsigned nr, n;
7903 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007904
Bram Moolenaar51485f02005-06-04 21:55:20 +00007905 /*
7906 * Go through the list of siblings. Compress each child and then try
7907 * finding an identical child to replace it.
7908 * Note that with "child" we mean not just the node that is pointed to,
Bram Moolenaar4770d092006-01-12 23:22:24 +00007909 * but the whole list of siblings of which the child node is the first.
Bram Moolenaar51485f02005-06-04 21:55:20 +00007910 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007911 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007912 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007913 ++len;
7914 if ((child = np->wn_child) != NULL)
7915 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007916 /* Compress the child first. This fills hashkey. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007917 compressed += node_compress(spin, child, ht, tot);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007918
7919 /* Try to find an identical child. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007920 hash = hash_hash(child->wn_u1.hashkey);
7921 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007922 if (!HASHITEM_EMPTY(hi))
7923 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007924 /* There are children we encountered before with a hash value
7925 * identical to the current child. Now check if there is one
7926 * that is really identical. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007927 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007928 if (node_equal(child, tp))
7929 {
7930 /* Found one! Now use that child in place of the
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007931 * current one. This means the current child and all
7932 * its siblings is unlinked from the tree. */
7933 ++tp->wn_refs;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007934 compressed += deref_wordnode(spin, child);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007935 np->wn_child = tp;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007936 break;
7937 }
7938 if (tp == NULL)
7939 {
7940 /* No other child with this hash value equals the child of
7941 * the node, add it to the linked list after the first
7942 * item. */
7943 tp = HI2WN(hi);
Bram Moolenaar0c405862005-06-22 22:26:26 +00007944 child->wn_u2.next = tp->wn_u2.next;
7945 tp->wn_u2.next = child;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007946 }
7947 }
7948 else
7949 /* No other child has this hash value, add it to the
7950 * hashtable. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007951 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007952 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007953 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007954 *tot += len + 1; /* add one for the node that stores the length */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007955
7956 /*
7957 * Make a hash key for the node and its siblings, so that we can quickly
7958 * find a lookalike node. This must be done after compressing the sibling
7959 * list, otherwise the hash key would become invalid by the compression.
7960 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007961 node->wn_u1.hashkey[0] = len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007962 nr = 0;
7963 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007964 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007965 if (np->wn_byte == NUL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007966 /* end node: use wn_flags, wn_region and wn_affixID */
7967 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007968 else
7969 /* byte node: use the byte value and the child pointer */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007970 n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
Bram Moolenaar51485f02005-06-04 21:55:20 +00007971 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007972 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007973
7974 /* Avoid NUL bytes, it terminates the hash key. */
7975 n = nr & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007976 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007977 n = (nr >> 8) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007978 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007979 n = (nr >> 16) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007980 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007981 n = (nr >> 24) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007982 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
7983 node->wn_u1.hashkey[5] = NUL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007984
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007985 /* Check for CTRL-C pressed now and then. */
7986 fast_breakcheck();
7987
Bram Moolenaar51485f02005-06-04 21:55:20 +00007988 return compressed;
7989}
7990
7991/*
7992 * Return TRUE when two nodes have identical siblings and children.
7993 */
7994 static int
7995node_equal(n1, n2)
7996 wordnode_T *n1;
7997 wordnode_T *n2;
7998{
7999 wordnode_T *p1;
8000 wordnode_T *p2;
8001
8002 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
8003 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
8004 if (p1->wn_byte != p2->wn_byte
8005 || (p1->wn_byte == NUL
8006 ? (p1->wn_flags != p2->wn_flags
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008007 || p1->wn_region != p2->wn_region
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008008 || p1->wn_affixID != p2->wn_affixID)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008009 : (p1->wn_child != p2->wn_child)))
8010 break;
8011
8012 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008013}
8014
8015/*
8016 * Write a number to file "fd", MSB first, in "len" bytes.
8017 */
Bram Moolenaar55debbe2010-05-23 23:34:36 +02008018 int
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008019put_bytes(fd, nr, len)
8020 FILE *fd;
8021 long_u nr;
8022 int len;
8023{
8024 int i;
8025
8026 for (i = len - 1; i >= 0; --i)
Bram Moolenaar55debbe2010-05-23 23:34:36 +02008027 if (putc((int)(nr >> (i * 8)), fd) == EOF)
8028 return FAIL;
8029 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008030}
8031
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008032#ifdef _MSC_VER
8033# if (_MSC_VER <= 1200)
8034/* This line is required for VC6 without the service pack. Also see the
8035 * matching #pragma below. */
Bram Moolenaar5fdec472007-07-24 08:45:13 +00008036 # pragma optimize("", off)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008037# endif
8038#endif
8039
Bram Moolenaar4770d092006-01-12 23:22:24 +00008040/*
8041 * Write spin->si_sugtime to file "fd".
8042 */
8043 static void
8044put_sugtime(spin, fd)
8045 spellinfo_T *spin;
8046 FILE *fd;
8047{
8048 int c;
8049 int i;
8050
8051 /* time_t can be up to 8 bytes in size, more than long_u, thus we
8052 * can't use put_bytes() here. */
8053 for (i = 7; i >= 0; --i)
Bram Moolenaar2c4278f2009-05-17 11:33:22 +00008054 if (i + 1 > (int)sizeof(time_t))
Bram Moolenaar4770d092006-01-12 23:22:24 +00008055 /* ">>" doesn't work well when shifting more bits than avail */
8056 putc(0, fd);
8057 else
8058 {
8059 c = (unsigned)spin->si_sugtime >> (i * 8);
8060 putc(c, fd);
8061 }
8062}
8063
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008064#ifdef _MSC_VER
8065# if (_MSC_VER <= 1200)
Bram Moolenaar5fdec472007-07-24 08:45:13 +00008066 # pragma optimize("", on)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008067# endif
8068#endif
8069
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008070static int
8071#ifdef __BORLANDC__
8072_RTLENTRYF
8073#endif
8074rep_compare __ARGS((const void *s1, const void *s2));
8075
8076/*
8077 * Function given to qsort() to sort the REP items on "from" string.
8078 */
8079 static int
8080#ifdef __BORLANDC__
8081_RTLENTRYF
8082#endif
8083rep_compare(s1, s2)
8084 const void *s1;
8085 const void *s2;
8086{
8087 fromto_T *p1 = (fromto_T *)s1;
8088 fromto_T *p2 = (fromto_T *)s2;
8089
8090 return STRCMP(p1->ft_from, p2->ft_from);
8091}
8092
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008093/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00008094 * Write the Vim .spl file "fname".
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008095 * Return FAIL or OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008096 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008097 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008098write_vim_spell(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008099 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008100 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008101{
Bram Moolenaar51485f02005-06-04 21:55:20 +00008102 FILE *fd;
8103 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008104 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008105 wordnode_T *tree;
8106 int nodecount;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008107 int i;
8108 int l;
8109 garray_T *gap;
8110 fromto_T *ftp;
8111 char_u *p;
8112 int rr;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008113 int retval = OK;
Bram Moolenaar2eb6eb32008-11-29 19:19:19 +00008114 size_t fwv = 1; /* collect return value of fwrite() to avoid
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008115 warnings from picky compiler */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008116
Bram Moolenaarb765d632005-06-07 21:00:02 +00008117 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00008118 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008119 {
8120 EMSG2(_(e_notopen), fname);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008121 return FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008122 }
8123
Bram Moolenaar5195e452005-08-19 20:32:47 +00008124 /* <HEADER>: <fileID> <versionnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008125 /* <fileID> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008126 fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
Bram Moolenaar2eb6eb32008-11-29 19:19:19 +00008127 if (fwv != (size_t)1)
8128 /* Catch first write error, don't try writing more. */
8129 goto theend;
8130
Bram Moolenaar5195e452005-08-19 20:32:47 +00008131 putc(VIMSPELLVERSION, fd); /* <versionnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008132
Bram Moolenaar5195e452005-08-19 20:32:47 +00008133 /*
8134 * <SECTIONS>: <section> ... <sectionend>
8135 */
8136
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008137 /* SN_INFO: <infotext> */
8138 if (spin->si_info != NULL)
8139 {
8140 putc(SN_INFO, fd); /* <sectionID> */
8141 putc(0, fd); /* <sectionflags> */
8142
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008143 i = (int)STRLEN(spin->si_info);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008144 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008145 fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008146 }
8147
Bram Moolenaar5195e452005-08-19 20:32:47 +00008148 /* SN_REGION: <regionname> ...
8149 * Write the region names only if there is more than one. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00008150 if (spin->si_region_count > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008151 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00008152 putc(SN_REGION, fd); /* <sectionID> */
8153 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8154 l = spin->si_region_count * 2;
8155 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008156 fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008157 /* <regionname> ... */
Bram Moolenaar3982c542005-06-08 21:56:31 +00008158 regionmask = (1 << spin->si_region_count) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008159 }
8160 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00008161 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008162
Bram Moolenaar5195e452005-08-19 20:32:47 +00008163 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
8164 *
8165 * The table with character flags and the table for case folding.
8166 * This makes sure the same characters are recognized as word characters
8167 * when generating an when using a spell file.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00008168 * Skip this for ASCII, the table may conflict with the one used for
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008169 * 'encoding'.
8170 * Also skip this for an .add.spl file, the main spell file must contain
8171 * the table (avoids that it conflicts). File is shorter too.
8172 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00008173 if (!spin->si_ascii && !spin->si_add)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00008174 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00008175 char_u folchars[128 * 8];
8176 int flags;
8177
Bram Moolenaard12a1322005-08-21 22:08:24 +00008178 putc(SN_CHARFLAGS, fd); /* <sectionID> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00008179 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8180
8181 /* Form the <folchars> string first, we need to know its length. */
8182 l = 0;
8183 for (i = 128; i < 256; ++i)
8184 {
8185#ifdef FEAT_MBYTE
8186 if (has_mbyte)
8187 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
8188 else
8189#endif
8190 folchars[l++] = spelltab.st_fold[i];
8191 }
8192 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
8193
8194 fputc(128, fd); /* <charflagslen> */
8195 for (i = 128; i < 256; ++i)
8196 {
8197 flags = 0;
8198 if (spelltab.st_isw[i])
8199 flags |= CF_WORD;
8200 if (spelltab.st_isu[i])
8201 flags |= CF_UPPER;
8202 fputc(flags, fd); /* <charflags> */
8203 }
8204
8205 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008206 fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00008207 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00008208
Bram Moolenaar5195e452005-08-19 20:32:47 +00008209 /* SN_MIDWORD: <midword> */
8210 if (spin->si_midword != NULL)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00008211 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00008212 putc(SN_MIDWORD, fd); /* <sectionID> */
8213 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8214
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008215 i = (int)STRLEN(spin->si_midword);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008216 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008217 fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
8218 /* <midword> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00008219 }
8220
Bram Moolenaar5195e452005-08-19 20:32:47 +00008221 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
8222 if (spin->si_prefcond.ga_len > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008223 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00008224 putc(SN_PREFCOND, fd); /* <sectionID> */
8225 putc(SNF_REQUIRED, fd); /* <sectionflags> */
8226
8227 l = write_spell_prefcond(NULL, &spin->si_prefcond);
8228 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8229
8230 write_spell_prefcond(fd, &spin->si_prefcond);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008231 }
8232
Bram Moolenaar5195e452005-08-19 20:32:47 +00008233 /* SN_REP: <repcount> <rep> ...
Bram Moolenaar4770d092006-01-12 23:22:24 +00008234 * SN_SAL: <salflags> <salcount> <sal> ...
8235 * SN_REPSAL: <repcount> <rep> ... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008236
Bram Moolenaar5195e452005-08-19 20:32:47 +00008237 /* round 1: SN_REP section
Bram Moolenaar4770d092006-01-12 23:22:24 +00008238 * round 2: SN_SAL section (unless SN_SOFO is used)
8239 * round 3: SN_REPSAL section */
8240 for (round = 1; round <= 3; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008241 {
8242 if (round == 1)
8243 gap = &spin->si_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008244 else if (round == 2)
8245 {
8246 /* Don't write SN_SAL when using a SN_SOFO section */
8247 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8248 continue;
8249 gap = &spin->si_sal;
Bram Moolenaar5195e452005-08-19 20:32:47 +00008250 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008251 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00008252 gap = &spin->si_repsal;
8253
8254 /* Don't write the section if there are no items. */
8255 if (gap->ga_len == 0)
8256 continue;
8257
8258 /* Sort the REP/REPSAL items. */
8259 if (round != 2)
8260 qsort(gap->ga_data, (size_t)gap->ga_len,
8261 sizeof(fromto_T), rep_compare);
8262
8263 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
8264 putc(i, fd); /* <sectionID> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008265
Bram Moolenaar5195e452005-08-19 20:32:47 +00008266 /* This is for making suggestions, section is not required. */
8267 putc(0, fd); /* <sectionflags> */
8268
8269 /* Compute the length of what follows. */
8270 l = 2; /* count <repcount> or <salcount> */
8271 for (i = 0; i < gap->ga_len; ++i)
8272 {
8273 ftp = &((fromto_T *)gap->ga_data)[i];
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008274 l += 1 + (int)STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */
8275 l += 1 + (int)STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00008276 }
8277 if (round == 2)
8278 ++l; /* count <salflags> */
8279 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
8280
8281 if (round == 2)
8282 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008283 i = 0;
8284 if (spin->si_followup)
8285 i |= SAL_F0LLOWUP;
8286 if (spin->si_collapse)
8287 i |= SAL_COLLAPSE;
8288 if (spin->si_rem_accents)
8289 i |= SAL_REM_ACCENTS;
8290 putc(i, fd); /* <salflags> */
8291 }
8292
8293 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
8294 for (i = 0; i < gap->ga_len; ++i)
8295 {
8296 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
8297 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
8298 ftp = &((fromto_T *)gap->ga_data)[i];
8299 for (rr = 1; rr <= 2; ++rr)
8300 {
8301 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008302 l = (int)STRLEN(p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008303 putc(l, fd);
Bram Moolenaar9bf13612008-11-29 19:11:40 +00008304 if (l > 0)
8305 fwv &= fwrite(p, l, (size_t)1, fd);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008306 }
8307 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00008308
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008309 }
8310
Bram Moolenaar5195e452005-08-19 20:32:47 +00008311 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
8312 * This is for making suggestions, section is not required. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008313 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8314 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00008315 putc(SN_SOFO, fd); /* <sectionID> */
8316 putc(0, fd); /* <sectionflags> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008317
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008318 l = (int)STRLEN(spin->si_sofofr);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008319 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
8320 /* <sectionlen> */
8321
8322 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008323 fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008324
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008325 l = (int)STRLEN(spin->si_sofoto);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008326 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008327 fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008328 }
8329
Bram Moolenaar4770d092006-01-12 23:22:24 +00008330 /* SN_WORDS: <word> ...
8331 * This is for making suggestions, section is not required. */
8332 if (spin->si_commonwords.ht_used > 0)
8333 {
8334 putc(SN_WORDS, fd); /* <sectionID> */
8335 putc(0, fd); /* <sectionflags> */
8336
8337 /* round 1: count the bytes
8338 * round 2: write the bytes */
8339 for (round = 1; round <= 2; ++round)
8340 {
8341 int todo;
8342 int len = 0;
8343 hashitem_T *hi;
8344
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008345 todo = (int)spin->si_commonwords.ht_used;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008346 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
8347 if (!HASHITEM_EMPTY(hi))
8348 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008349 l = (int)STRLEN(hi->hi_key) + 1;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008350 len += l;
8351 if (round == 2) /* <word> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008352 fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008353 --todo;
8354 }
8355 if (round == 1)
8356 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
8357 }
8358 }
8359
Bram Moolenaar5195e452005-08-19 20:32:47 +00008360 /* SN_MAP: <mapstr>
8361 * This is for making suggestions, section is not required. */
8362 if (spin->si_map.ga_len > 0)
8363 {
8364 putc(SN_MAP, fd); /* <sectionID> */
8365 putc(0, fd); /* <sectionflags> */
8366 l = spin->si_map.ga_len;
8367 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008368 fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008369 /* <mapstr> */
8370 }
8371
Bram Moolenaar4770d092006-01-12 23:22:24 +00008372 /* SN_SUGFILE: <timestamp>
8373 * This is used to notify that a .sug file may be available and at the
8374 * same time allows for checking that a .sug file that is found matches
8375 * with this .spl file. That's because the word numbers must be exactly
8376 * right. */
8377 if (!spin->si_nosugfile
8378 && (spin->si_sal.ga_len > 0
8379 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
8380 {
8381 putc(SN_SUGFILE, fd); /* <sectionID> */
8382 putc(0, fd); /* <sectionflags> */
8383 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
8384
8385 /* Set si_sugtime and write it to the file. */
8386 spin->si_sugtime = time(NULL);
8387 put_sugtime(spin, fd); /* <timestamp> */
8388 }
8389
Bram Moolenaare1438bb2006-03-01 22:01:55 +00008390 /* SN_NOSPLITSUGS: nothing
8391 * This is used to notify that no suggestions with word splits are to be
8392 * made. */
8393 if (spin->si_nosplitsugs)
8394 {
8395 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
8396 putc(0, fd); /* <sectionflags> */
8397 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8398 }
8399
Bram Moolenaar5195e452005-08-19 20:32:47 +00008400 /* SN_COMPOUND: compound info.
8401 * We don't mark it required, when not supported all compound words will
8402 * be bad words. */
8403 if (spin->si_compflags != NULL)
8404 {
8405 putc(SN_COMPOUND, fd); /* <sectionID> */
8406 putc(0, fd); /* <sectionflags> */
8407
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008408 l = (int)STRLEN(spin->si_compflags);
Bram Moolenaar899dddf2006-03-26 21:06:50 +00008409 for (i = 0; i < spin->si_comppat.ga_len; ++i)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008410 l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00008411 put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
8412
Bram Moolenaar5195e452005-08-19 20:32:47 +00008413 putc(spin->si_compmax, fd); /* <compmax> */
8414 putc(spin->si_compminlen, fd); /* <compminlen> */
8415 putc(spin->si_compsylmax, fd); /* <compsylmax> */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00008416 putc(0, fd); /* for Vim 7.0b compatibility */
8417 putc(spin->si_compoptions, fd); /* <compoptions> */
8418 put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
8419 /* <comppatcount> */
8420 for (i = 0; i < spin->si_comppat.ga_len; ++i)
8421 {
8422 p = ((char_u **)(spin->si_comppat.ga_data))[i];
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008423 putc((int)STRLEN(p), fd); /* <comppatlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008424 fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
8425 /* <comppattext> */
Bram Moolenaar899dddf2006-03-26 21:06:50 +00008426 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00008427 /* <compflags> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008428 fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
Bram Moolenaar899dddf2006-03-26 21:06:50 +00008429 (size_t)1, fd);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008430 }
8431
Bram Moolenaar78622822005-08-23 21:00:13 +00008432 /* SN_NOBREAK: NOBREAK flag */
8433 if (spin->si_nobreak)
8434 {
8435 putc(SN_NOBREAK, fd); /* <sectionID> */
8436 putc(0, fd); /* <sectionflags> */
8437
Bram Moolenaarf711faf2007-05-10 16:48:19 +00008438 /* It's empty, the presence of the section flags the feature. */
Bram Moolenaar78622822005-08-23 21:00:13 +00008439 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
8440 }
8441
Bram Moolenaar5195e452005-08-19 20:32:47 +00008442 /* SN_SYLLABLE: syllable info.
8443 * We don't mark it required, when not supported syllables will not be
8444 * counted. */
8445 if (spin->si_syllable != NULL)
8446 {
8447 putc(SN_SYLLABLE, fd); /* <sectionID> */
8448 putc(0, fd); /* <sectionflags> */
8449
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008450 l = (int)STRLEN(spin->si_syllable);
Bram Moolenaar5195e452005-08-19 20:32:47 +00008451 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008452 fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
8453 /* <syllable> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00008454 }
8455
8456 /* end of <SECTIONS> */
8457 putc(SN_END, fd); /* <sectionend> */
8458
Bram Moolenaar50cde822005-06-05 21:54:54 +00008459
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008460 /*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008461 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008462 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008463 spin->si_memtot = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008464 for (round = 1; round <= 3; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008465 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008466 if (round == 1)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00008467 tree = spin->si_foldroot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008468 else if (round == 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00008469 tree = spin->si_keeproot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008470 else
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00008471 tree = spin->si_prefroot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008472
Bram Moolenaar0c405862005-06-22 22:26:26 +00008473 /* Clear the index and wnode fields in the tree. */
8474 clear_node(tree);
8475
Bram Moolenaar51485f02005-06-04 21:55:20 +00008476 /* Count the number of nodes. Needed to be able to allocate the
Bram Moolenaar0c405862005-06-22 22:26:26 +00008477 * memory when reading the nodes. Also fills in index for shared
Bram Moolenaar51485f02005-06-04 21:55:20 +00008478 * nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008479 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008480
Bram Moolenaar51485f02005-06-04 21:55:20 +00008481 /* number of nodes in 4 bytes */
8482 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00008483 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008484
Bram Moolenaar51485f02005-06-04 21:55:20 +00008485 /* Write the nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008486 (void)put_node(fd, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008487 }
8488
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008489 /* Write another byte to check for errors (file system full). */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008490 if (putc(0, fd) == EOF)
8491 retval = FAIL;
Bram Moolenaar2eb6eb32008-11-29 19:19:19 +00008492theend:
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008493 if (fclose(fd) == EOF)
8494 retval = FAIL;
8495
Bram Moolenaar2eb6eb32008-11-29 19:19:19 +00008496 if (fwv != (size_t)1)
Bram Moolenaar3f3766b2008-11-28 09:08:51 +00008497 retval = FAIL;
8498 if (retval == FAIL)
8499 EMSG(_(e_write));
8500
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008501 return retval;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008502}
8503
8504/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00008505 * Clear the index and wnode fields of "node", it siblings and its
8506 * children. This is needed because they are a union with other items to save
8507 * space.
8508 */
8509 static void
8510clear_node(node)
8511 wordnode_T *node;
8512{
8513 wordnode_T *np;
8514
8515 if (node != NULL)
8516 for (np = node; np != NULL; np = np->wn_sibling)
8517 {
8518 np->wn_u1.index = 0;
8519 np->wn_u2.wnode = NULL;
8520
8521 if (np->wn_byte != NUL)
8522 clear_node(np->wn_child);
8523 }
8524}
8525
8526
8527/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00008528 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008529 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00008530 * This first writes the list of possible bytes (siblings). Then for each
8531 * byte recursively write the children.
8532 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00008533 * NOTE: The code here must match the code in read_tree_node(), since
8534 * assumptions are made about the indexes (so that we don't have to write them
8535 * in the file).
Bram Moolenaar51485f02005-06-04 21:55:20 +00008536 *
8537 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008538 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008539 static int
Bram Moolenaar89d40322006-08-29 15:30:07 +00008540put_node(fd, node, idx, regionmask, prefixtree)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008541 FILE *fd; /* NULL when only counting */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008542 wordnode_T *node;
Bram Moolenaar89d40322006-08-29 15:30:07 +00008543 int idx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008544 int regionmask;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008545 int prefixtree; /* TRUE for PREFIXTREE */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008546{
Bram Moolenaar89d40322006-08-29 15:30:07 +00008547 int newindex = idx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008548 int siblingcount = 0;
8549 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008550 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008551
Bram Moolenaar51485f02005-06-04 21:55:20 +00008552 /* If "node" is zero the tree is empty. */
8553 if (node == NULL)
8554 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008555
Bram Moolenaar51485f02005-06-04 21:55:20 +00008556 /* Store the index where this node is written. */
Bram Moolenaar89d40322006-08-29 15:30:07 +00008557 node->wn_u1.index = idx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008558
8559 /* Count the number of siblings. */
8560 for (np = node; np != NULL; np = np->wn_sibling)
8561 ++siblingcount;
8562
8563 /* Write the sibling count. */
8564 if (fd != NULL)
8565 putc(siblingcount, fd); /* <siblingcount> */
8566
8567 /* Write each sibling byte and optionally extra info. */
8568 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008569 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008570 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008571 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008572 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008573 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008574 /* For a NUL byte (end of word) write the flags etc. */
8575 if (prefixtree)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008576 {
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008577 /* In PREFIXTREE write the required affixID and the
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008578 * associated condition nr (stored in wn_region). The
8579 * byte value is misused to store the "rare" and "not
8580 * combining" flags */
Bram Moolenaar53805d12005-08-01 07:08:33 +00008581 if (np->wn_flags == (short_u)PFX_FLAGS)
8582 putc(BY_NOFLAGS, fd); /* <byte> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00008583 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00008584 {
8585 putc(BY_FLAGS, fd); /* <byte> */
8586 putc(np->wn_flags, fd); /* <pflags> */
8587 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008588 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008589 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008590 }
8591 else
8592 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008593 /* For word trees we write the flag/region items. */
8594 flags = np->wn_flags;
8595 if (regionmask != 0 && np->wn_region != regionmask)
8596 flags |= WF_REGION;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008597 if (np->wn_affixID != 0)
8598 flags |= WF_AFX;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008599 if (flags == 0)
8600 {
8601 /* word without flags or region */
8602 putc(BY_NOFLAGS, fd); /* <byte> */
8603 }
8604 else
8605 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008606 if (np->wn_flags >= 0x100)
8607 {
8608 putc(BY_FLAGS2, fd); /* <byte> */
8609 putc(flags, fd); /* <flags> */
8610 putc((unsigned)flags >> 8, fd); /* <flags2> */
8611 }
8612 else
8613 {
8614 putc(BY_FLAGS, fd); /* <byte> */
8615 putc(flags, fd); /* <flags> */
8616 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008617 if (flags & WF_REGION)
8618 putc(np->wn_region, fd); /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00008619 if (flags & WF_AFX)
8620 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008621 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008622 }
8623 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008624 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00008625 else
8626 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00008627 if (np->wn_child->wn_u1.index != 0
8628 && np->wn_child->wn_u2.wnode != node)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008629 {
8630 /* The child is written elsewhere, write the reference. */
8631 if (fd != NULL)
8632 {
8633 putc(BY_INDEX, fd); /* <byte> */
8634 /* <nodeidx> */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008635 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008636 }
8637 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00008638 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008639 /* We will write the child below and give it an index. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008640 np->wn_child->wn_u2.wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00008641
Bram Moolenaar51485f02005-06-04 21:55:20 +00008642 if (fd != NULL)
8643 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
8644 {
8645 EMSG(_(e_write));
8646 return 0;
8647 }
8648 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008649 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00008650
8651 /* Space used in the array when reading: one for each sibling and one for
8652 * the count. */
8653 newindex += siblingcount + 1;
8654
8655 /* Recursively dump the children of each sibling. */
8656 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar0c405862005-06-22 22:26:26 +00008657 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
8658 newindex = put_node(fd, np->wn_child, newindex, regionmask,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008659 prefixtree);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008660
8661 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008662}
8663
8664
8665/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00008666 * ":mkspell [-ascii] outfile infile ..."
8667 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008668 */
8669 void
8670ex_mkspell(eap)
8671 exarg_T *eap;
8672{
8673 int fcount;
8674 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008675 char_u *arg = eap->arg;
8676 int ascii = FALSE;
8677
8678 if (STRNCMP(arg, "-ascii", 6) == 0)
8679 {
8680 ascii = TRUE;
8681 arg = skipwhite(arg + 6);
8682 }
8683
8684 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
8685 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
8686 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008687 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008688 FreeWild(fcount, fnames);
8689 }
8690}
8691
8692/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00008693 * Create the .sug file.
8694 * Uses the soundfold info in "spin".
8695 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
8696 */
8697 static void
8698spell_make_sugfile(spin, wfname)
8699 spellinfo_T *spin;
8700 char_u *wfname;
8701{
8702 char_u fname[MAXPATHL];
8703 int len;
8704 slang_T *slang;
8705 int free_slang = FALSE;
8706
8707 /*
8708 * Read back the .spl file that was written. This fills the required
8709 * info for soundfolding. This also uses less memory than the
8710 * pointer-linked version of the trie. And it avoids having two versions
8711 * of the code for the soundfolding stuff.
8712 * It might have been done already by spell_reload_one().
8713 */
8714 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
8715 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
8716 break;
8717 if (slang == NULL)
8718 {
8719 spell_message(spin, (char_u *)_("Reading back spell file..."));
8720 slang = spell_load_file(wfname, NULL, NULL, FALSE);
8721 if (slang == NULL)
8722 return;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008723 free_slang = TRUE;
8724 }
8725
8726 /*
8727 * Clear the info in "spin" that is used.
8728 */
8729 spin->si_blocks = NULL;
8730 spin->si_blocks_cnt = 0;
8731 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
8732 spin->si_free_count = 0;
8733 spin->si_first_free = NULL;
8734 spin->si_foldwcount = 0;
8735
8736 /*
8737 * Go through the trie of good words, soundfold each word and add it to
8738 * the soundfold trie.
8739 */
8740 spell_message(spin, (char_u *)_("Performing soundfolding..."));
8741 if (sug_filltree(spin, slang) == FAIL)
8742 goto theend;
8743
8744 /*
8745 * Create the table which links each soundfold word with a list of the
8746 * good words it may come from. Creates buffer "spin->si_spellbuf".
8747 * This also removes the wordnr from the NUL byte entries to make
8748 * compression possible.
8749 */
8750 if (sug_maketable(spin) == FAIL)
8751 goto theend;
8752
8753 smsg((char_u *)_("Number of words after soundfolding: %ld"),
8754 (long)spin->si_spellbuf->b_ml.ml_line_count);
8755
8756 /*
8757 * Compress the soundfold trie.
8758 */
8759 spell_message(spin, (char_u *)_(msg_compressing));
8760 wordtree_compress(spin, spin->si_foldroot);
8761
8762 /*
8763 * Write the .sug file.
8764 * Make the file name by changing ".spl" to ".sug".
8765 */
8766 STRCPY(fname, wfname);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008767 len = (int)STRLEN(fname);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008768 fname[len - 2] = 'u';
8769 fname[len - 1] = 'g';
8770 sug_write(spin, fname);
8771
8772theend:
8773 if (free_slang)
8774 slang_free(slang);
8775 free_blocks(spin->si_blocks);
8776 close_spellbuf(spin->si_spellbuf);
8777}
8778
8779/*
8780 * Build the soundfold trie for language "slang".
8781 */
8782 static int
8783sug_filltree(spin, slang)
8784 spellinfo_T *spin;
8785 slang_T *slang;
8786{
8787 char_u *byts;
8788 idx_T *idxs;
8789 int depth;
8790 idx_T arridx[MAXWLEN];
8791 int curi[MAXWLEN];
8792 char_u tword[MAXWLEN];
8793 char_u tsalword[MAXWLEN];
8794 int c;
8795 idx_T n;
8796 unsigned words_done = 0;
8797 int wordcount[MAXWLEN];
8798
8799 /* We use si_foldroot for the souldfolded trie. */
8800 spin->si_foldroot = wordtree_alloc(spin);
8801 if (spin->si_foldroot == NULL)
8802 return FAIL;
8803
8804 /* let tree_add_word() know we're adding to the soundfolded tree */
8805 spin->si_sugtree = TRUE;
8806
8807 /*
8808 * Go through the whole case-folded tree, soundfold each word and put it
8809 * in the trie.
8810 */
8811 byts = slang->sl_fbyts;
8812 idxs = slang->sl_fidxs;
8813
8814 arridx[0] = 0;
8815 curi[0] = 1;
8816 wordcount[0] = 0;
8817
8818 depth = 0;
8819 while (depth >= 0 && !got_int)
8820 {
8821 if (curi[depth] > byts[arridx[depth]])
8822 {
8823 /* Done all bytes at this node, go up one level. */
8824 idxs[arridx[depth]] = wordcount[depth];
8825 if (depth > 0)
8826 wordcount[depth - 1] += wordcount[depth];
8827
8828 --depth;
8829 line_breakcheck();
8830 }
8831 else
8832 {
8833
8834 /* Do one more byte at this node. */
8835 n = arridx[depth] + curi[depth];
8836 ++curi[depth];
8837
8838 c = byts[n];
8839 if (c == 0)
8840 {
8841 /* Sound-fold the word. */
8842 tword[depth] = NUL;
8843 spell_soundfold(slang, tword, TRUE, tsalword);
8844
8845 /* We use the "flags" field for the MSB of the wordnr,
8846 * "region" for the LSB of the wordnr. */
8847 if (tree_add_word(spin, tsalword, spin->si_foldroot,
8848 words_done >> 16, words_done & 0xffff,
8849 0) == FAIL)
8850 return FAIL;
8851
8852 ++words_done;
8853 ++wordcount[depth];
8854
8855 /* Reset the block count each time to avoid compression
8856 * kicking in. */
8857 spin->si_blocks_cnt = 0;
8858
8859 /* Skip over any other NUL bytes (same word with different
8860 * flags). */
8861 while (byts[n + 1] == 0)
8862 {
8863 ++n;
8864 ++curi[depth];
8865 }
8866 }
8867 else
8868 {
8869 /* Normal char, go one level deeper. */
8870 tword[depth++] = c;
8871 arridx[depth] = idxs[n];
8872 curi[depth] = 1;
8873 wordcount[depth] = 0;
8874 }
8875 }
8876 }
8877
8878 smsg((char_u *)_("Total number of words: %d"), words_done);
8879
8880 return OK;
8881}
8882
8883/*
8884 * Make the table that links each word in the soundfold trie to the words it
8885 * can be produced from.
8886 * This is not unlike lines in a file, thus use a memfile to be able to access
8887 * the table efficiently.
8888 * Returns FAIL when out of memory.
8889 */
8890 static int
8891sug_maketable(spin)
8892 spellinfo_T *spin;
8893{
8894 garray_T ga;
8895 int res = OK;
8896
8897 /* Allocate a buffer, open a memline for it and create the swap file
8898 * (uses a temp file, not a .swp file). */
8899 spin->si_spellbuf = open_spellbuf();
8900 if (spin->si_spellbuf == NULL)
8901 return FAIL;
8902
8903 /* Use a buffer to store the line info, avoids allocating many small
8904 * pieces of memory. */
8905 ga_init2(&ga, 1, 100);
8906
8907 /* recursively go through the tree */
8908 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8909 res = FAIL;
8910
8911 ga_clear(&ga);
8912 return res;
8913}
8914
8915/*
8916 * Fill the table for one node and its children.
8917 * Returns the wordnr at the start of the node.
8918 * Returns -1 when out of memory.
8919 */
8920 static int
8921sug_filltable(spin, node, startwordnr, gap)
8922 spellinfo_T *spin;
8923 wordnode_T *node;
8924 int startwordnr;
8925 garray_T *gap; /* place to store line of numbers */
8926{
8927 wordnode_T *p, *np;
8928 int wordnr = startwordnr;
8929 int nr;
8930 int prev_nr;
8931
8932 for (p = node; p != NULL; p = p->wn_sibling)
8933 {
8934 if (p->wn_byte == NUL)
8935 {
8936 gap->ga_len = 0;
8937 prev_nr = 0;
8938 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8939 {
8940 if (ga_grow(gap, 10) == FAIL)
8941 return -1;
8942
8943 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8944 /* Compute the offset from the previous nr and store the
8945 * offset in a way that it takes a minimum number of bytes.
8946 * It's a bit like utf-8, but without the need to mark
8947 * following bytes. */
8948 nr -= prev_nr;
8949 prev_nr += nr;
8950 gap->ga_len += offset2bytes(nr,
8951 (char_u *)gap->ga_data + gap->ga_len);
8952 }
8953
8954 /* add the NUL byte */
8955 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8956
8957 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8958 gap->ga_data, gap->ga_len, TRUE) == FAIL)
8959 return -1;
8960 ++wordnr;
8961
8962 /* Remove extra NUL entries, we no longer need them. We don't
8963 * bother freeing the nodes, the won't be reused anyway. */
8964 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8965 p->wn_sibling = p->wn_sibling->wn_sibling;
8966
8967 /* Clear the flags on the remaining NUL node, so that compression
8968 * works a lot better. */
8969 p->wn_flags = 0;
8970 p->wn_region = 0;
8971 }
8972 else
8973 {
8974 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8975 if (wordnr == -1)
8976 return -1;
8977 }
8978 }
8979 return wordnr;
8980}
8981
8982/*
8983 * Convert an offset into a minimal number of bytes.
8984 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
8985 * bytes.
8986 */
8987 static int
8988offset2bytes(nr, buf)
8989 int nr;
8990 char_u *buf;
8991{
8992 int rem;
8993 int b1, b2, b3, b4;
8994
8995 /* Split the number in parts of base 255. We need to avoid NUL bytes. */
8996 b1 = nr % 255 + 1;
8997 rem = nr / 255;
8998 b2 = rem % 255 + 1;
8999 rem = rem / 255;
9000 b3 = rem % 255 + 1;
9001 b4 = rem / 255 + 1;
9002
9003 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */
9004 {
9005 buf[0] = 0xe0 + b4;
9006 buf[1] = b3;
9007 buf[2] = b2;
9008 buf[3] = b1;
9009 return 4;
9010 }
9011 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */
9012 {
9013 buf[0] = 0xc0 + b3;
9014 buf[1] = b2;
9015 buf[2] = b1;
9016 return 3;
9017 }
9018 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */
9019 {
9020 buf[0] = 0x80 + b2;
9021 buf[1] = b1;
9022 return 2;
9023 }
9024 /* 1 byte */
9025 buf[0] = b1;
9026 return 1;
9027}
9028
9029/*
9030 * Opposite of offset2bytes().
9031 * "pp" points to the bytes and is advanced over it.
9032 * Returns the offset.
9033 */
9034 static int
9035bytes2offset(pp)
9036 char_u **pp;
9037{
9038 char_u *p = *pp;
9039 int nr;
9040 int c;
9041
9042 c = *p++;
9043 if ((c & 0x80) == 0x00) /* 1 byte */
9044 {
9045 nr = c - 1;
9046 }
9047 else if ((c & 0xc0) == 0x80) /* 2 bytes */
9048 {
9049 nr = (c & 0x3f) - 1;
9050 nr = nr * 255 + (*p++ - 1);
9051 }
9052 else if ((c & 0xe0) == 0xc0) /* 3 bytes */
9053 {
9054 nr = (c & 0x1f) - 1;
9055 nr = nr * 255 + (*p++ - 1);
9056 nr = nr * 255 + (*p++ - 1);
9057 }
9058 else /* 4 bytes */
9059 {
9060 nr = (c & 0x0f) - 1;
9061 nr = nr * 255 + (*p++ - 1);
9062 nr = nr * 255 + (*p++ - 1);
9063 nr = nr * 255 + (*p++ - 1);
9064 }
9065
9066 *pp = p;
9067 return nr;
9068}
9069
9070/*
9071 * Write the .sug file in "fname".
9072 */
9073 static void
9074sug_write(spin, fname)
9075 spellinfo_T *spin;
9076 char_u *fname;
9077{
9078 FILE *fd;
9079 wordnode_T *tree;
9080 int nodecount;
9081 int wcount;
9082 char_u *line;
9083 linenr_T lnum;
9084 int len;
9085
9086 /* Create the file. Note that an existing file is silently overwritten! */
9087 fd = mch_fopen((char *)fname, "w");
9088 if (fd == NULL)
9089 {
9090 EMSG2(_(e_notopen), fname);
9091 return;
9092 }
9093
9094 vim_snprintf((char *)IObuff, IOSIZE,
9095 _("Writing suggestion file %s ..."), fname);
9096 spell_message(spin, IObuff);
9097
9098 /*
9099 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
9100 */
9101 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
9102 {
9103 EMSG(_(e_write));
9104 goto theend;
9105 }
9106 putc(VIMSUGVERSION, fd); /* <versionnr> */
9107
9108 /* Write si_sugtime to the file. */
9109 put_sugtime(spin, fd); /* <timestamp> */
9110
9111 /*
9112 * <SUGWORDTREE>
9113 */
9114 spin->si_memtot = 0;
9115 tree = spin->si_foldroot->wn_sibling;
9116
9117 /* Clear the index and wnode fields in the tree. */
9118 clear_node(tree);
9119
9120 /* Count the number of nodes. Needed to be able to allocate the
9121 * memory when reading the nodes. Also fills in index for shared
9122 * nodes. */
9123 nodecount = put_node(NULL, tree, 0, 0, FALSE);
9124
9125 /* number of nodes in 4 bytes */
9126 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
9127 spin->si_memtot += nodecount + nodecount * sizeof(int);
9128
9129 /* Write the nodes. */
9130 (void)put_node(fd, tree, 0, 0, FALSE);
9131
9132 /*
9133 * <SUGTABLE>: <sugwcount> <sugline> ...
9134 */
9135 wcount = spin->si_spellbuf->b_ml.ml_line_count;
9136 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
9137
9138 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
9139 {
9140 /* <sugline>: <sugnr> ... NUL */
9141 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009142 len = (int)STRLEN(line) + 1;
Bram Moolenaar4770d092006-01-12 23:22:24 +00009143 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
9144 {
9145 EMSG(_(e_write));
9146 goto theend;
9147 }
9148 spin->si_memtot += len;
9149 }
9150
9151 /* Write another byte to check for errors. */
9152 if (putc(0, fd) == EOF)
9153 EMSG(_(e_write));
9154
9155 vim_snprintf((char *)IObuff, IOSIZE,
9156 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
9157 spell_message(spin, IObuff);
9158
9159theend:
9160 /* close the file */
9161 fclose(fd);
9162}
9163
9164/*
9165 * Open a spell buffer. This is a nameless buffer that is not in the buffer
9166 * list and only contains text lines. Can use a swapfile to reduce memory
9167 * use.
9168 * Most other fields are invalid! Esp. watch out for string options being
9169 * NULL and there is no undo info.
9170 * Returns NULL when out of memory.
9171 */
9172 static buf_T *
9173open_spellbuf()
9174{
9175 buf_T *buf;
9176
9177 buf = (buf_T *)alloc_clear(sizeof(buf_T));
9178 if (buf != NULL)
9179 {
9180 buf->b_spell = TRUE;
9181 buf->b_p_swf = TRUE; /* may create a swap file */
9182 ml_open(buf);
9183 ml_open_file(buf); /* create swap file now */
9184 }
9185 return buf;
9186}
9187
9188/*
9189 * Close the buffer used for spell info.
9190 */
9191 static void
9192close_spellbuf(buf)
9193 buf_T *buf;
9194{
9195 if (buf != NULL)
9196 {
9197 ml_close(buf, TRUE);
9198 vim_free(buf);
9199 }
9200}
9201
9202
9203/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00009204 * Create a Vim spell file from one or more word lists.
9205 * "fnames[0]" is the output file name.
9206 * "fnames[fcount - 1]" is the last input file name.
9207 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
9208 * and ".spl" is appended to make the output file name.
9209 */
9210 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009211mkspell(fcount, fnames, ascii, overwrite, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00009212 int fcount;
9213 char_u **fnames;
9214 int ascii; /* -ascii argument given */
9215 int overwrite; /* overwrite existing output file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009216 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009217{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009218 char_u fname[MAXPATHL];
9219 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00009220 char_u **innames;
9221 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009222 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009223 int i;
9224 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009225 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00009226 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00009227 spellinfo_T spin;
9228
9229 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009230 spin.si_verbose = !added_word;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009231 spin.si_ascii = ascii;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009232 spin.si_followup = TRUE;
9233 spin.si_rem_accents = TRUE;
9234 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
Bram Moolenaar4770d092006-01-12 23:22:24 +00009235 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009236 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
9237 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
Bram Moolenaar899dddf2006-03-26 21:06:50 +00009238 ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009239 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
Bram Moolenaar4770d092006-01-12 23:22:24 +00009240 hash_init(&spin.si_commonwords);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00009241 spin.si_newcompID = 127; /* start compound ID at first maximum */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009242
Bram Moolenaarb765d632005-06-07 21:00:02 +00009243 /* default: fnames[0] is output file, following are input files */
9244 innames = &fnames[1];
9245 incount = fcount - 1;
9246
9247 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00009248 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009249 len = (int)STRLEN(fnames[0]);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009250 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
9251 {
9252 /* For ":mkspell path/en.latin1.add" output file is
9253 * "path/en.latin1.add.spl". */
9254 innames = &fnames[0];
9255 incount = 1;
9256 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
9257 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009258 else if (fcount == 1)
9259 {
9260 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
9261 innames = &fnames[0];
9262 incount = 1;
9263 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
9264 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
9265 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00009266 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
9267 {
9268 /* Name ends in ".spl", use as the file name. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009269 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009270 }
9271 else
9272 /* Name should be language, make the file name from it. */
9273 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
9274 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
9275
9276 /* Check for .ascii.spl. */
9277 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
9278 spin.si_ascii = TRUE;
9279
9280 /* Check for .add.spl. */
9281 if (strstr((char *)gettail(wfname), ".add.") != NULL)
9282 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00009283 }
9284
Bram Moolenaarb765d632005-06-07 21:00:02 +00009285 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009286 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009287 else if (vim_strchr(gettail(wfname), '_') != NULL)
9288 EMSG(_("E751: Output file name must not have region name"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00009289 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009290 EMSG(_("E754: Only up to 8 regions supported"));
9291 else
9292 {
9293 /* Check for overwriting before doing things that may take a lot of
9294 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009295 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009296 {
9297 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00009298 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009299 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00009300 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009301 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00009302 EMSG2(_(e_isadir2), wfname);
9303 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009304 }
9305
9306 /*
9307 * Init the aff and dic pointers.
9308 * Get the region names if there are more than 2 arguments.
9309 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009310 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009311 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00009312 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00009313
Bram Moolenaar3982c542005-06-08 21:56:31 +00009314 if (incount > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009315 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009316 len = (int)STRLEN(innames[i]);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009317 if (STRLEN(gettail(innames[i])) < 5
9318 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009319 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00009320 EMSG2(_("E755: Invalid region in %s"), innames[i]);
9321 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009322 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00009323 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
9324 spin.si_region_name[i * 2 + 1] =
9325 TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009326 }
9327 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00009328 spin.si_region_count = incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009329
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009330 spin.si_foldroot = wordtree_alloc(&spin);
9331 spin.si_keeproot = wordtree_alloc(&spin);
9332 spin.si_prefroot = wordtree_alloc(&spin);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009333 if (spin.si_foldroot == NULL
9334 || spin.si_keeproot == NULL
9335 || spin.si_prefroot == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00009336 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00009337 free_blocks(spin.si_blocks);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009338 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00009339 }
9340
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009341 /* When not producing a .add.spl file clear the character table when
9342 * we encounter one in the .aff file. This means we dump the current
9343 * one in the .spl file if the .aff file doesn't define one. That's
9344 * better than guessing the contents, the table will match a
9345 * previously loaded spell file. */
9346 if (!spin.si_add)
9347 spin.si_clear_chartab = TRUE;
9348
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009349 /*
9350 * Read all the .aff and .dic files.
9351 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00009352 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009353 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009354 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009355 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00009356 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009357 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009358
Bram Moolenaarb765d632005-06-07 21:00:02 +00009359 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00009360 if (mch_stat((char *)fname, &st) >= 0)
9361 {
9362 /* Read the .aff file. Will init "spin->si_conv" based on the
9363 * "SET" line. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009364 afile[i] = spell_read_aff(&spin, fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009365 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00009366 error = TRUE;
9367 else
9368 {
9369 /* Read the .dic file and store the words in the trees. */
9370 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00009371 innames[i]);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009372 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00009373 error = TRUE;
9374 }
9375 }
9376 else
9377 {
9378 /* No .aff file, try reading the file as a word list. Store
9379 * the words in the trees. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009380 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00009381 error = TRUE;
9382 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009383
Bram Moolenaarb765d632005-06-07 21:00:02 +00009384#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009385 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00009386 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009387#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009388 }
9389
Bram Moolenaar78622822005-08-23 21:00:13 +00009390 if (spin.si_compflags != NULL && spin.si_nobreak)
9391 MSG(_("Warning: both compounding and NOBREAK specified"));
9392
Bram Moolenaar4770d092006-01-12 23:22:24 +00009393 if (!error && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009394 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00009395 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00009396 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009397 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009398 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009399 wordtree_compress(&spin, spin.si_foldroot);
9400 wordtree_compress(&spin, spin.si_keeproot);
9401 wordtree_compress(&spin, spin.si_prefroot);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009402 }
9403
Bram Moolenaar4770d092006-01-12 23:22:24 +00009404 if (!error && !got_int)
Bram Moolenaar51485f02005-06-04 21:55:20 +00009405 {
9406 /*
9407 * Write the info in the spell file.
9408 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009409 vim_snprintf((char *)IObuff, IOSIZE,
9410 _("Writing spell file %s ..."), wfname);
9411 spell_message(&spin, IObuff);
Bram Moolenaar50cde822005-06-05 21:54:54 +00009412
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00009413 error = write_vim_spell(&spin, wfname) == FAIL;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009414
Bram Moolenaar4770d092006-01-12 23:22:24 +00009415 spell_message(&spin, (char_u *)_("Done!"));
9416 vim_snprintf((char *)IObuff, IOSIZE,
9417 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
9418 spell_message(&spin, IObuff);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009419
Bram Moolenaar4770d092006-01-12 23:22:24 +00009420 /*
9421 * If the file is loaded need to reload it.
9422 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00009423 if (!error)
9424 spell_reload_one(wfname, added_word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00009425 }
9426
9427 /* Free the allocated memory. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009428 ga_clear(&spin.si_rep);
Bram Moolenaar4770d092006-01-12 23:22:24 +00009429 ga_clear(&spin.si_repsal);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009430 ga_clear(&spin.si_sal);
9431 ga_clear(&spin.si_map);
Bram Moolenaar899dddf2006-03-26 21:06:50 +00009432 ga_clear(&spin.si_comppat);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009433 ga_clear(&spin.si_prefcond);
Bram Moolenaar4770d092006-01-12 23:22:24 +00009434 hash_clear_all(&spin.si_commonwords, 0);
Bram Moolenaar51485f02005-06-04 21:55:20 +00009435
9436 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009437 for (i = 0; i < incount; ++i)
9438 if (afile[i] != NULL)
9439 spell_free_aff(afile[i]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009440
9441 /* Free all the bits and pieces at once. */
9442 free_blocks(spin.si_blocks);
Bram Moolenaar4770d092006-01-12 23:22:24 +00009443
9444 /*
9445 * If there is soundfolding info and no NOSUGFILE item create the
9446 * .sug file with the soundfolded word trie.
9447 */
9448 if (spin.si_sugtime != 0 && !error && !got_int)
9449 spell_make_sugfile(&spin, wfname);
9450
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009451 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009452}
9453
Bram Moolenaar4770d092006-01-12 23:22:24 +00009454/*
9455 * Display a message for spell file processing when 'verbose' is set or using
9456 * ":mkspell". "str" can be IObuff.
9457 */
9458 static void
9459spell_message(spin, str)
9460 spellinfo_T *spin;
9461 char_u *str;
9462{
9463 if (spin->si_verbose || p_verbose > 2)
9464 {
9465 if (!spin->si_verbose)
9466 verbose_enter();
9467 MSG(str);
9468 out_flush();
9469 if (!spin->si_verbose)
9470 verbose_leave();
9471 }
9472}
Bram Moolenaarb765d632005-06-07 21:00:02 +00009473
9474/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009475 * ":[count]spellgood {word}"
9476 * ":[count]spellwrong {word}"
Bram Moolenaard0131a82006-03-04 21:46:13 +00009477 * ":[count]spellundo {word}"
Bram Moolenaarb765d632005-06-07 21:00:02 +00009478 */
9479 void
9480ex_spell(eap)
9481 exarg_T *eap;
9482{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009483 spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
Bram Moolenaard0131a82006-03-04 21:46:13 +00009484 eap->forceit ? 0 : (int)eap->line2,
9485 eap->cmdidx == CMD_spellundo);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009486}
9487
9488/*
9489 * Add "word[len]" to 'spellfile' as a good or bad word.
9490 */
9491 void
Bram Moolenaar89d40322006-08-29 15:30:07 +00009492spell_add_word(word, len, bad, idx, undo)
Bram Moolenaarb765d632005-06-07 21:00:02 +00009493 char_u *word;
9494 int len;
9495 int bad;
Bram Moolenaar89d40322006-08-29 15:30:07 +00009496 int idx; /* "zG" and "zW": zero, otherwise index in
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009497 'spellfile' */
Bram Moolenaard0131a82006-03-04 21:46:13 +00009498 int undo; /* TRUE for "zug", "zuG", "zuw" and "zuW" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009499{
Bram Moolenaara3917072006-09-14 08:48:14 +00009500 FILE *fd = NULL;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009501 buf_T *buf = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009502 int new_spf = FALSE;
Bram Moolenaar7887d882005-07-01 22:33:52 +00009503 char_u *fname;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009504 char_u fnamebuf[MAXPATHL];
9505 char_u line[MAXWLEN * 2];
9506 long fpos, fpos_next = 0;
9507 int i;
9508 char_u *spf;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009509
Bram Moolenaar89d40322006-08-29 15:30:07 +00009510 if (idx == 0) /* use internal wordlist */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009511 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009512 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009513 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009514 int_wordlist = vim_tempname('s');
9515 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009516 return;
9517 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009518 fname = int_wordlist;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009519 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00009520 else
9521 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00009522 /* If 'spellfile' isn't set figure out a good default value. */
9523 if (*curbuf->b_p_spf == NUL)
9524 {
9525 init_spellfile();
9526 new_spf = TRUE;
9527 }
9528
9529 if (*curbuf->b_p_spf == NUL)
9530 {
Bram Moolenaarf75a9632005-09-13 21:20:47 +00009531 EMSG2(_(e_notset), "spellfile");
Bram Moolenaar7887d882005-07-01 22:33:52 +00009532 return;
9533 }
9534
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009535 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
9536 {
9537 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
Bram Moolenaar89d40322006-08-29 15:30:07 +00009538 if (i == idx)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009539 break;
9540 if (*spf == NUL)
9541 {
Bram Moolenaar89d40322006-08-29 15:30:07 +00009542 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009543 return;
9544 }
9545 }
9546
Bram Moolenaarb765d632005-06-07 21:00:02 +00009547 /* Check that the user isn't editing the .add file somewhere. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009548 buf = buflist_findname_exp(fnamebuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009549 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
9550 buf = NULL;
9551 if (buf != NULL && bufIsChanged(buf))
Bram Moolenaarb765d632005-06-07 21:00:02 +00009552 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00009553 EMSG(_(e_bufloaded));
9554 return;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009555 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00009556
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009557 fname = fnamebuf;
9558 }
9559
Bram Moolenaard0131a82006-03-04 21:46:13 +00009560 if (bad || undo)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009561 {
Bram Moolenaard0131a82006-03-04 21:46:13 +00009562 /* When the word appears as good word we need to remove that one,
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009563 * since its flags sort before the one with WF_BANNED. */
9564 fd = mch_fopen((char *)fname, "r");
9565 if (fd != NULL)
9566 {
9567 while (!vim_fgets(line, MAXWLEN * 2, fd))
9568 {
9569 fpos = fpos_next;
9570 fpos_next = ftell(fd);
9571 if (STRNCMP(word, line, len) == 0
9572 && (line[len] == '/' || line[len] < ' '))
9573 {
9574 /* Found duplicate word. Remove it by writing a '#' at
9575 * the start of the line. Mixing reading and writing
9576 * doesn't work for all systems, close the file first. */
9577 fclose(fd);
9578 fd = mch_fopen((char *)fname, "r+");
9579 if (fd == NULL)
9580 break;
9581 if (fseek(fd, fpos, SEEK_SET) == 0)
Bram Moolenaard0131a82006-03-04 21:46:13 +00009582 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009583 fputc('#', fd);
Bram Moolenaard0131a82006-03-04 21:46:13 +00009584 if (undo)
Bram Moolenaar2113a1d2006-09-11 19:38:08 +00009585 {
9586 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaarf193fff2006-04-27 00:02:13 +00009587 smsg((char_u *)_("Word removed from %s"), NameBuff);
Bram Moolenaar2113a1d2006-09-11 19:38:08 +00009588 }
Bram Moolenaard0131a82006-03-04 21:46:13 +00009589 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009590 fseek(fd, fpos_next, SEEK_SET);
9591 }
9592 }
9593 fclose(fd);
9594 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00009595 }
Bram Moolenaarac2adc72006-09-12 20:25:24 +00009596
9597 if (!undo)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009598 {
Bram Moolenaard0131a82006-03-04 21:46:13 +00009599 fd = mch_fopen((char *)fname, "a");
9600 if (fd == NULL && new_spf)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009601 {
Bram Moolenaarac2adc72006-09-12 20:25:24 +00009602 char_u *p;
9603
Bram Moolenaard0131a82006-03-04 21:46:13 +00009604 /* We just initialized the 'spellfile' option and can't open the
9605 * file. We may need to create the "spell" directory first. We
9606 * already checked the runtime directory is writable in
9607 * init_spellfile(). */
Bram Moolenaarac2adc72006-09-12 20:25:24 +00009608 if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
Bram Moolenaard0131a82006-03-04 21:46:13 +00009609 {
Bram Moolenaarac2adc72006-09-12 20:25:24 +00009610 int c = *p;
9611
Bram Moolenaard0131a82006-03-04 21:46:13 +00009612 /* The directory doesn't exist. Try creating it and opening
9613 * the file again. */
Bram Moolenaarac2adc72006-09-12 20:25:24 +00009614 *p = NUL;
9615 vim_mkdir(fname, 0755);
9616 *p = c;
Bram Moolenaard0131a82006-03-04 21:46:13 +00009617 fd = mch_fopen((char *)fname, "a");
9618 }
9619 }
9620
9621 if (fd == NULL)
9622 EMSG2(_(e_notopen), fname);
9623 else
9624 {
9625 if (bad)
9626 fprintf(fd, "%.*s/!\n", len, word);
9627 else
9628 fprintf(fd, "%.*s\n", len, word);
9629 fclose(fd);
9630
9631 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9632 smsg((char_u *)_("Word added to %s"), NameBuff);
Bram Moolenaar7887d882005-07-01 22:33:52 +00009633 }
9634 }
9635
Bram Moolenaard0131a82006-03-04 21:46:13 +00009636 if (fd != NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009637 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00009638 /* Update the .add.spl file. */
9639 mkspell(1, &fname, FALSE, TRUE, TRUE);
9640
9641 /* If the .add file is edited somewhere, reload it. */
9642 if (buf != NULL)
Bram Moolenaarea8bd732006-01-14 21:15:59 +00009643 buf_reload(buf, buf->b_orig_mode);
Bram Moolenaar7887d882005-07-01 22:33:52 +00009644
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00009645 redraw_all_later(SOME_VALID);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009646 }
9647}
9648
9649/*
9650 * Initialize 'spellfile' for the current buffer.
9651 */
9652 static void
9653init_spellfile()
9654{
9655 char_u buf[MAXPATHL];
9656 int l;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00009657 char_u *fname;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009658 char_u *rtp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009659 char_u *lend;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009660 int aspath = FALSE;
9661 char_u *lstart = curbuf->b_p_spl;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009662
9663 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
9664 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009665 /* Find the end of the language name. Exclude the region. If there
9666 * is a path separator remember the start of the tail. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009667 for (lend = curbuf->b_p_spl; *lend != NUL
9668 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009669 if (vim_ispathsep(*lend))
9670 {
9671 aspath = TRUE;
9672 lstart = lend + 1;
9673 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009674
9675 /* Loop over all entries in 'runtimepath'. Use the first one where we
9676 * are allowed to write. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009677 rtp = p_rtp;
9678 while (*rtp != NUL)
9679 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009680 if (aspath)
9681 /* Use directory of an entry with path, e.g., for
9682 * "/dir/lg.utf-8.spl" use "/dir". */
9683 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
9684 else
9685 /* Copy the path from 'runtimepath' to buf[]. */
9686 copy_option_part(&rtp, buf, MAXPATHL, ",");
Bram Moolenaarb765d632005-06-07 21:00:02 +00009687 if (filewritable(buf) == 2)
9688 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00009689 /* Use the first language name from 'spelllang' and the
9690 * encoding used in the first loaded .spl file. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009691 if (aspath)
9692 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
9693 else
9694 {
Bram Moolenaar910f66f2006-04-05 20:41:53 +00009695 /* Create the "spell" directory if it doesn't exist yet. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009696 l = (int)STRLEN(buf);
Bram Moolenaar910f66f2006-04-05 20:41:53 +00009697 vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
9698 if (!filewritable(buf) != 2)
9699 vim_mkdir(buf, 0755);
9700
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009701 l = (int)STRLEN(buf);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009702 vim_snprintf((char *)buf + l, MAXPATHL - l,
Bram Moolenaar910f66f2006-04-05 20:41:53 +00009703 "/%.*s", (int)(lend - lstart), lstart);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009704 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00009705 l = (int)STRLEN(buf);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009706 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
9707 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
9708 fname != NULL
9709 && strstr((char *)gettail(fname), ".ascii.") != NULL
9710 ? (char_u *)"ascii" : spell_enc());
Bram Moolenaarb765d632005-06-07 21:00:02 +00009711 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
9712 break;
9713 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009714 aspath = FALSE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009715 }
9716 }
9717}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009718
Bram Moolenaar51485f02005-06-04 21:55:20 +00009719
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009720/*
9721 * Init the chartab used for spelling for ASCII.
9722 * EBCDIC is not supported!
9723 */
9724 static void
9725clear_spell_chartab(sp)
9726 spelltab_T *sp;
9727{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009728 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009729
9730 /* Init everything to FALSE. */
9731 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
9732 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
9733 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009734 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009735 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009736 sp->st_upper[i] = i;
9737 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009738
9739 /* We include digits. A word shouldn't start with a digit, but handling
9740 * that is done separately. */
9741 for (i = '0'; i <= '9'; ++i)
9742 sp->st_isw[i] = TRUE;
9743 for (i = 'A'; i <= 'Z'; ++i)
9744 {
9745 sp->st_isw[i] = TRUE;
9746 sp->st_isu[i] = TRUE;
9747 sp->st_fold[i] = i + 0x20;
9748 }
9749 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009750 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009751 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009752 sp->st_upper[i] = i - 0x20;
9753 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009754}
9755
9756/*
9757 * Init the chartab used for spelling. Only depends on 'encoding'.
9758 * Called once while starting up and when 'encoding' changes.
9759 * The default is to use isalpha(), but the spell file should define the word
9760 * characters to make it possible that 'encoding' differs from the current
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009761 * locale. For utf-8 we don't use isalpha() but our own functions.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009762 */
9763 void
9764init_spell_chartab()
9765{
9766 int i;
9767
9768 did_set_spelltab = FALSE;
9769 clear_spell_chartab(&spelltab);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009770#ifdef FEAT_MBYTE
9771 if (enc_dbcs)
9772 {
9773 /* DBCS: assume double-wide characters are word characters. */
9774 for (i = 128; i <= 255; ++i)
9775 if (MB_BYTE2LEN(i) == 2)
9776 spelltab.st_isw[i] = TRUE;
9777 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009778 else if (enc_utf8)
9779 {
9780 for (i = 128; i < 256; ++i)
9781 {
Bram Moolenaar54ab0f12010-05-13 17:46:58 +02009782 int f = utf_fold(i);
9783 int u = utf_toupper(i);
9784
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009785 spelltab.st_isu[i] = utf_isupper(i);
9786 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
Bram Moolenaar54ab0f12010-05-13 17:46:58 +02009787 /* The folded/upper-cased value is different between latin1 and
9788 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1
9789 * value for utf-8 to avoid this. */
9790 spelltab.st_fold[i] = (f < 256) ? f : i;
9791 spelltab.st_upper[i] = (u < 256) ? u : i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009792 }
9793 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009794 else
9795#endif
9796 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009797 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009798 for (i = 128; i < 256; ++i)
9799 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009800 if (MB_ISUPPER(i))
9801 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009802 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009803 spelltab.st_isu[i] = TRUE;
9804 spelltab.st_fold[i] = MB_TOLOWER(i);
9805 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009806 else if (MB_ISLOWER(i))
9807 {
9808 spelltab.st_isw[i] = TRUE;
9809 spelltab.st_upper[i] = MB_TOUPPER(i);
9810 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009811 }
9812 }
9813}
9814
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009815/*
9816 * Set the spell character tables from strings in the affix file.
9817 */
9818 static int
9819set_spell_chartab(fol, low, upp)
9820 char_u *fol;
9821 char_u *low;
9822 char_u *upp;
9823{
9824 /* We build the new tables here first, so that we can compare with the
9825 * previous one. */
9826 spelltab_T new_st;
9827 char_u *pf = fol, *pl = low, *pu = upp;
9828 int f, l, u;
9829
9830 clear_spell_chartab(&new_st);
9831
9832 while (*pf != NUL)
9833 {
9834 if (*pl == NUL || *pu == NUL)
9835 {
9836 EMSG(_(e_affform));
9837 return FAIL;
9838 }
9839#ifdef FEAT_MBYTE
9840 f = mb_ptr2char_adv(&pf);
9841 l = mb_ptr2char_adv(&pl);
9842 u = mb_ptr2char_adv(&pu);
9843#else
9844 f = *pf++;
9845 l = *pl++;
9846 u = *pu++;
9847#endif
9848 /* Every character that appears is a word character. */
9849 if (f < 256)
9850 new_st.st_isw[f] = TRUE;
9851 if (l < 256)
9852 new_st.st_isw[l] = TRUE;
9853 if (u < 256)
9854 new_st.st_isw[u] = TRUE;
9855
9856 /* if "LOW" and "FOL" are not the same the "LOW" char needs
9857 * case-folding */
9858 if (l < 256 && l != f)
9859 {
9860 if (f >= 256)
9861 {
9862 EMSG(_(e_affrange));
9863 return FAIL;
9864 }
9865 new_st.st_fold[l] = f;
9866 }
9867
9868 /* if "UPP" and "FOL" are not the same the "UPP" char needs
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009869 * case-folding, it's upper case and the "UPP" is the upper case of
9870 * "FOL" . */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009871 if (u < 256 && u != f)
9872 {
9873 if (f >= 256)
9874 {
9875 EMSG(_(e_affrange));
9876 return FAIL;
9877 }
9878 new_st.st_fold[u] = f;
9879 new_st.st_isu[u] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009880 new_st.st_upper[f] = u;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009881 }
9882 }
9883
9884 if (*pl != NUL || *pu != NUL)
9885 {
9886 EMSG(_(e_affform));
9887 return FAIL;
9888 }
9889
9890 return set_spell_finish(&new_st);
9891}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009892
9893/*
9894 * Set the spell character tables from strings in the .spl file.
9895 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00009896 static void
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009897set_spell_charflags(flags, cnt, fol)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009898 char_u *flags;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009899 int cnt; /* length of "flags" */
9900 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009901{
9902 /* We build the new tables here first, so that we can compare with the
9903 * previous one. */
9904 spelltab_T new_st;
9905 int i;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009906 char_u *p = fol;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009907 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009908
9909 clear_spell_chartab(&new_st);
9910
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009911 for (i = 0; i < 128; ++i)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009912 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009913 if (i < cnt)
9914 {
9915 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9916 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9917 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009918
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009919 if (*p != NUL)
9920 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009921#ifdef FEAT_MBYTE
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009922 c = mb_ptr2char_adv(&p);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009923#else
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009924 c = *p++;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009925#endif
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009926 new_st.st_fold[i + 128] = c;
9927 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9928 new_st.st_upper[c] = i + 128;
9929 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009930 }
9931
Bram Moolenaar5195e452005-08-19 20:32:47 +00009932 (void)set_spell_finish(&new_st);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009933}
9934
9935 static int
9936set_spell_finish(new_st)
9937 spelltab_T *new_st;
9938{
9939 int i;
9940
9941 if (did_set_spelltab)
9942 {
9943 /* check that it's the same table */
9944 for (i = 0; i < 256; ++i)
9945 {
9946 if (spelltab.st_isw[i] != new_st->st_isw[i]
9947 || spelltab.st_isu[i] != new_st->st_isu[i]
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009948 || spelltab.st_fold[i] != new_st->st_fold[i]
9949 || spelltab.st_upper[i] != new_st->st_upper[i])
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009950 {
9951 EMSG(_("E763: Word characters differ between spell files"));
9952 return FAIL;
9953 }
9954 }
9955 }
9956 else
9957 {
9958 /* copy the new spelltab into the one being used */
9959 spelltab = *new_st;
9960 did_set_spelltab = TRUE;
9961 }
9962
9963 return OK;
9964}
9965
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009966/*
Bram Moolenaarea408852005-06-25 22:49:46 +00009967 * Return TRUE if "p" points to a word character.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009968 * As a special case we see "midword" characters as word character when it is
Bram Moolenaarea408852005-06-25 22:49:46 +00009969 * followed by a word character. This finds they'there but not 'they there'.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009970 * Thus this only works properly when past the first character of the word.
Bram Moolenaarea408852005-06-25 22:49:46 +00009971 */
9972 static int
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009973spell_iswordp(p, buf)
Bram Moolenaarea408852005-06-25 22:49:46 +00009974 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009975 buf_T *buf; /* buffer used */
Bram Moolenaarea408852005-06-25 22:49:46 +00009976{
Bram Moolenaarea408852005-06-25 22:49:46 +00009977#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009978 char_u *s;
9979 int l;
9980 int c;
9981
9982 if (has_mbyte)
9983 {
9984 l = MB_BYTE2LEN(*p);
9985 s = p;
9986 if (l == 1)
9987 {
9988 /* be quick for ASCII */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009989 if (buf->b_spell_ismw[*p])
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009990 {
9991 s = p + 1; /* skip a mid-word character */
9992 l = MB_BYTE2LEN(*s);
9993 }
9994 }
9995 else
9996 {
9997 c = mb_ptr2char(p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009998 if (c < 256 ? buf->b_spell_ismw[c]
9999 : (buf->b_spell_ismw_mb != NULL
10000 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010001 {
10002 s = p + l;
10003 l = MB_BYTE2LEN(*s);
10004 }
10005 }
10006
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010007 c = mb_ptr2char(s);
10008 if (c > 255)
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +000010009 return spell_mb_isword_class(mb_get_class(s));
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010010 return spelltab.st_isw[c];
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010011 }
Bram Moolenaarea408852005-06-25 22:49:46 +000010012#endif
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010013
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010014 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
10015}
10016
10017/*
10018 * Return TRUE if "p" points to a word character.
10019 * Unlike spell_iswordp() this doesn't check for "midword" characters.
10020 */
10021 static int
10022spell_iswordp_nmw(p)
10023 char_u *p;
10024{
10025#ifdef FEAT_MBYTE
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010026 int c;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010027
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010028 if (has_mbyte)
10029 {
10030 c = mb_ptr2char(p);
10031 if (c > 255)
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +000010032 return spell_mb_isword_class(mb_get_class(p));
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010033 return spelltab.st_isw[c];
10034 }
10035#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010036 return spelltab.st_isw[*p];
Bram Moolenaarea408852005-06-25 22:49:46 +000010037}
10038
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010039#ifdef FEAT_MBYTE
/*
 * Return TRUE if character class "cl" indicates a word character.
 * Only for characters above 255.
 * Classes below 2 are never word characters.  Unicode subscript and
 * superscript (classes 0x2070 and 0x2080) are not considered word characters
 * either.
 */
    static int
spell_mb_isword_class(cl)
    int         cl;
{
    if (cl < 2)
        return 0;
    if (cl == 0x2070 || cl == 0x2080)
        return 0;
    return 1;
}
10051
10052/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010053 * Return TRUE if "p" points to a word character.
10054 * Wide version of spell_iswordp().
10055 */
10056 static int
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010057spell_iswordp_w(p, buf)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010058 int *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010059 buf_T *buf;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010060{
10061 int *s;
10062
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010063 if (*p < 256 ? buf->b_spell_ismw[*p]
10064 : (buf->b_spell_ismw_mb != NULL
10065 && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010066 s = p + 1;
10067 else
10068 s = p;
10069
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010070 if (*s > 255)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010071 {
10072 if (enc_utf8)
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +000010073 return spell_mb_isword_class(utf_class(*s));
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010074 if (enc_dbcs)
10075 return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
10076 return 0;
10077 }
10078 return spelltab.st_isw[*s];
10079}
10080#endif
10081
Bram Moolenaarea408852005-06-25 22:49:46 +000010082/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010083 * Write the table with prefix conditions to the .spl file.
Bram Moolenaar5195e452005-08-19 20:32:47 +000010084 * When "fd" is NULL only count the length of what is written.
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010085 */
Bram Moolenaar5195e452005-08-19 20:32:47 +000010086 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010087write_spell_prefcond(fd, gap)
10088 FILE *fd;
10089 garray_T *gap;
10090{
10091 int i;
10092 char_u *p;
10093 int len;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010094 int totlen;
Bram Moolenaar2eb6eb32008-11-29 19:19:19 +000010095 size_t x = 1; /* collect return value of fwrite() */
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010096
Bram Moolenaar5195e452005-08-19 20:32:47 +000010097 if (fd != NULL)
10098 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
10099
10100 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010101
10102 for (i = 0; i < gap->ga_len; ++i)
10103 {
10104 /* <prefcond> : <condlen> <condstr> */
10105 p = ((char_u **)gap->ga_data)[i];
Bram Moolenaar5195e452005-08-19 20:32:47 +000010106 if (p != NULL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010107 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010108 len = (int)STRLEN(p);
Bram Moolenaar5195e452005-08-19 20:32:47 +000010109 if (fd != NULL)
10110 {
10111 fputc(len, fd);
Bram Moolenaar3f3766b2008-11-28 09:08:51 +000010112 x &= fwrite(p, (size_t)len, (size_t)1, fd);
Bram Moolenaar5195e452005-08-19 20:32:47 +000010113 }
10114 totlen += len;
Bram Moolenaar1d73c882005-06-19 22:48:47 +000010115 }
Bram Moolenaar5195e452005-08-19 20:32:47 +000010116 else if (fd != NULL)
10117 fputc(0, fd);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010118 }
10119
Bram Moolenaar5195e452005-08-19 20:32:47 +000010120 return totlen;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010121}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010122
10123/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010124 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
10125 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010126 * When using a multi-byte 'encoding' the length may change!
10127 * Returns FAIL when something wrong.
10128 */
10129 static int
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010130spell_casefold(str, len, buf, buflen)
10131 char_u *str;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010132 int len;
10133 char_u *buf;
10134 int buflen;
10135{
10136 int i;
10137
10138 if (len >= buflen)
10139 {
10140 buf[0] = NUL;
10141 return FAIL; /* result will not fit */
10142 }
10143
10144#ifdef FEAT_MBYTE
10145 if (has_mbyte)
10146 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010147 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010148 char_u *p;
10149 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010150
10151 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010152 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010153 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010154 if (outi + MB_MAXBYTES > buflen)
10155 {
10156 buf[outi] = NUL;
10157 return FAIL;
10158 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010159 c = mb_cptr2char_adv(&p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010160 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010161 }
10162 buf[outi] = NUL;
10163 }
10164 else
10165#endif
10166 {
10167 /* Be quick for non-multibyte encodings. */
10168 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010169 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000010170 buf[i] = NUL;
10171 }
10172
10173 return OK;
10174}
10175
Bram Moolenaar4770d092006-01-12 23:22:24 +000010176/* Values for "sps_flags": the suggestion method named in 'spellsuggest'. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010177#define SPS_BEST 1 /* "best" in 'spellsuggest'; also used when no method is given */
10178#define SPS_FAST 2 /* "fast" in 'spellsuggest' */
10179#define SPS_DOUBLE 4 /* "double" in 'spellsuggest' */
10180
Bram Moolenaar4770d092006-01-12 23:22:24 +000010181static int sps_flags = SPS_BEST; /* method flag from 'spellsuggest', set by spell_check_sps() */
10182static int sps_limit = 9999; /* max nr of suggestions given; 9999 unless 'spellsuggest' has a number */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010183
10184/*
10185 * Check the 'spellsuggest' option. Return FAIL if it's wrong.
Bram Moolenaar5195e452005-08-19 20:32:47 +000010186 * Sets "sps_flags" and "sps_limit".
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010187 */
10188 int
10189spell_check_sps()
10190{
10191 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010192 char_u *s;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010193 char_u buf[MAXPATHL];
10194 int f;
10195
10196 sps_flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010197 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010198
10199 for (p = p_sps; *p != NUL; )
10200 {
10201 copy_option_part(&p, buf, MAXPATHL, ",");
10202
10203 f = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010204 if (VIM_ISDIGIT(*buf))
10205 {
10206 s = buf;
10207 sps_limit = getdigits(&s);
10208 if (*s != NUL && !VIM_ISDIGIT(*s))
10209 f = -1;
10210 }
10211 else if (STRCMP(buf, "best") == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010212 f = SPS_BEST;
10213 else if (STRCMP(buf, "fast") == 0)
10214 f = SPS_FAST;
10215 else if (STRCMP(buf, "double") == 0)
10216 f = SPS_DOUBLE;
10217 else if (STRNCMP(buf, "expr:", 5) != 0
10218 && STRNCMP(buf, "file:", 5) != 0)
10219 f = -1;
10220
10221 if (f == -1 || (sps_flags != 0 && f != 0))
10222 {
10223 sps_flags = SPS_BEST;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010224 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010225 return FAIL;
10226 }
10227 if (f != 0)
10228 sps_flags = f;
10229 }
10230
10231 if (sps_flags == 0)
10232 sps_flags = SPS_BEST;
10233
10234 return OK;
10235}
10236
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010237/*
10238 * "z?": Find badly spelled word under or after the cursor.
10239 * Give suggestions for the properly spelled word.
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010240 * In Visual mode use the highlighted word as the bad word.
Bram Moolenaard12a1322005-08-21 22:08:24 +000010241 * When "count" is non-zero use that suggestion.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010242 */
10243 void
Bram Moolenaard12a1322005-08-21 22:08:24 +000010244spell_suggest(count)
10245 int count;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010246{
10247 char_u *line;
10248 pos_T prev_cursor = curwin->w_cursor;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010249 char_u wcopy[MAXWLEN + 2];
10250 char_u *p;
10251 int i;
10252 int c;
10253 suginfo_T sug;
10254 suggest_T *stp;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010255 int mouse_used;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010256 int need_cap;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010257 int limit;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010258 int selected = count;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010259 int badlen = 0;
Bram Moolenaarb2450162009-07-22 09:04:20 +000010260 int msg_scroll_save = msg_scroll;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010261
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010262 if (no_spell_checking(curwin))
10263 return;
10264
10265#ifdef FEAT_VISUAL
10266 if (VIsual_active)
10267 {
10268 /* Use the Visually selected text as the bad word. But reject
10269 * a multi-line selection. */
10270 if (curwin->w_cursor.lnum != VIsual.lnum)
10271 {
10272 vim_beep();
10273 return;
10274 }
10275 badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
10276 if (badlen < 0)
10277 badlen = -badlen;
10278 else
10279 curwin->w_cursor.col = VIsual.col;
10280 ++badlen;
10281 end_visual_mode();
10282 }
10283 else
10284#endif
10285 /* Find the start of the badly spelled word. */
10286 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
Bram Moolenaar0c405862005-06-22 22:26:26 +000010287 || curwin->w_cursor.col > prev_cursor.col)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010288 {
Bram Moolenaar0c405862005-06-22 22:26:26 +000010289 /* No bad word or it starts after the cursor: use the word under the
10290 * cursor. */
10291 curwin->w_cursor = prev_cursor;
10292 line = ml_get_curline();
10293 p = line + curwin->w_cursor.col;
10294 /* Backup to before start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010295 while (p > line && spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +000010296 mb_ptr_back(line, p);
10297 /* Forward to start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010298 while (*p != NUL && !spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +000010299 mb_ptr_adv(p);
10300
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010301 if (!spell_iswordp_nmw(p)) /* No word found. */
Bram Moolenaar0c405862005-06-22 22:26:26 +000010302 {
10303 beep_flush();
10304 return;
10305 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010306 curwin->w_cursor.col = (colnr_T)(p - line);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010307 }
10308
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010309 /* Get the word and its length. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010310
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010311 /* Figure out if the word should be capitalised. */
Bram Moolenaar8b59de92005-08-11 19:59:29 +000010312 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010313
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +010010314 /* Make a copy of current line since autocommands may free the line. */
10315 line = vim_strsave(ml_get_curline());
10316 if (line == NULL)
10317 goto skip;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010318
Bram Moolenaar5195e452005-08-19 20:32:47 +000010319 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in
10320 * 'spellsuggest', whatever is smaller. */
10321 if (sps_limit > (int)Rows - 2)
10322 limit = (int)Rows - 2;
10323 else
10324 limit = sps_limit;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010325 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010326 TRUE, need_cap, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010327
10328 if (sug.su_ga.ga_len == 0)
10329 MSG(_("Sorry, no suggestions"));
Bram Moolenaard12a1322005-08-21 22:08:24 +000010330 else if (count > 0)
10331 {
10332 if (count > sug.su_ga.ga_len)
10333 smsg((char_u *)_("Sorry, only %ld suggestions"),
10334 (long)sug.su_ga.ga_len);
10335 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010336 else
10337 {
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010338 vim_free(repl_from);
10339 repl_from = NULL;
10340 vim_free(repl_to);
10341 repl_to = NULL;
10342
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010343#ifdef FEAT_RIGHTLEFT
10344 /* When 'rightleft' is set the list is drawn right-left. */
10345 cmdmsg_rl = curwin->w_p_rl;
10346 if (cmdmsg_rl)
10347 msg_col = Columns - 1;
10348#endif
10349
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010350 /* List the suggestions. */
10351 msg_start();
Bram Moolenaar412f7442006-07-23 19:51:57 +000010352 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010353 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010354 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
10355 sug.su_badlen, sug.su_badptr);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010356#ifdef FEAT_RIGHTLEFT
10357 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
10358 {
10359 /* And now the rabbit from the high hat: Avoid showing the
10360 * untranslated message rightleft. */
10361 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
10362 sug.su_badlen, sug.su_badptr);
10363 }
10364#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010365 msg_puts(IObuff);
10366 msg_clr_eos();
10367 msg_putchar('\n');
Bram Moolenaar0c405862005-06-22 22:26:26 +000010368
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010369 msg_scroll = TRUE;
10370 for (i = 0; i < sug.su_ga.ga_len; ++i)
10371 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010372 stp = &SUG(sug.su_ga, i);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010373
10374 /* The suggested word may replace only part of the bad word, add
10375 * the not replaced part. */
10376 STRCPY(wcopy, stp->st_word);
10377 if (sug.su_badlen > stp->st_orglen)
Bram Moolenaar4770d092006-01-12 23:22:24 +000010378 vim_strncpy(wcopy + stp->st_wordlen,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010379 sug.su_badptr + stp->st_orglen,
10380 sug.su_badlen - stp->st_orglen);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010381 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
10382#ifdef FEAT_RIGHTLEFT
10383 if (cmdmsg_rl)
10384 rl_mirror(IObuff);
10385#endif
10386 msg_puts(IObuff);
10387
10388 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010389 msg_puts(IObuff);
10390
10391 /* The word may replace more than "su_badlen". */
10392 if (sug.su_badlen < stp->st_orglen)
10393 {
10394 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
10395 stp->st_orglen, sug.su_badptr);
10396 msg_puts(IObuff);
10397 }
10398
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010399 if (p_verbose > 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010400 {
Bram Moolenaar0c405862005-06-22 22:26:26 +000010401 /* Add the score. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000010402 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010403 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010404 stp->st_salscore ? "s " : "",
10405 stp->st_score, stp->st_altscore);
10406 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010407 vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
Bram Moolenaar0c405862005-06-22 22:26:26 +000010408 stp->st_score);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010409#ifdef FEAT_RIGHTLEFT
10410 if (cmdmsg_rl)
10411 /* Mirror the numbers, but keep the leading space. */
10412 rl_mirror(IObuff + 1);
10413#endif
Bram Moolenaar0c405862005-06-22 22:26:26 +000010414 msg_advance(30);
10415 msg_puts(IObuff);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010416 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010417 msg_putchar('\n');
10418 }
10419
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010420#ifdef FEAT_RIGHTLEFT
10421 cmdmsg_rl = FALSE;
10422 msg_col = 0;
10423#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010424 /* Ask for choice. */
Bram Moolenaard12a1322005-08-21 22:08:24 +000010425 selected = prompt_for_number(&mouse_used);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010426 if (mouse_used)
Bram Moolenaard12a1322005-08-21 22:08:24 +000010427 selected -= lines_left;
Bram Moolenaarb2450162009-07-22 09:04:20 +000010428 lines_left = Rows; /* avoid more prompt */
10429 /* don't delay for 'smd' in normal_cmd() */
10430 msg_scroll = msg_scroll_save;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010431 }
10432
Bram Moolenaard12a1322005-08-21 22:08:24 +000010433 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
10434 {
10435 /* Save the from and to text for :spellrepall. */
10436 stp = &SUG(sug.su_ga, selected - 1);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +000010437 if (sug.su_badlen > stp->st_orglen)
10438 {
10439 /* Replacing less than "su_badlen", append the remainder to
10440 * repl_to. */
10441 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
10442 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
10443 sug.su_badlen - stp->st_orglen,
10444 sug.su_badptr + stp->st_orglen);
10445 repl_to = vim_strsave(IObuff);
10446 }
10447 else
10448 {
10449 /* Replacing su_badlen or more, use the whole word. */
10450 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
10451 repl_to = vim_strsave(stp->st_word);
10452 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000010453
10454 /* Replace the word. */
Bram Moolenaarb2450162009-07-22 09:04:20 +000010455 p = alloc((unsigned)STRLEN(line) - stp->st_orglen
10456 + stp->st_wordlen + 1);
Bram Moolenaard12a1322005-08-21 22:08:24 +000010457 if (p != NULL)
10458 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010459 c = (int)(sug.su_badptr - line);
Bram Moolenaard12a1322005-08-21 22:08:24 +000010460 mch_memmove(p, line, c);
10461 STRCPY(p + c, stp->st_word);
10462 STRCAT(p, sug.su_badptr + stp->st_orglen);
10463 ml_replace(curwin->w_cursor.lnum, p, FALSE);
10464 curwin->w_cursor.col = c;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010465
10466 /* For redo we use a change-word command. */
10467 ResetRedobuff();
10468 AppendToRedobuff((char_u *)"ciw");
Bram Moolenaarebefac62005-12-28 22:39:57 +000010469 AppendToRedobuffLit(p + c,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010470 stp->st_wordlen + sug.su_badlen - stp->st_orglen);
Bram Moolenaard12a1322005-08-21 22:08:24 +000010471 AppendCharToRedobuff(ESC);
Bram Moolenaar910f66f2006-04-05 20:41:53 +000010472
10473 /* After this "p" may be invalid. */
10474 changed_bytes(curwin->w_cursor.lnum, c);
Bram Moolenaard12a1322005-08-21 22:08:24 +000010475 }
10476 }
10477 else
10478 curwin->w_cursor = prev_cursor;
10479
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010480 spell_find_cleanup(&sug);
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +010010481skip:
10482 vim_free(line);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010483}
10484
10485/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +000010486 * Check if the word at line "lnum" column "col" is required to start with a
10487 * capital. This uses 'spellcapcheck' of the current buffer.
10488 */
10489 static int
10490check_need_cap(lnum, col)
10491 linenr_T lnum;
10492 colnr_T col;
10493{
10494 int need_cap = FALSE;
10495 char_u *line;
10496 char_u *line_copy = NULL;
10497 char_u *p;
10498 colnr_T endcol;
10499 regmatch_T regmatch;
10500
10501 if (curbuf->b_cap_prog == NULL)
10502 return FALSE;
10503
10504 line = ml_get_curline();
10505 endcol = 0;
10506 if ((int)(skipwhite(line) - line) >= (int)col)
10507 {
10508 /* At start of line, check if previous line is empty or sentence
10509 * ends there. */
10510 if (lnum == 1)
10511 need_cap = TRUE;
10512 else
10513 {
10514 line = ml_get(lnum - 1);
10515 if (*skipwhite(line) == NUL)
10516 need_cap = TRUE;
10517 else
10518 {
10519 /* Append a space in place of the line break. */
10520 line_copy = concat_str(line, (char_u *)" ");
10521 line = line_copy;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010522 endcol = (colnr_T)STRLEN(line);
Bram Moolenaar8b59de92005-08-11 19:59:29 +000010523 }
10524 }
10525 }
10526 else
10527 endcol = col;
10528
10529 if (endcol > 0)
10530 {
10531 /* Check if sentence ends before the bad word. */
10532 regmatch.regprog = curbuf->b_cap_prog;
10533 regmatch.rm_ic = FALSE;
10534 p = line + endcol;
10535 for (;;)
10536 {
10537 mb_ptr_back(line, p);
10538 if (p == line || spell_iswordp_nmw(p))
10539 break;
10540 if (vim_regexec(&regmatch, p, 0)
10541 && regmatch.endp[0] == line + endcol)
10542 {
10543 need_cap = TRUE;
10544 break;
10545 }
10546 }
10547 }
10548
10549 vim_free(line_copy);
10550
10551 return need_cap;
10552}
10553
10554
10555/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010556 * ":spellrepall"
10557 */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010558 void
10559ex_spellrepall(eap)
Bram Moolenaar2c4278f2009-05-17 11:33:22 +000010560 exarg_T *eap UNUSED;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010561{
10562 pos_T pos = curwin->w_cursor;
10563 char_u *frompat;
10564 int addlen;
10565 char_u *line;
10566 char_u *p;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010567 int save_ws = p_ws;
Bram Moolenaar5195e452005-08-19 20:32:47 +000010568 linenr_T prev_lnum = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010569
10570 if (repl_from == NULL || repl_to == NULL)
10571 {
10572 EMSG(_("E752: No previous spell replacement"));
10573 return;
10574 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010575 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010576
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010577 frompat = alloc((unsigned)STRLEN(repl_from) + 7);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010578 if (frompat == NULL)
10579 return;
10580 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
10581 p_ws = FALSE;
10582
Bram Moolenaar5195e452005-08-19 20:32:47 +000010583 sub_nsubs = 0;
10584 sub_nlines = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010585 curwin->w_cursor.lnum = 0;
10586 while (!got_int)
10587 {
Bram Moolenaar91a4e822008-01-19 14:59:58 +000010588 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL) == 0
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010589 || u_save_cursor() == FAIL)
10590 break;
10591
10592 /* Only replace when the right word isn't there yet. This happens
10593 * when changing "etc" to "etc.". */
10594 line = ml_get_curline();
10595 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
10596 repl_to, STRLEN(repl_to)) != 0)
10597 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010598 p = alloc((unsigned)STRLEN(line) + addlen + 1);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010599 if (p == NULL)
10600 break;
10601 mch_memmove(p, line, curwin->w_cursor.col);
10602 STRCPY(p + curwin->w_cursor.col, repl_to);
10603 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
10604 ml_replace(curwin->w_cursor.lnum, p, FALSE);
10605 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar5195e452005-08-19 20:32:47 +000010606
10607 if (curwin->w_cursor.lnum != prev_lnum)
10608 {
10609 ++sub_nlines;
10610 prev_lnum = curwin->w_cursor.lnum;
10611 }
10612 ++sub_nsubs;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010613 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010614 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010615 }
10616
10617 p_ws = save_ws;
10618 curwin->w_cursor = pos;
10619 vim_free(frompat);
10620
Bram Moolenaar5195e452005-08-19 20:32:47 +000010621 if (sub_nsubs == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010622 EMSG2(_("E753: Not found: %s"), repl_from);
Bram Moolenaar5195e452005-08-19 20:32:47 +000010623 else
10624 do_sub_msg(FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010625}
10626
10627/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010628 * Find spell suggestions for "word". Return them in the growarray "*gap" as
10629 * a list of allocated strings.
10630 */
10631 void
Bram Moolenaar4770d092006-01-12 23:22:24 +000010632spell_suggest_list(gap, word, maxcount, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010633 garray_T *gap;
10634 char_u *word;
10635 int maxcount; /* maximum nr of suggestions */
Bram Moolenaar8b59de92005-08-11 19:59:29 +000010636 int need_cap; /* 'spellcapcheck' matched */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010637 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010638{
10639 suginfo_T sug;
10640 int i;
10641 suggest_T *stp;
10642 char_u *wcopy;
10643
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010644 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010645
10646 /* Make room in "gap". */
10647 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010648 if (ga_grow(gap, sug.su_ga.ga_len) == OK)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010649 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010650 for (i = 0; i < sug.su_ga.ga_len; ++i)
10651 {
10652 stp = &SUG(sug.su_ga, i);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010653
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010654 /* The suggested word may replace only part of "word", add the not
10655 * replaced part. */
10656 wcopy = alloc(stp->st_wordlen
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000010657 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010658 if (wcopy == NULL)
10659 break;
10660 STRCPY(wcopy, stp->st_word);
10661 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
10662 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
10663 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010664 }
10665
10666 spell_find_cleanup(&sug);
10667}
10668
10669/*
10670 * Find spell suggestions for the word at the start of "badptr".
10671 * Return the suggestions in "su->su_ga".
10672 * The maximum number of suggestions is "maxcount".
10673 * Note: does use info for the current window.
10674 * This is based on the mechanisms of Aspell, but completely reimplemented.
10675 */
10676 static void
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010677spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010678 char_u *badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010679 int badlen; /* length of bad word or 0 if unknown */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010680 suginfo_T *su;
10681 int maxcount;
Bram Moolenaarea408852005-06-25 22:49:46 +000010682 int banbadword; /* don't include badword in suggestions */
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010683 int need_cap; /* word should start with capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010684 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010685{
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010686 hlf_T attr = HLF_COUNT;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010687 char_u buf[MAXPATHL];
10688 char_u *p;
10689 int do_combine = FALSE;
10690 char_u *sps_copy;
10691#ifdef FEAT_EVAL
10692 static int expr_busy = FALSE;
10693#endif
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010694 int c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010695 int i;
10696 langp_T *lp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010697
10698 /*
10699 * Set the info in "*su".
10700 */
10701 vim_memset(su, 0, sizeof(suginfo_T));
10702 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
10703 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000010704 if (*badptr == NUL)
10705 return;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010706 hash_init(&su->su_banned);
10707
10708 su->su_badptr = badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010709 if (badlen != 0)
10710 su->su_badlen = badlen;
10711 else
10712 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010713 su->su_maxcount = maxcount;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010714 su->su_maxscore = SCORE_MAXINIT;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010715
10716 if (su->su_badlen >= MAXWLEN)
10717 su->su_badlen = MAXWLEN - 1; /* just in case */
10718 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
10719 (void)spell_casefold(su->su_badptr, su->su_badlen,
10720 su->su_fbadword, MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010721 /* get caps flags for bad word */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010722 su->su_badflags = badword_captype(su->su_badptr,
10723 su->su_badptr + su->su_badlen);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010724 if (need_cap)
10725 su->su_badflags |= WF_ONECAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010726
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010727 /* Find the default language for sound folding. We simply use the first
10728 * one in 'spelllang' that supports sound folding. That's good for when
10729 * using multiple files for one language, it's not that bad when mixing
10730 * languages (e.g., "pl,en"). */
10731 for (i = 0; i < curbuf->b_langp.ga_len; ++i)
10732 {
10733 lp = LANGP_ENTRY(curbuf->b_langp, i);
10734 if (lp->lp_sallang != NULL)
10735 {
10736 su->su_sallang = lp->lp_sallang;
10737 break;
10738 }
10739 }
10740
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010741 /* Soundfold the bad word with the default sound folding, so that we don't
10742 * have to do this many times. */
10743 if (su->su_sallang != NULL)
10744 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
10745 su->su_sal_badword);
10746
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010747 /* If the word is not capitalised and spell_check() doesn't consider the
10748 * word to be bad then it might need to be capitalised. Add a suggestion
10749 * for that. */
Bram Moolenaar53805d12005-08-01 07:08:33 +000010750 c = PTR2CHAR(su->su_badptr);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010751 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010752 {
10753 make_case_word(su->su_badword, buf, WF_ONECAP);
10754 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010755 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010756 }
10757
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010758 /* Ban the bad word itself. It may appear in another region. */
Bram Moolenaarea408852005-06-25 22:49:46 +000010759 if (banbadword)
10760 add_banned(su, su->su_badword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010761
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010762 /* Make a copy of 'spellsuggest', because the expression may change it. */
10763 sps_copy = vim_strsave(p_sps);
10764 if (sps_copy == NULL)
10765 return;
10766
10767 /* Loop over the items in 'spellsuggest'. */
10768 for (p = sps_copy; *p != NUL; )
10769 {
10770 copy_option_part(&p, buf, MAXPATHL, ",");
10771
10772 if (STRNCMP(buf, "expr:", 5) == 0)
10773 {
10774#ifdef FEAT_EVAL
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010775 /* Evaluate an expression. Skip this when called recursively,
10776 * when using spellsuggest() in the expression. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010777 if (!expr_busy)
10778 {
10779 expr_busy = TRUE;
10780 spell_suggest_expr(su, buf + 5);
10781 expr_busy = FALSE;
10782 }
10783#endif
10784 }
10785 else if (STRNCMP(buf, "file:", 5) == 0)
10786 /* Use list of suggestions in a file. */
10787 spell_suggest_file(su, buf + 5);
10788 else
10789 {
10790 /* Use internal method. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010791 spell_suggest_intern(su, interactive);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010792 if (sps_flags & SPS_DOUBLE)
10793 do_combine = TRUE;
10794 }
10795 }
10796
10797 vim_free(sps_copy);
10798
10799 if (do_combine)
10800 /* Combine the two list of suggestions. This must be done last,
10801 * because sorting changes the order again. */
10802 score_combine(su);
10803}
10804
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".  Used for "expr:" in
 * 'spellsuggest'.
 * The expression is evaluated for the bad word and results in a list; every
 * item that is itself a list provides a word and a score.  Words with a
 * score from zero up to "su_maxscore" are added to "su->su_ga".
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T   *su;
    char_u      *expr;
{
    list_T      *result;
    listitem_T  *item;
    char_u      *word;
    int         score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
        for (item = result->lv_first; item != NULL; item = item->li_next)
        {
            /* Items that are not a list are ignored. */
            if (item->li_tv.v_type != VAR_LIST)
                continue;

            /* Get the word and the score from the item. */
            score = get_spellword(item->li_tv.vval.v_list, &word);
            if (score < 0 || score > su->su_maxscore)
                continue;
            add_suggestion(su, &su->su_ga, word, su->su_badlen,
                                       score, 0, TRUE, su->su_sallang, FALSE);
        }
        list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
10843
10844/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010845 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'.
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010846 */
10847 static void
10848spell_suggest_file(su, fname)
10849 suginfo_T *su;
10850 char_u *fname;
10851{
10852 FILE *fd;
10853 char_u line[MAXWLEN * 2];
10854 char_u *p;
10855 int len;
10856 char_u cword[MAXWLEN];
10857
10858 /* Open the file. */
10859 fd = mch_fopen((char *)fname, "r");
10860 if (fd == NULL)
10861 {
10862 EMSG2(_(e_notopen), fname);
10863 return;
10864 }
10865
10866 /* Read it line by line. */
10867 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
10868 {
10869 line_breakcheck();
10870
10871 p = vim_strchr(line, '/');
10872 if (p == NULL)
10873 continue; /* No Tab found, just skip the line. */
10874 *p++ = NUL;
10875 if (STRICMP(su->su_badword, line) == 0)
10876 {
10877 /* Match! Isolate the good word, until CR or NL. */
10878 for (len = 0; p[len] >= ' '; ++len)
10879 ;
10880 p[len] = NUL;
10881
10882 /* If the suggestion doesn't have specific case duplicate the case
10883 * of the bad word. */
10884 if (captype(p, NULL) == 0)
10885 {
10886 make_case_word(p, cword, su->su_badflags);
10887 p = cword;
10888 }
10889
10890 add_suggestion(su, &su->su_ga, p, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010891 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010892 }
10893 }
10894
10895 fclose(fd);
10896
Bram Moolenaar4770d092006-01-12 23:22:24 +000010897 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10898 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010899 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10900}
10901
10902/*
10903 * Find suggestions for the internal method indicated by "sps_flags".
10904 */
10905 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000010906spell_suggest_intern(su, interactive)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010907 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010908 int interactive;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010909{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010910 /*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010911 * Load the .sug file(s) that are available and not done yet.
10912 */
10913 suggest_load_files();
10914
10915 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010916 * 1. Try special cases, such as repeating a word: "the the" -> "the".
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010917 *
10918 * Set a maximum score to limit the combination of operations that is
10919 * tried.
10920 */
Bram Moolenaar0c405862005-06-22 22:26:26 +000010921 suggest_try_special(su);
10922
10923 /*
10924 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
10925 * from the .aff file and inserting a space (split the word).
10926 */
10927 suggest_try_change(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010928
10929 /* For the resulting top-scorers compute the sound-a-like score. */
10930 if (sps_flags & SPS_DOUBLE)
10931 score_comp_sal(su);
10932
10933 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010934 * 3. Try finding sound-a-like words.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010935 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010936 if ((sps_flags & SPS_FAST) == 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010937 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010938 if (sps_flags & SPS_BEST)
10939 /* Adjust the word score for the suggestions found so far for how
10940 * they sounds like. */
10941 rescore_suggestions(su);
10942
10943 /*
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +010010944 * While going through the soundfold tree "su_maxscore" is the score
Bram Moolenaar4770d092006-01-12 23:22:24 +000010945 * for the soundfold word, limits the changes that are being tried,
10946 * and "su_sfmaxscore" the rescored score, which is set by
10947 * cleanup_suggestions().
10948 * First find words with a small edit distance, because this is much
10949 * faster and often already finds the top-N suggestions. If we didn't
10950 * find many suggestions try again with a higher edit distance.
10951 * "sl_sounddone" is used to avoid doing the same word twice.
10952 */
10953 suggest_try_soundalike_prep();
10954 su->su_maxscore = SCORE_SFMAX1;
10955 su->su_sfmaxscore = SCORE_MAXINIT * 3;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010956 suggest_try_soundalike(su);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010957 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10958 {
10959 /* We didn't find enough matches, try again, allowing more
10960 * changes to the soundfold word. */
10961 su->su_maxscore = SCORE_SFMAX2;
10962 suggest_try_soundalike(su);
10963 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10964 {
10965 /* Still didn't find enough matches, try again, allowing even
10966 * more changes to the soundfold word. */
10967 su->su_maxscore = SCORE_SFMAX3;
10968 suggest_try_soundalike(su);
10969 }
10970 }
10971 su->su_maxscore = su->su_sfmaxscore;
10972 suggest_try_soundalike_finish();
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010973 }
10974
Bram Moolenaar4770d092006-01-12 23:22:24 +000010975 /* When CTRL-C was hit while searching do show the results. Only clear
10976 * got_int when using a command, not for spellsuggest(). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010977 ui_breakcheck();
Bram Moolenaar4770d092006-01-12 23:22:24 +000010978 if (interactive && got_int)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010979 {
10980 (void)vgetc();
10981 got_int = FALSE;
10982 }
10983
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010984 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010985 {
10986 if (sps_flags & SPS_BEST)
10987 /* Adjust the word score for how it sounds like. */
10988 rescore_suggestions(su);
10989
Bram Moolenaar4770d092006-01-12 23:22:24 +000010990 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10991 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010992 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010993 }
10994}
10995
10996/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010997 * Load the .sug files for languages that have one and weren't loaded yet.
10998 */
10999 static void
11000suggest_load_files()
11001{
11002 langp_T *lp;
11003 int lpi;
11004 slang_T *slang;
11005 char_u *dotp;
11006 FILE *fd;
11007 char_u buf[MAXWLEN];
11008 int i;
11009 time_t timestamp;
11010 int wcount;
11011 int wordnr;
11012 garray_T ga;
11013 int c;
11014
11015 /* Do this for all languages that support sound folding. */
11016 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
11017 {
11018 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
11019 slang = lp->lp_slang;
11020 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
11021 {
11022 /* Change ".spl" to ".sug" and open the file. When the file isn't
11023 * found silently skip it. Do set "sl_sugloaded" so that we
11024 * don't try again and again. */
11025 slang->sl_sugloaded = TRUE;
11026
11027 dotp = vim_strrchr(slang->sl_fname, '.');
11028 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
11029 continue;
11030 STRCPY(dotp, ".sug");
Bram Moolenaar5555acc2006-04-07 21:33:12 +000011031 fd = mch_fopen((char *)slang->sl_fname, "r");
Bram Moolenaar4770d092006-01-12 23:22:24 +000011032 if (fd == NULL)
11033 goto nextone;
11034
11035 /*
11036 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
11037 */
11038 for (i = 0; i < VIMSUGMAGICL; ++i)
11039 buf[i] = getc(fd); /* <fileID> */
11040 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
11041 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000011042 EMSG2(_("E778: This does not look like a .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011043 slang->sl_fname);
11044 goto nextone;
11045 }
11046 c = getc(fd); /* <versionnr> */
11047 if (c < VIMSUGVERSION)
11048 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000011049 EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011050 slang->sl_fname);
11051 goto nextone;
11052 }
11053 else if (c > VIMSUGVERSION)
11054 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000011055 EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011056 slang->sl_fname);
11057 goto nextone;
11058 }
11059
11060 /* Check the timestamp, it must be exactly the same as the one in
11061 * the .spl file. Otherwise the word numbers won't match. */
Bram Moolenaarb388adb2006-02-28 23:50:17 +000011062 timestamp = get8c(fd); /* <timestamp> */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011063 if (timestamp != slang->sl_sugtime)
11064 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000011065 EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011066 slang->sl_fname);
11067 goto nextone;
11068 }
11069
11070 /*
11071 * <SUGWORDTREE>: <wordtree>
11072 * Read the trie with the soundfolded words.
11073 */
11074 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
11075 FALSE, 0) != 0)
11076 {
11077someerror:
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000011078 EMSG2(_("E782: error while reading .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011079 slang->sl_fname);
11080 slang_clear_sug(slang);
11081 goto nextone;
11082 }
11083
11084 /*
11085 * <SUGTABLE>: <sugwcount> <sugline> ...
11086 *
11087 * Read the table with word numbers. We use a file buffer for
11088 * this, because it's so much like a file with lines. Makes it
11089 * possible to swap the info and save on memory use.
11090 */
11091 slang->sl_sugbuf = open_spellbuf();
11092 if (slang->sl_sugbuf == NULL)
11093 goto someerror;
11094 /* <sugwcount> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +000011095 wcount = get4c(fd);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011096 if (wcount < 0)
11097 goto someerror;
11098
11099 /* Read all the wordnr lists into the buffer, one NUL terminated
11100 * list per line. */
11101 ga_init2(&ga, 1, 100);
11102 for (wordnr = 0; wordnr < wcount; ++wordnr)
11103 {
11104 ga.ga_len = 0;
11105 for (;;)
11106 {
11107 c = getc(fd); /* <sugline> */
11108 if (c < 0 || ga_grow(&ga, 1) == FAIL)
11109 goto someerror;
11110 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
11111 if (c == NUL)
11112 break;
11113 }
11114 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
11115 ga.ga_data, ga.ga_len, TRUE) == FAIL)
11116 goto someerror;
11117 }
11118 ga_clear(&ga);
11119
11120 /*
11121 * Need to put word counts in the word tries, so that we can find
11122 * a word by its number.
11123 */
11124 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
11125 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
11126
11127nextone:
11128 if (fd != NULL)
11129 fclose(fd);
11130 STRCPY(dotp, ".spl");
11131 }
11132 }
11133}
11134
11135
11136/*
11137 * Fill in the wordcount fields for a trie.
11138 * Returns the total number of words.
11139 */
11140 static void
11141tree_count_words(byts, idxs)
11142 char_u *byts;
11143 idx_T *idxs;
11144{
11145 int depth;
11146 idx_T arridx[MAXWLEN];
11147 int curi[MAXWLEN];
11148 int c;
11149 idx_T n;
11150 int wordcount[MAXWLEN];
11151
11152 arridx[0] = 0;
11153 curi[0] = 1;
11154 wordcount[0] = 0;
11155 depth = 0;
11156 while (depth >= 0 && !got_int)
11157 {
11158 if (curi[depth] > byts[arridx[depth]])
11159 {
11160 /* Done all bytes at this node, go up one level. */
11161 idxs[arridx[depth]] = wordcount[depth];
11162 if (depth > 0)
11163 wordcount[depth - 1] += wordcount[depth];
11164
11165 --depth;
11166 fast_breakcheck();
11167 }
11168 else
11169 {
11170 /* Do one more byte at this node. */
11171 n = arridx[depth] + curi[depth];
11172 ++curi[depth];
11173
11174 c = byts[n];
11175 if (c == 0)
11176 {
11177 /* End of word, count it. */
11178 ++wordcount[depth];
11179
11180 /* Skip over any other NUL bytes (same word with different
11181 * flags). */
11182 while (byts[n + 1] == 0)
11183 {
11184 ++n;
11185 ++curi[depth];
11186 }
11187 }
11188 else
11189 {
11190 /* Normal char, go one level deeper to count the words. */
11191 ++depth;
11192 arridx[depth] = idxs[n];
11193 curi[depth] = 1;
11194 wordcount[depth] = 0;
11195 }
11196 }
11197 }
11198}
11199
11200/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000011201 * Free the info put in "*su" by spell_find_suggest().
11202 */
11203 static void
11204spell_find_cleanup(su)
11205 suginfo_T *su;
11206{
11207 int i;
11208
11209 /* Free the suggestions. */
11210 for (i = 0; i < su->su_ga.ga_len; ++i)
11211 vim_free(SUG(su->su_ga, i).st_word);
11212 ga_clear(&su->su_ga);
11213 for (i = 0; i < su->su_sga.ga_len; ++i)
11214 vim_free(SUG(su->su_sga, i).st_word);
11215 ga_clear(&su->su_sga);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011216
11217 /* Free the banned words. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011218 hash_clear_all(&su->su_banned, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011219}
11220
11221/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011222 * Make a copy of "word", with the first letter upper or lower cased, to
11223 * "wcopy[MAXWLEN]". "word" must not be empty.
11224 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011225 */
11226 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011227onecap_copy(word, wcopy, upper)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011228 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011229 char_u *wcopy;
11230 int upper; /* TRUE: first letter made upper case */
11231{
11232 char_u *p;
11233 int c;
11234 int l;
11235
11236 p = word;
11237#ifdef FEAT_MBYTE
11238 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011239 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011240 else
11241#endif
11242 c = *p++;
11243 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011244 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011245 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011246 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011247#ifdef FEAT_MBYTE
11248 if (has_mbyte)
11249 l = mb_char2bytes(c, wcopy);
11250 else
11251#endif
11252 {
11253 l = 1;
11254 wcopy[0] = c;
11255 }
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011256 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011257}
11258
11259/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011260 * Make a copy of "word" with all the letters upper cased into
11261 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011262 */
11263 static void
11264allcap_copy(word, wcopy)
11265 char_u *word;
11266 char_u *wcopy;
11267{
11268 char_u *s;
11269 char_u *d;
11270 int c;
11271
11272 d = wcopy;
11273 for (s = word; *s != NUL; )
11274 {
11275#ifdef FEAT_MBYTE
11276 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011277 c = mb_cptr2char_adv(&s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011278 else
11279#endif
11280 c = *s++;
Bram Moolenaar78622822005-08-23 21:00:13 +000011281
11282#ifdef FEAT_MBYTE
11283 /* We only change ß to SS when we are certain latin1 is used. It
11284 * would cause weird errors in other 8-bit encodings. */
11285 if (enc_latin1like && c == 0xdf)
11286 {
11287 c = 'S';
11288 if (d - wcopy >= MAXWLEN - 1)
11289 break;
11290 *d++ = c;
11291 }
11292 else
11293#endif
11294 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011295
11296#ifdef FEAT_MBYTE
11297 if (has_mbyte)
11298 {
11299 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
11300 break;
11301 d += mb_char2bytes(c, d);
11302 }
11303 else
11304#endif
11305 {
11306 if (d - wcopy >= MAXWLEN - 1)
11307 break;
11308 *d++ = c;
11309 }
11310 }
11311 *d = NUL;
11312}
11313
11314/*
Bram Moolenaar0c405862005-06-22 22:26:26 +000011315 * Try finding suggestions by recognizing specific situations.
11316 */
11317 static void
11318suggest_try_special(su)
11319 suginfo_T *su;
11320{
11321 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011322 size_t len;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011323 int c;
11324 char_u word[MAXWLEN];
11325
11326 /*
11327 * Recognize a word that is repeated: "the the".
11328 */
11329 p = skiptowhite(su->su_fbadword);
11330 len = p - su->su_fbadword;
11331 p = skipwhite(p);
11332 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
11333 {
11334 /* Include badflags: if the badword is onecap or allcap
11335 * use that for the goodword too: "The the" -> "The". */
11336 c = su->su_fbadword[len];
11337 su->su_fbadword[len] = NUL;
11338 make_case_word(su->su_fbadword, word, su->su_badflags);
11339 su->su_fbadword[len] = c;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011340
11341 /* Give a soundalike score of 0, compute the score as if deleting one
11342 * character. */
11343 add_suggestion(su, &su->su_ga, word, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000011344 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaar0c405862005-06-22 22:26:26 +000011345 }
11346}
11347
11348/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011349 * Try finding suggestions by adding/removing/swapping letters.
11350 */
11351 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000011352suggest_try_change(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011353 suginfo_T *su;
11354{
11355 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000011356 int n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011357 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011358 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011359 langp_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011360
11361 /* We make a copy of the case-folded bad word, so that we can modify it
Bram Moolenaar0c405862005-06-22 22:26:26 +000011362 * to find matches (esp. REP items). Append some more text, changing
11363 * chars after the bad word may help. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011364 STRCPY(fword, su->su_fbadword);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000011365 n = (int)STRLEN(fword);
Bram Moolenaar0c405862005-06-22 22:26:26 +000011366 p = su->su_badptr + su->su_badlen;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000011367 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011368
Bram Moolenaar8b96d642005-09-05 22:05:30 +000011369 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011370 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000011371 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011372
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011373 /* If reloading a spell file fails it's still in the list but
11374 * everything has been cleared. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011375 if (lp->lp_slang->sl_fbyts == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011376 continue;
11377
Bram Moolenaar4770d092006-01-12 23:22:24 +000011378 /* Try it for this language. Will add possible suggestions. */
11379 suggest_trie_walk(su, lp, fword, FALSE);
11380 }
11381}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011382
/* Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score at "stack[depth]" plus "add" is still
 * below "su->su_maxscore".  All arguments are parenthesized, so that
 * expressions can be passed safely. */
#define TRY_DEEPER(su, stack, depth, add) \
		((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
11386
11387/*
11388 * Try finding suggestions by adding/removing/swapping letters.
11389 *
11390 * This uses a state machine. At each node in the tree we try various
11391 * operations. When trying if an operation works "depth" is increased and the
11392 * stack[] is used to store info. This allows combinations, thus insert one
11393 * character, replace one and delete another. The number of changes is
11394 * limited by su->su_maxscore.
11395 *
11396 * After implementing this I noticed an article by Kemal Oflazer that
11397 * describes something similar: "Error-tolerant Finite State Recognition with
11398 * Applications to Morphological Analysis and Spelling Correction" (1996).
11399 * The implementation in the article is simplified and requires a stack of
11400 * unknown depth. The implementation here only needs a stack depth equal to
11401 * the length of the word.
11402 *
11403 * This is also used for the sound-folded word, "soundfold" is TRUE then.
11404 * The mechanism is the same, but we find a match with a sound-folded word
11405 * that comes from one or more original words. Each of these words may be
11406 * added, this is done by add_sound_suggest().
11407 * Don't use:
11408 * the prefix tree or the keep-case tree
11409 * "su->su_badlen"
11410 * anything to do with upper and lower case
11411 * anything to do with word or non-word characters ("spell_iswordp()")
11412 * banned words
11413 * word flags (rare, region, compounding)
11414 * word splitting for now
11415 * "similar_chars()"
11416 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
11417 */
11418 static void
11419suggest_trie_walk(su, lp, fword, soundfold)
11420 suginfo_T *su;
11421 langp_T *lp;
11422 char_u *fword;
11423 int soundfold;
11424{
11425 char_u tword[MAXWLEN]; /* good word collected so far */
11426 trystate_T stack[MAXWLEN];
11427 char_u preword[MAXWLEN * 3]; /* word found with proper case;
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +010011428 * concatenation of prefix compound
Bram Moolenaar4770d092006-01-12 23:22:24 +000011429 * words and split word. NUL terminated
11430 * when going deeper but not when coming
11431 * back. */
11432 char_u compflags[MAXWLEN]; /* compound flags, one for each word */
11433 trystate_T *sp;
11434 int newscore;
11435 int score;
11436 char_u *byts, *fbyts, *pbyts;
11437 idx_T *idxs, *fidxs, *pidxs;
11438 int depth;
11439 int c, c2, c3;
11440 int n = 0;
11441 int flags;
11442 garray_T *gap;
11443 idx_T arridx;
11444 int len;
11445 char_u *p;
11446 fromto_T *ftp;
11447 int fl = 0, tl;
11448 int repextra = 0; /* extra bytes in fword[] from REP item */
11449 slang_T *slang = lp->lp_slang;
11450 int fword_ends;
11451 int goodword_ends;
11452#ifdef DEBUG_TRIEWALK
11453 /* Stores the name of the change made at each level. */
11454 char_u changename[MAXWLEN][80];
11455#endif
11456 int breakcheckcount = 1000;
11457 int compound_ok;
11458
11459 /*
11460 * Go through the whole case-fold tree, try changes at each node.
11461 * "tword[]" contains the word collected from nodes in the tree.
11462 * "fword[]" the word we are trying to match with (initially the bad
11463 * word).
11464 */
11465 depth = 0;
11466 sp = &stack[0];
11467 vim_memset(sp, 0, sizeof(trystate_T));
11468 sp->ts_curi = 1;
11469
11470 if (soundfold)
11471 {
11472 /* Going through the soundfold tree. */
11473 byts = fbyts = slang->sl_sbyts;
11474 idxs = fidxs = slang->sl_sidxs;
11475 pbyts = NULL;
11476 pidxs = NULL;
11477 sp->ts_prefixdepth = PFD_NOPREFIX;
11478 sp->ts_state = STATE_START;
11479 }
11480 else
11481 {
Bram Moolenaarea424162005-06-16 21:51:00 +000011482 /*
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011483 * When there are postponed prefixes we need to use these first. At
11484 * the end of the prefix we continue in the case-fold tree.
11485 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011486 fbyts = slang->sl_fbyts;
11487 fidxs = slang->sl_fidxs;
11488 pbyts = slang->sl_pbyts;
11489 pidxs = slang->sl_pidxs;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011490 if (pbyts != NULL)
11491 {
11492 byts = pbyts;
11493 idxs = pidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011494 sp->ts_prefixdepth = PFD_PREFIXTREE;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011495 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */
11496 }
11497 else
11498 {
11499 byts = fbyts;
11500 idxs = fidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011501 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011502 sp->ts_state = STATE_START;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011503 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011504 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011505
Bram Moolenaar4770d092006-01-12 23:22:24 +000011506 /*
11507 * Loop to find all suggestions. At each round we either:
11508 * - For the current state try one operation, advance "ts_curi",
11509 * increase "depth".
11510 * - When a state is done go to the next, set "ts_state".
11511 * - When all states are tried decrease "depth".
11512 */
11513 while (depth >= 0 && !got_int)
11514 {
11515 sp = &stack[depth];
11516 switch (sp->ts_state)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011517 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011518 case STATE_START:
11519 case STATE_NOPREFIX:
11520 /*
11521 * Start of node: Deal with NUL bytes, which means
11522 * tword[] may end here.
11523 */
11524 arridx = sp->ts_arridx; /* current node in the tree */
11525 len = byts[arridx]; /* bytes in this node */
11526 arridx += sp->ts_curi; /* index of current byte */
11527
11528 if (sp->ts_prefixdepth == PFD_PREFIXTREE)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011529 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011530 /* Skip over the NUL bytes, we use them later. */
11531 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
11532 ;
11533 sp->ts_curi += n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011534
Bram Moolenaar4770d092006-01-12 23:22:24 +000011535 /* Always past NUL bytes now. */
11536 n = (int)sp->ts_state;
11537 sp->ts_state = STATE_ENDNUL;
11538 sp->ts_save_badflags = su->su_badflags;
11539
11540 /* At end of a prefix or at start of prefixtree: check for
11541 * following word. */
11542 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011543 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011544 /* Set su->su_badflags to the caps type at this position.
11545 * Use the caps type until here for the prefix itself. */
Bram Moolenaar53805d12005-08-01 07:08:33 +000011546#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011547 if (has_mbyte)
11548 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
11549 else
Bram Moolenaar53805d12005-08-01 07:08:33 +000011550#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011551 n = sp->ts_fidx;
11552 flags = badword_captype(su->su_badptr, su->su_badptr + n);
11553 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000011554 su->su_badptr + su->su_badlen);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011555#ifdef DEBUG_TRIEWALK
11556 sprintf(changename[depth], "prefix");
11557#endif
11558 go_deeper(stack, depth, 0);
11559 ++depth;
11560 sp = &stack[depth];
11561 sp->ts_prefixdepth = depth - 1;
11562 byts = fbyts;
11563 idxs = fidxs;
11564 sp->ts_arridx = 0;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011565
Bram Moolenaar4770d092006-01-12 23:22:24 +000011566 /* Move the prefix to preword[] with the right case
11567 * and make find_keepcap_word() works. */
11568 tword[sp->ts_twordlen] = NUL;
11569 make_case_word(tword + sp->ts_splitoff,
11570 preword + sp->ts_prewordlen, flags);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000011571 sp->ts_prewordlen = (char_u)STRLEN(preword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011572 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011573 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011574 break;
11575 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011576
Bram Moolenaar4770d092006-01-12 23:22:24 +000011577 if (sp->ts_curi > len || byts[arridx] != 0)
11578 {
11579 /* Past bytes in node and/or past NUL bytes. */
11580 sp->ts_state = STATE_ENDNUL;
11581 sp->ts_save_badflags = su->su_badflags;
11582 break;
11583 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011584
Bram Moolenaar4770d092006-01-12 23:22:24 +000011585 /*
11586 * End of word in tree.
11587 */
11588 ++sp->ts_curi; /* eat one NUL byte */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011589
Bram Moolenaar4770d092006-01-12 23:22:24 +000011590 flags = (int)idxs[arridx];
Bram Moolenaare1438bb2006-03-01 22:01:55 +000011591
11592 /* Skip words with the NOSUGGEST flag. */
11593 if (flags & WF_NOSUGGEST)
11594 break;
11595
Bram Moolenaar4770d092006-01-12 23:22:24 +000011596 fword_ends = (fword[sp->ts_fidx] == NUL
11597 || (soundfold
11598 ? vim_iswhite(fword[sp->ts_fidx])
11599 : !spell_iswordp(fword + sp->ts_fidx, curbuf)));
11600 tword[sp->ts_twordlen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011601
Bram Moolenaar4770d092006-01-12 23:22:24 +000011602 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
Bram Moolenaard12a1322005-08-21 22:08:24 +000011603 && (sp->ts_flags & TSF_PREFIXOK) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000011604 {
11605 /* There was a prefix before the word. Check that the prefix
11606 * can be used with this word. */
11607 /* Count the length of the NULs in the prefix. If there are
11608 * none this must be the first try without a prefix. */
11609 n = stack[sp->ts_prefixdepth].ts_arridx;
11610 len = pbyts[n++];
11611 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
11612 ;
11613 if (c > 0)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011614 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011615 c = valid_word_prefix(c, n, flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000011616 tword + sp->ts_splitoff, slang, FALSE);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011617 if (c == 0)
11618 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011619
Bram Moolenaar4770d092006-01-12 23:22:24 +000011620 /* Use the WF_RARE flag for a rare prefix. */
11621 if (c & WF_RAREPFX)
11622 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011623
Bram Moolenaar4770d092006-01-12 23:22:24 +000011624 /* Tricky: when checking for both prefix and compounding
11625 * we run into the prefix flag first.
11626 * Remember that it's OK, so that we accept the prefix
11627 * when arriving at a compound flag. */
11628 sp->ts_flags |= TSF_PREFIXOK;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011629 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011630 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011631
Bram Moolenaar4770d092006-01-12 23:22:24 +000011632 /* Check NEEDCOMPOUND: can't use word without compounding. Do try
11633 * appending another compound word below. */
11634 if (sp->ts_complen == sp->ts_compsplit && fword_ends
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011635 && (flags & WF_NEEDCOMP))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011636 goodword_ends = FALSE;
11637 else
11638 goodword_ends = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011639
Bram Moolenaar4770d092006-01-12 23:22:24 +000011640 p = NULL;
11641 compound_ok = TRUE;
11642 if (sp->ts_complen > sp->ts_compsplit)
11643 {
11644 if (slang->sl_nobreak)
Bram Moolenaard12a1322005-08-21 22:08:24 +000011645 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011646 /* There was a word before this word. When there was no
11647 * change in this word (it was correct) add the first word
11648 * as a suggestion. If this word was corrected too, we
11649 * need to check if a correct word follows. */
11650 if (sp->ts_fidx - sp->ts_splitfidx
Bram Moolenaar78622822005-08-23 21:00:13 +000011651 == sp->ts_twordlen - sp->ts_splitoff
Bram Moolenaar4770d092006-01-12 23:22:24 +000011652 && STRNCMP(fword + sp->ts_splitfidx,
11653 tword + sp->ts_splitoff,
Bram Moolenaar78622822005-08-23 21:00:13 +000011654 sp->ts_fidx - sp->ts_splitfidx) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000011655 {
11656 preword[sp->ts_prewordlen] = NUL;
11657 newscore = score_wordcount_adj(slang, sp->ts_score,
11658 preword + sp->ts_prewordlen,
11659 sp->ts_prewordlen > 0);
11660 /* Add the suggestion if the score isn't too bad. */
11661 if (newscore <= su->su_maxscore)
Bram Moolenaar78622822005-08-23 21:00:13 +000011662 add_suggestion(su, &su->su_ga, preword,
Bram Moolenaar8b96d642005-09-05 22:05:30 +000011663 sp->ts_splitfidx - repextra,
Bram Moolenaar4770d092006-01-12 23:22:24 +000011664 newscore, 0, FALSE,
11665 lp->lp_sallang, FALSE);
11666 break;
Bram Moolenaar78622822005-08-23 21:00:13 +000011667 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000011668 }
Bram Moolenaare52325c2005-08-22 22:54:29 +000011669 else
Bram Moolenaar0c405862005-06-22 22:26:26 +000011670 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011671 /* There was a compound word before this word. If this
11672 * word does not support compounding then give up
11673 * (splitting is tried for the word without compound
11674 * flag). */
11675 if (((unsigned)flags >> 24) == 0
11676 || sp->ts_twordlen - sp->ts_splitoff
11677 < slang->sl_compminlen)
11678 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011679#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011680 /* For multi-byte chars check character length against
11681 * COMPOUNDMIN. */
11682 if (has_mbyte
11683 && slang->sl_compminlen > 0
11684 && mb_charlen(tword + sp->ts_splitoff)
11685 < slang->sl_compminlen)
11686 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011687#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +000011688
Bram Moolenaar4770d092006-01-12 23:22:24 +000011689 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11690 compflags[sp->ts_complen + 1] = NUL;
11691 vim_strncpy(preword + sp->ts_prewordlen,
11692 tword + sp->ts_splitoff,
11693 sp->ts_twordlen - sp->ts_splitoff);
Bram Moolenaar9f94b052008-11-30 20:12:46 +000011694
11695 /* Verify CHECKCOMPOUNDPATTERN rules. */
11696 if (match_checkcompoundpattern(preword, sp->ts_prewordlen,
11697 &slang->sl_comppat))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011698 compound_ok = FALSE;
11699
Bram Moolenaar9f94b052008-11-30 20:12:46 +000011700 if (compound_ok)
11701 {
11702 p = preword;
11703 while (*skiptowhite(p) != NUL)
11704 p = skipwhite(skiptowhite(p));
11705 if (fword_ends && !can_compound(slang, p,
11706 compflags + sp->ts_compsplit))
11707 /* Compound is not allowed. But it may still be
11708 * possible if we add another (short) word. */
11709 compound_ok = FALSE;
11710 }
11711
Bram Moolenaar4770d092006-01-12 23:22:24 +000011712 /* Get pointer to last char of previous word. */
11713 p = preword + sp->ts_prewordlen;
11714 mb_ptr_back(preword, p);
Bram Moolenaar0c405862005-06-22 22:26:26 +000011715 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011716 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011717
Bram Moolenaar4770d092006-01-12 23:22:24 +000011718 /*
11719 * Form the word with proper case in preword.
11720 * If there is a word from a previous split, append.
11721 * For the soundfold tree don't change the case, simply append.
11722 */
11723 if (soundfold)
11724 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
11725 else if (flags & WF_KEEPCAP)
11726 /* Must find the word in the keep-case tree. */
11727 find_keepcap_word(slang, tword + sp->ts_splitoff,
11728 preword + sp->ts_prewordlen);
11729 else
11730 {
11731 /* Include badflags: If the badword is onecap or allcap
11732 * use that for the goodword too. But if the badword is
11733 * allcap and it's only one char long use onecap. */
11734 c = su->su_badflags;
11735 if ((c & WF_ALLCAP)
11736#ifdef FEAT_MBYTE
11737 && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
11738#else
11739 && su->su_badlen == 1
11740#endif
11741 )
11742 c = WF_ONECAP;
11743 c |= flags;
11744
11745 /* When appending a compound word after a word character don't
11746 * use Onecap. */
11747 if (p != NULL && spell_iswordp_nmw(p))
11748 c &= ~WF_ONECAP;
11749 make_case_word(tword + sp->ts_splitoff,
11750 preword + sp->ts_prewordlen, c);
11751 }
11752
11753 if (!soundfold)
11754 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011755 /* Don't use a banned word. It may appear again as a good
11756 * word, thus remember it. */
11757 if (flags & WF_BANNED)
11758 {
Bram Moolenaar5195e452005-08-19 20:32:47 +000011759 add_banned(su, preword + sp->ts_prewordlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011760 break;
11761 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011762 if ((sp->ts_complen == sp->ts_compsplit
Bram Moolenaar4770d092006-01-12 23:22:24 +000011763 && WAS_BANNED(su, preword + sp->ts_prewordlen))
11764 || WAS_BANNED(su, preword))
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011765 {
11766 if (slang->sl_compprog == NULL)
11767 break;
11768 /* the word so far was banned but we may try compounding */
11769 goodword_ends = FALSE;
11770 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011771 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011772
Bram Moolenaar4770d092006-01-12 23:22:24 +000011773 newscore = 0;
11774 if (!soundfold) /* soundfold words don't have flags */
11775 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011776 if ((flags & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000011777 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011778 newscore += SCORE_REGION;
11779 if (flags & WF_RARE)
11780 newscore += SCORE_RARE;
11781
Bram Moolenaar0c405862005-06-22 22:26:26 +000011782 if (!spell_valid_case(su->su_badflags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000011783 captype(preword + sp->ts_prewordlen, NULL)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011784 newscore += SCORE_ICASE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011785 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011786
Bram Moolenaar4770d092006-01-12 23:22:24 +000011787 /* TODO: how about splitting in the soundfold tree? */
11788 if (fword_ends
11789 && goodword_ends
11790 && sp->ts_fidx >= sp->ts_fidxtry
11791 && compound_ok)
11792 {
11793 /* The badword also ends: add suggestions. */
11794#ifdef DEBUG_TRIEWALK
11795 if (soundfold && STRCMP(preword, "smwrd") == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011796 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011797 int j;
11798
11799 /* print the stack of changes that brought us here */
11800 smsg("------ %s -------", fword);
11801 for (j = 0; j < depth; ++j)
11802 smsg("%s", changename[j]);
11803 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011804#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011805 if (soundfold)
11806 {
11807 /* For soundfolded words we need to find the original
Bram Moolenaarf711faf2007-05-10 16:48:19 +000011808 * words, the edit distance and then add them. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011809 add_sound_suggest(su, preword, sp->ts_score, lp);
11810 }
11811 else
11812 {
11813 /* Give a penalty when changing non-word char to word
11814 * char, e.g., "thes," -> "these". */
11815 p = fword + sp->ts_fidx;
11816 mb_ptr_back(fword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011817 if (!spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011818 {
11819 p = preword + STRLEN(preword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011820 mb_ptr_back(preword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011821 if (spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011822 newscore += SCORE_NONWORD;
11823 }
11824
Bram Moolenaar4770d092006-01-12 23:22:24 +000011825 /* Give a bonus to words seen before. */
11826 score = score_wordcount_adj(slang,
11827 sp->ts_score + newscore,
11828 preword + sp->ts_prewordlen,
11829 sp->ts_prewordlen > 0);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011830
Bram Moolenaar4770d092006-01-12 23:22:24 +000011831 /* Add the suggestion if the score isn't too bad. */
11832 if (score <= su->su_maxscore)
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000011833 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011834 add_suggestion(su, &su->su_ga, preword,
11835 sp->ts_fidx - repextra,
11836 score, 0, FALSE, lp->lp_sallang, FALSE);
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000011837
11838 if (su->su_badflags & WF_MIXCAP)
11839 {
11840 /* We really don't know if the word should be
11841 * upper or lower case, add both. */
11842 c = captype(preword, NULL);
11843 if (c == 0 || c == WF_ALLCAP)
11844 {
11845 make_case_word(tword + sp->ts_splitoff,
11846 preword + sp->ts_prewordlen,
11847 c == 0 ? WF_ALLCAP : 0);
11848
11849 add_suggestion(su, &su->su_ga, preword,
11850 sp->ts_fidx - repextra,
11851 score + SCORE_ICASE, 0, FALSE,
11852 lp->lp_sallang, FALSE);
11853 }
11854 }
11855 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011856 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011857 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011858
Bram Moolenaar4770d092006-01-12 23:22:24 +000011859 /*
11860 * Try word split and/or compounding.
11861 */
11862 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
Bram Moolenaarea424162005-06-16 21:51:00 +000011863#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011864 /* Don't split halfway a character. */
11865 && (!has_mbyte || sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011866#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011867 )
11868 {
11869 int try_compound;
11870 int try_split;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011871
Bram Moolenaar4770d092006-01-12 23:22:24 +000011872 /* If past the end of the bad word don't try a split.
11873 * Otherwise try changing the next word. E.g., find
11874 * suggestions for "the the" where the second "the" is
11875 * different. It's done like a split.
11876 * TODO: word split for soundfold words */
11877 try_split = (sp->ts_fidx - repextra < su->su_badlen)
11878 && !soundfold;
11879
11880 /* Get here in several situations:
11881 * 1. The word in the tree ends:
11882 * If the word allows compounding try that. Otherwise try
11883 * a split by inserting a space. For both check that a
11884 * valid word starts at fword[sp->ts_fidx].
11885 * For NOBREAK do like compounding to be able to check if
11886 * the next word is valid.
11887 * 2. The badword does end, but it was due to a change (e.g.,
11888 * a swap). No need to split, but do check that the
11889 * following word is valid.
11890 * 3. The badword and the word in the tree end. It may still
11891 * be possible to compound another (short) word.
11892 */
11893 try_compound = FALSE;
11894 if (!soundfold
11895 && slang->sl_compprog != NULL
11896 && ((unsigned)flags >> 24) != 0
11897 && sp->ts_twordlen - sp->ts_splitoff
11898 >= slang->sl_compminlen
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011899#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011900 && (!has_mbyte
11901 || slang->sl_compminlen == 0
11902 || mb_charlen(tword + sp->ts_splitoff)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011903 >= slang->sl_compminlen)
11904#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011905 && (slang->sl_compsylmax < MAXWLEN
11906 || sp->ts_complen + 1 - sp->ts_compsplit
11907 < slang->sl_compmax)
Bram Moolenaar9f94b052008-11-30 20:12:46 +000011908 && (can_be_compound(sp, slang,
11909 compflags, ((unsigned)flags >> 24))))
11910
Bram Moolenaar4770d092006-01-12 23:22:24 +000011911 {
11912 try_compound = TRUE;
11913 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11914 compflags[sp->ts_complen + 1] = NUL;
11915 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000011916
Bram Moolenaar4770d092006-01-12 23:22:24 +000011917 /* For NOBREAK we never try splitting, it won't make any word
11918 * valid. */
11919 if (slang->sl_nobreak)
11920 try_compound = TRUE;
Bram Moolenaar78622822005-08-23 21:00:13 +000011921
Bram Moolenaar4770d092006-01-12 23:22:24 +000011922 /* If we could add a compound word, and it's also possible to
11923 * split at this point, do the split first and set
11924 * TSF_DIDSPLIT to avoid doing it again. */
11925 else if (!fword_ends
11926 && try_compound
11927 && (sp->ts_flags & TSF_DIDSPLIT) == 0)
11928 {
11929 try_compound = FALSE;
11930 sp->ts_flags |= TSF_DIDSPLIT;
11931 --sp->ts_curi; /* do the same NUL again */
11932 compflags[sp->ts_complen] = NUL;
11933 }
11934 else
11935 sp->ts_flags &= ~TSF_DIDSPLIT;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011936
Bram Moolenaar4770d092006-01-12 23:22:24 +000011937 if (try_split || try_compound)
11938 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011939 if (!try_compound && (!fword_ends || !goodword_ends))
Bram Moolenaard12a1322005-08-21 22:08:24 +000011940 {
11941 /* If we're going to split need to check that the
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011942 * words so far are valid for compounding. If there
11943 * is only one word it must not have the NEEDCOMPOUND
11944 * flag. */
11945 if (sp->ts_complen == sp->ts_compsplit
11946 && (flags & WF_NEEDCOMP))
11947 break;
Bram Moolenaare52325c2005-08-22 22:54:29 +000011948 p = preword;
11949 while (*skiptowhite(p) != NUL)
11950 p = skipwhite(skiptowhite(p));
Bram Moolenaard12a1322005-08-21 22:08:24 +000011951 if (sp->ts_complen > sp->ts_compsplit
Bram Moolenaare52325c2005-08-22 22:54:29 +000011952 && !can_compound(slang, p,
Bram Moolenaard12a1322005-08-21 22:08:24 +000011953 compflags + sp->ts_compsplit))
11954 break;
Bram Moolenaare1438bb2006-03-01 22:01:55 +000011955
11956 if (slang->sl_nosplitsugs)
11957 newscore += SCORE_SPLIT_NO;
11958 else
11959 newscore += SCORE_SPLIT;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011960
11961 /* Give a bonus to words seen before. */
11962 newscore = score_wordcount_adj(slang, newscore,
11963 preword + sp->ts_prewordlen, TRUE);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011964 }
11965
Bram Moolenaar4770d092006-01-12 23:22:24 +000011966 if (TRY_DEEPER(su, stack, depth, newscore))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011967 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011968 go_deeper(stack, depth, newscore);
11969#ifdef DEBUG_TRIEWALK
11970 if (!try_compound && !fword_ends)
11971 sprintf(changename[depth], "%.*s-%s: split",
11972 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11973 else
11974 sprintf(changename[depth], "%.*s-%s: compound",
11975 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11976#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011977 /* Save things to be restored at STATE_SPLITUNDO. */
Bram Moolenaar0c405862005-06-22 22:26:26 +000011978 sp->ts_save_badflags = su->su_badflags;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011979 sp->ts_state = STATE_SPLITUNDO;
11980
11981 ++depth;
11982 sp = &stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011983
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011984 /* Append a space to preword when splitting. */
11985 if (!try_compound && !fword_ends)
11986 STRCAT(preword, " ");
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000011987 sp->ts_prewordlen = (char_u)STRLEN(preword);
Bram Moolenaar5195e452005-08-19 20:32:47 +000011988 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar78622822005-08-23 21:00:13 +000011989 sp->ts_splitfidx = sp->ts_fidx;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011990
11991 /* If the badword has a non-word character at this
11992 * position skip it. That means replacing the
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011993 * non-word character with a space. Always skip a
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011994 * character when the word ends. But only when the
11995 * good word can end. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011996 if (((!try_compound && !spell_iswordp_nmw(fword
11997 + sp->ts_fidx))
11998 || fword_ends)
11999 && fword[sp->ts_fidx] != NUL
12000 && goodword_ends)
Bram Moolenaar9c96f592005-06-30 21:52:39 +000012001 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012002 int l;
12003
Bram Moolenaar9c96f592005-06-30 21:52:39 +000012004#ifdef FEAT_MBYTE
12005 if (has_mbyte)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012006 l = MB_BYTE2LEN(fword[sp->ts_fidx]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000012007 else
12008#endif
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012009 l = 1;
12010 if (fword_ends)
12011 {
12012 /* Copy the skipped character to preword. */
Bram Moolenaar5195e452005-08-19 20:32:47 +000012013 mch_memmove(preword + sp->ts_prewordlen,
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012014 fword + sp->ts_fidx, l);
Bram Moolenaar5195e452005-08-19 20:32:47 +000012015 sp->ts_prewordlen += l;
12016 preword[sp->ts_prewordlen] = NUL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012017 }
12018 else
12019 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
12020 sp->ts_fidx += l;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000012021 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000012022
Bram Moolenaard12a1322005-08-21 22:08:24 +000012023 /* When compounding include compound flag in
12024 * compflags[] (already set above). When splitting we
12025 * may start compounding over again. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012026 if (try_compound)
Bram Moolenaar5195e452005-08-19 20:32:47 +000012027 ++sp->ts_complen;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012028 else
Bram Moolenaard12a1322005-08-21 22:08:24 +000012029 sp->ts_compsplit = sp->ts_complen;
12030 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000012031
Bram Moolenaar53805d12005-08-01 07:08:33 +000012032 /* set su->su_badflags to the caps type at this
12033 * position */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012034#ifdef FEAT_MBYTE
12035 if (has_mbyte)
Bram Moolenaar53805d12005-08-01 07:08:33 +000012036 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012037 else
12038#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +000012039 n = sp->ts_fidx;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012040 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000012041 su->su_badptr + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012042
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012043 /* Restart at top of the tree. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +000012044 sp->ts_arridx = 0;
Bram Moolenaard12a1322005-08-21 22:08:24 +000012045
12046 /* If there are postponed prefixes, try these too. */
12047 if (pbyts != NULL)
12048 {
12049 byts = pbyts;
12050 idxs = pidxs;
12051 sp->ts_prefixdepth = PFD_PREFIXTREE;
12052 sp->ts_state = STATE_NOPREFIX;
12053 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012054 }
12055 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012056 }
12057 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012058
Bram Moolenaar4770d092006-01-12 23:22:24 +000012059 case STATE_SPLITUNDO:
12060 /* Undo the changes done for word split or compound word. */
12061 su->su_badflags = sp->ts_save_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012062
Bram Moolenaar4770d092006-01-12 23:22:24 +000012063 /* Continue looking for NUL bytes. */
12064 sp->ts_state = STATE_START;
Bram Moolenaard12a1322005-08-21 22:08:24 +000012065
Bram Moolenaar4770d092006-01-12 23:22:24 +000012066 /* In case we went into the prefix tree. */
12067 byts = fbyts;
12068 idxs = fidxs;
12069 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012070
Bram Moolenaar4770d092006-01-12 23:22:24 +000012071 case STATE_ENDNUL:
12072 /* Past the NUL bytes in the node. */
12073 su->su_badflags = sp->ts_save_badflags;
12074 if (fword[sp->ts_fidx] == NUL
Bram Moolenaarda2303d2005-08-30 21:55:26 +000012075#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000012076 && sp->ts_tcharlen == 0
Bram Moolenaarda2303d2005-08-30 21:55:26 +000012077#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000012078 )
12079 {
12080 /* The badword ends, can't use STATE_PLAIN. */
12081 sp->ts_state = STATE_DEL;
12082 break;
12083 }
12084 sp->ts_state = STATE_PLAIN;
12085 /*FALLTHROUGH*/
12086
12087 case STATE_PLAIN:
12088 /*
12089 * Go over all possible bytes at this node, add each to tword[]
12090 * and use child node. "ts_curi" is the index.
12091 */
12092 arridx = sp->ts_arridx;
12093 if (sp->ts_curi > byts[arridx])
12094 {
12095 /* Done all bytes at this node, do next state. When still at
12096 * already changed bytes skip the other tricks. */
12097 if (sp->ts_fidx >= sp->ts_fidxtry)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012098 sp->ts_state = STATE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012099 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000012100 sp->ts_state = STATE_FINAL;
12101 }
12102 else
12103 {
12104 arridx += sp->ts_curi++;
12105 c = byts[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012106
Bram Moolenaar4770d092006-01-12 23:22:24 +000012107 /* Normal byte, go one level deeper. If it's not equal to the
12108 * byte in the bad word adjust the score. But don't even try
12109 * when the byte was already changed. And don't try when we
12110 * just deleted this byte, accepting it is always cheaper than
12111 * delete + substitute. */
12112 if (c == fword[sp->ts_fidx]
Bram Moolenaarea424162005-06-16 21:51:00 +000012113#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000012114 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012115#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000012116 )
12117 newscore = 0;
12118 else
12119 newscore = SCORE_SUBST;
12120 if ((newscore == 0
12121 || (sp->ts_fidx >= sp->ts_fidxtry
12122 && ((sp->ts_flags & TSF_DIDDEL) == 0
12123 || c != fword[sp->ts_delidx])))
12124 && TRY_DEEPER(su, stack, depth, newscore))
12125 {
12126 go_deeper(stack, depth, newscore);
12127#ifdef DEBUG_TRIEWALK
12128 if (newscore > 0)
12129 sprintf(changename[depth], "%.*s-%s: subst %c to %c",
12130 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12131 fword[sp->ts_fidx], c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012132 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000012133 sprintf(changename[depth], "%.*s-%s: accept %c",
12134 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12135 fword[sp->ts_fidx]);
12136#endif
12137 ++depth;
12138 sp = &stack[depth];
12139 ++sp->ts_fidx;
12140 tword[sp->ts_twordlen++] = c;
12141 sp->ts_arridx = idxs[arridx];
Bram Moolenaarea424162005-06-16 21:51:00 +000012142#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000012143 if (newscore == SCORE_SUBST)
12144 sp->ts_isdiff = DIFF_YES;
12145 if (has_mbyte)
12146 {
12147 /* Multi-byte characters are a bit complicated to
12148 * handle: They differ when any of the bytes differ
12149 * and then their length may also differ. */
12150 if (sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000012151 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012152 /* First byte. */
12153 sp->ts_tcharidx = 0;
12154 sp->ts_tcharlen = MB_BYTE2LEN(c);
12155 sp->ts_fcharstart = sp->ts_fidx - 1;
12156 sp->ts_isdiff = (newscore != 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000012157 ? DIFF_YES : DIFF_NONE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012158 }
12159 else if (sp->ts_isdiff == DIFF_INSERT)
12160 /* When inserting trail bytes don't advance in the
12161 * bad word. */
12162 --sp->ts_fidx;
12163 if (++sp->ts_tcharidx == sp->ts_tcharlen)
12164 {
12165 /* Last byte of character. */
12166 if (sp->ts_isdiff == DIFF_YES)
Bram Moolenaarea424162005-06-16 21:51:00 +000012167 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012168 /* Correct ts_fidx for the byte length of the
12169 * character (we didn't check that before). */
12170 sp->ts_fidx = sp->ts_fcharstart
12171 + MB_BYTE2LEN(
Bram Moolenaarea424162005-06-16 21:51:00 +000012172 fword[sp->ts_fcharstart]);
12173
Bram Moolenaar4770d092006-01-12 23:22:24 +000012174 /* For changing a composing character adjust
12175 * the score from SCORE_SUBST to
12176 * SCORE_SUBCOMP. */
12177 if (enc_utf8
12178 && utf_iscomposing(
12179 mb_ptr2char(tword
12180 + sp->ts_twordlen
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000012181 - sp->ts_tcharlen))
Bram Moolenaar4770d092006-01-12 23:22:24 +000012182 && utf_iscomposing(
12183 mb_ptr2char(fword
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000012184 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000012185 sp->ts_score -=
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000012186 SCORE_SUBST - SCORE_SUBCOMP;
12187
Bram Moolenaar4770d092006-01-12 23:22:24 +000012188 /* For a similar character adjust score from
12189 * SCORE_SUBST to SCORE_SIMILAR. */
12190 else if (!soundfold
12191 && slang->sl_has_map
12192 && similar_chars(slang,
12193 mb_ptr2char(tword
12194 + sp->ts_twordlen
Bram Moolenaarea424162005-06-16 21:51:00 +000012195 - sp->ts_tcharlen),
Bram Moolenaar4770d092006-01-12 23:22:24 +000012196 mb_ptr2char(fword
Bram Moolenaarea424162005-06-16 21:51:00 +000012197 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000012198 sp->ts_score -=
Bram Moolenaarea424162005-06-16 21:51:00 +000012199 SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea424162005-06-16 21:51:00 +000012200 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012201 else if (sp->ts_isdiff == DIFF_INSERT
12202 && sp->ts_twordlen > sp->ts_tcharlen)
12203 {
12204 p = tword + sp->ts_twordlen - sp->ts_tcharlen;
12205 c = mb_ptr2char(p);
12206 if (enc_utf8 && utf_iscomposing(c))
12207 {
12208 /* Inserting a composing char doesn't
12209 * count that much. */
12210 sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
12211 }
12212 else
12213 {
12214 /* If the previous character was the same,
12215 * thus doubling a character, give a bonus
12216 * to the score. Also for the soundfold
12217 * tree (might seem illogical but does
12218 * give better scores). */
12219 mb_ptr_back(tword, p);
12220 if (c == mb_ptr2char(p))
12221 sp->ts_score -= SCORE_INS
12222 - SCORE_INSDUP;
12223 }
12224 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012225
Bram Moolenaar4770d092006-01-12 23:22:24 +000012226 /* Starting a new char, reset the length. */
12227 sp->ts_tcharlen = 0;
12228 }
Bram Moolenaarea408852005-06-25 22:49:46 +000012229 }
Bram Moolenaarea424162005-06-16 21:51:00 +000012230 else
12231#endif
Bram Moolenaarea408852005-06-25 22:49:46 +000012232 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012233 /* If we found a similar char adjust the score.
12234 * We do this after calling go_deeper() because
12235 * it's slow. */
12236 if (newscore != 0
12237 && !soundfold
12238 && slang->sl_has_map
12239 && similar_chars(slang,
12240 c, fword[sp->ts_fidx - 1]))
12241 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea408852005-06-25 22:49:46 +000012242 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012243 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012244 }
12245 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012246
Bram Moolenaar4770d092006-01-12 23:22:24 +000012247 case STATE_DEL:
12248#ifdef FEAT_MBYTE
12249 /* When past the first byte of a multi-byte char don't try
12250 * delete/insert/swap a character. */
12251 if (has_mbyte && sp->ts_tcharlen > 0)
12252 {
12253 sp->ts_state = STATE_FINAL;
12254 break;
12255 }
12256#endif
12257 /*
12258 * Try skipping one character in the bad word (delete it).
12259 */
12260 sp->ts_state = STATE_INS_PREP;
12261 sp->ts_curi = 1;
12262 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
12263 /* Deleting a vowel at the start of a word counts less, see
12264 * soundalike_score(). */
12265 newscore = 2 * SCORE_DEL / 3;
12266 else
12267 newscore = SCORE_DEL;
12268 if (fword[sp->ts_fidx] != NUL
12269 && TRY_DEEPER(su, stack, depth, newscore))
12270 {
12271 go_deeper(stack, depth, newscore);
12272#ifdef DEBUG_TRIEWALK
12273 sprintf(changename[depth], "%.*s-%s: delete %c",
12274 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12275 fword[sp->ts_fidx]);
12276#endif
12277 ++depth;
12278
12279 /* Remember what character we deleted, so that we can avoid
12280 * inserting it again. */
12281 stack[depth].ts_flags |= TSF_DIDDEL;
12282 stack[depth].ts_delidx = sp->ts_fidx;
12283
12284 /* Advance over the character in fword[]. Give a bonus to the
12285 * score if the same character is following "nn" -> "n". It's
12286 * a bit illogical for soundfold tree but it does give better
12287 * results. */
12288#ifdef FEAT_MBYTE
12289 if (has_mbyte)
12290 {
12291 c = mb_ptr2char(fword + sp->ts_fidx);
12292 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
12293 if (enc_utf8 && utf_iscomposing(c))
12294 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
12295 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
12296 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
12297 }
12298 else
12299#endif
12300 {
12301 ++stack[depth].ts_fidx;
12302 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
12303 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
12304 }
12305 break;
12306 }
12307 /*FALLTHROUGH*/
12308
12309 case STATE_INS_PREP:
12310 if (sp->ts_flags & TSF_DIDDEL)
12311 {
12312 /* If we just deleted a byte then inserting won't make sense,
12313 * a substitute is always cheaper. */
12314 sp->ts_state = STATE_SWAP;
12315 break;
12316 }
12317
12318 /* skip over NUL bytes */
12319 n = sp->ts_arridx;
12320 for (;;)
12321 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012322 if (sp->ts_curi > byts[n])
12323 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012324 /* Only NUL bytes at this node, go to next state. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012325 sp->ts_state = STATE_SWAP;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012326 break;
12327 }
12328 if (byts[n + sp->ts_curi] != NUL)
12329 {
12330 /* Found a byte to insert. */
12331 sp->ts_state = STATE_INS;
12332 break;
12333 }
12334 ++sp->ts_curi;
12335 }
12336 break;
12337
12338 /*FALLTHROUGH*/
12339
12340 case STATE_INS:
12341 /* Insert one byte. Repeat this for each possible byte at this
12342 * node. */
12343 n = sp->ts_arridx;
12344 if (sp->ts_curi > byts[n])
12345 {
12346 /* Done all bytes at this node, go to next state. */
12347 sp->ts_state = STATE_SWAP;
12348 break;
12349 }
12350
12351 /* Do one more byte at this node, but:
12352 * - Skip NUL bytes.
12353 * - Skip the byte if it's equal to the byte in the word,
12354 * accepting that byte is always better.
12355 */
12356 n += sp->ts_curi++;
12357 c = byts[n];
12358 if (soundfold && sp->ts_twordlen == 0 && c == '*')
12359 /* Inserting a vowel at the start of a word counts less,
12360 * see soundalike_score(). */
12361 newscore = 2 * SCORE_INS / 3;
12362 else
12363 newscore = SCORE_INS;
12364 if (c != fword[sp->ts_fidx]
12365 && TRY_DEEPER(su, stack, depth, newscore))
12366 {
12367 go_deeper(stack, depth, newscore);
12368#ifdef DEBUG_TRIEWALK
12369 sprintf(changename[depth], "%.*s-%s: insert %c",
12370 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12371 c);
12372#endif
12373 ++depth;
12374 sp = &stack[depth];
12375 tword[sp->ts_twordlen++] = c;
12376 sp->ts_arridx = idxs[n];
12377#ifdef FEAT_MBYTE
12378 if (has_mbyte)
12379 {
12380 fl = MB_BYTE2LEN(c);
12381 if (fl > 1)
12382 {
12383 /* There are following bytes for the same character.
12384 * We must find all bytes before trying
12385 * delete/insert/swap/etc. */
12386 sp->ts_tcharlen = fl;
12387 sp->ts_tcharidx = 1;
12388 sp->ts_isdiff = DIFF_INSERT;
12389 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012390 }
12391 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000012392 fl = 1;
12393 if (fl == 1)
Bram Moolenaarea424162005-06-16 21:51:00 +000012394#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000012395 {
12396 /* If the previous character was the same, thus doubling a
12397 * character, give a bonus to the score. Also for
12398 * soundfold words (illogical but does give a better
12399 * score). */
12400 if (sp->ts_twordlen >= 2
Bram Moolenaarea408852005-06-25 22:49:46 +000012401 && tword[sp->ts_twordlen - 2] == c)
Bram Moolenaar4770d092006-01-12 23:22:24 +000012402 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012403 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012404 }
12405 break;
12406
12407 case STATE_SWAP:
12408 /*
12409 * Swap two bytes in the bad word: "12" -> "21".
12410 * We change "fword" here, it's changed back afterwards at
12411 * STATE_UNSWAP.
12412 */
12413 p = fword + sp->ts_fidx;
12414 c = *p;
12415 if (c == NUL)
12416 {
12417 /* End of word, can't swap or replace. */
12418 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012419 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012420 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012421
Bram Moolenaar4770d092006-01-12 23:22:24 +000012422 /* Don't swap if the first character is not a word character.
12423 * SWAP3 etc. also don't make sense then. */
12424 if (!soundfold && !spell_iswordp(p, curbuf))
12425 {
12426 sp->ts_state = STATE_REP_INI;
12427 break;
12428 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000012429
Bram Moolenaar4770d092006-01-12 23:22:24 +000012430#ifdef FEAT_MBYTE
12431 if (has_mbyte)
12432 {
12433 n = mb_cptr2len(p);
12434 c = mb_ptr2char(p);
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +000012435 if (p[n] == NUL)
12436 c2 = NUL;
12437 else if (!soundfold && !spell_iswordp(p + n, curbuf))
Bram Moolenaar4770d092006-01-12 23:22:24 +000012438 c2 = c; /* don't swap non-word char */
12439 else
12440 c2 = mb_ptr2char(p + n);
12441 }
12442 else
12443#endif
12444 {
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +000012445 if (p[1] == NUL)
12446 c2 = NUL;
12447 else if (!soundfold && !spell_iswordp(p + 1, curbuf))
Bram Moolenaar4770d092006-01-12 23:22:24 +000012448 c2 = c; /* don't swap non-word char */
12449 else
12450 c2 = p[1];
12451 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000012452
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +000012453 /* When the second character is NUL we can't swap. */
12454 if (c2 == NUL)
12455 {
12456 sp->ts_state = STATE_REP_INI;
12457 break;
12458 }
12459
Bram Moolenaar4770d092006-01-12 23:22:24 +000012460 /* When characters are identical, swap won't do anything.
12461 * Also get here if the second char is not a word character. */
12462 if (c == c2)
12463 {
12464 sp->ts_state = STATE_SWAP3;
12465 break;
12466 }
12467 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
12468 {
12469 go_deeper(stack, depth, SCORE_SWAP);
12470#ifdef DEBUG_TRIEWALK
12471 sprintf(changename[depth], "%.*s-%s: swap %c and %c",
12472 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12473 c, c2);
12474#endif
12475 sp->ts_state = STATE_UNSWAP;
12476 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000012477#ifdef FEAT_MBYTE
12478 if (has_mbyte)
12479 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012480 fl = mb_char2len(c2);
12481 mch_memmove(p, p + n, fl);
12482 mb_char2bytes(c, p + fl);
12483 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000012484 }
12485 else
12486#endif
Bram Moolenaarbb15b652005-10-03 21:52:09 +000012487 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012488 p[0] = c2;
Bram Moolenaarea424162005-06-16 21:51:00 +000012489 p[1] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012490 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
Bram Moolenaarea424162005-06-16 21:51:00 +000012491 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012492 }
12493 else
12494 /* If this swap doesn't work then SWAP3 won't either. */
12495 sp->ts_state = STATE_REP_INI;
12496 break;
Bram Moolenaarea424162005-06-16 21:51:00 +000012497
Bram Moolenaar4770d092006-01-12 23:22:24 +000012498 case STATE_UNSWAP:
12499 /* Undo the STATE_SWAP swap: "21" -> "12". */
12500 p = fword + sp->ts_fidx;
12501#ifdef FEAT_MBYTE
12502 if (has_mbyte)
12503 {
12504 n = MB_BYTE2LEN(*p);
12505 c = mb_ptr2char(p + n);
12506 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
12507 mb_char2bytes(c, p);
12508 }
12509 else
12510#endif
12511 {
12512 c = *p;
12513 *p = p[1];
12514 p[1] = c;
12515 }
12516 /*FALLTHROUGH*/
12517
12518 case STATE_SWAP3:
12519 /* Swap two bytes, skipping one: "123" -> "321". We change
12520 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
12521 p = fword + sp->ts_fidx;
12522#ifdef FEAT_MBYTE
12523 if (has_mbyte)
12524 {
12525 n = mb_cptr2len(p);
12526 c = mb_ptr2char(p);
12527 fl = mb_cptr2len(p + n);
12528 c2 = mb_ptr2char(p + n);
12529 if (!soundfold && !spell_iswordp(p + n + fl, curbuf))
12530 c3 = c; /* don't swap non-word char */
12531 else
12532 c3 = mb_ptr2char(p + n + fl);
12533 }
12534 else
12535#endif
12536 {
12537 c = *p;
12538 c2 = p[1];
12539 if (!soundfold && !spell_iswordp(p + 2, curbuf))
12540 c3 = c; /* don't swap non-word char */
12541 else
12542 c3 = p[2];
12543 }
12544
12545 /* When characters are identical: "121" then SWAP3 result is
12546 * identical, ROT3L result is same as SWAP: "211", ROT3L result is
12547 * same as SWAP on next char: "112". Thus skip all swapping.
12548 * Also skip when c3 is NUL.
12549 * Also get here when the third character is not a word character.
12550 * Second character may any char: "a.b" -> "b.a" */
12551 if (c == c3 || c3 == NUL)
12552 {
12553 sp->ts_state = STATE_REP_INI;
12554 break;
12555 }
12556 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12557 {
12558 go_deeper(stack, depth, SCORE_SWAP3);
12559#ifdef DEBUG_TRIEWALK
12560 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
12561 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12562 c, c3);
12563#endif
12564 sp->ts_state = STATE_UNSWAP3;
12565 ++depth;
12566#ifdef FEAT_MBYTE
12567 if (has_mbyte)
12568 {
12569 tl = mb_char2len(c3);
12570 mch_memmove(p, p + n + fl, tl);
12571 mb_char2bytes(c2, p + tl);
12572 mb_char2bytes(c, p + fl + tl);
12573 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
12574 }
12575 else
12576#endif
12577 {
12578 p[0] = p[2];
12579 p[2] = c;
12580 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12581 }
12582 }
12583 else
12584 sp->ts_state = STATE_REP_INI;
12585 break;
12586
12587 case STATE_UNSWAP3:
12588 /* Undo STATE_SWAP3: "321" -> "123" */
12589 p = fword + sp->ts_fidx;
12590#ifdef FEAT_MBYTE
12591 if (has_mbyte)
12592 {
12593 n = MB_BYTE2LEN(*p);
12594 c2 = mb_ptr2char(p + n);
12595 fl = MB_BYTE2LEN(p[n]);
12596 c = mb_ptr2char(p + n + fl);
12597 tl = MB_BYTE2LEN(p[n + fl]);
12598 mch_memmove(p + fl + tl, p, n);
12599 mb_char2bytes(c, p);
12600 mb_char2bytes(c2, p + tl);
12601 p = p + tl;
12602 }
12603 else
12604#endif
12605 {
12606 c = *p;
12607 *p = p[2];
12608 p[2] = c;
12609 ++p;
12610 }
12611
12612 if (!soundfold && !spell_iswordp(p, curbuf))
12613 {
12614 /* Middle char is not a word char, skip the rotate. First and
12615 * third char were already checked at swap and swap3. */
12616 sp->ts_state = STATE_REP_INI;
12617 break;
12618 }
12619
12620 /* Rotate three characters left: "123" -> "231". We change
12621 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
12622 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12623 {
12624 go_deeper(stack, depth, SCORE_SWAP3);
12625#ifdef DEBUG_TRIEWALK
12626 p = fword + sp->ts_fidx;
12627 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
12628 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12629 p[0], p[1], p[2]);
12630#endif
12631 sp->ts_state = STATE_UNROT3L;
12632 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000012633 p = fword + sp->ts_fidx;
12634#ifdef FEAT_MBYTE
12635 if (has_mbyte)
12636 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012637 n = mb_cptr2len(p);
Bram Moolenaarea424162005-06-16 21:51:00 +000012638 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012639 fl = mb_cptr2len(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012640 fl += mb_cptr2len(p + n + fl);
12641 mch_memmove(p, p + n, fl);
12642 mb_char2bytes(c, p + fl);
12643 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000012644 }
12645 else
12646#endif
12647 {
12648 c = *p;
12649 *p = p[1];
12650 p[1] = p[2];
12651 p[2] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012652 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
Bram Moolenaarea424162005-06-16 21:51:00 +000012653 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012654 }
12655 else
12656 sp->ts_state = STATE_REP_INI;
12657 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012658
Bram Moolenaar4770d092006-01-12 23:22:24 +000012659 case STATE_UNROT3L:
12660 /* Undo ROT3L: "231" -> "123" */
12661 p = fword + sp->ts_fidx;
Bram Moolenaarea424162005-06-16 21:51:00 +000012662#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000012663 if (has_mbyte)
12664 {
12665 n = MB_BYTE2LEN(*p);
12666 n += MB_BYTE2LEN(p[n]);
12667 c = mb_ptr2char(p + n);
12668 tl = MB_BYTE2LEN(p[n]);
12669 mch_memmove(p + tl, p, n);
12670 mb_char2bytes(c, p);
12671 }
12672 else
Bram Moolenaarea424162005-06-16 21:51:00 +000012673#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000012674 {
12675 c = p[2];
12676 p[2] = p[1];
12677 p[1] = *p;
12678 *p = c;
12679 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012680
Bram Moolenaar4770d092006-01-12 23:22:24 +000012681 /* Rotate three bytes right: "123" -> "312". We change "fword"
12682 * here, it's changed back afterwards at STATE_UNROT3R. */
12683 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12684 {
12685 go_deeper(stack, depth, SCORE_SWAP3);
12686#ifdef DEBUG_TRIEWALK
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012687 p = fword + sp->ts_fidx;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012688 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
12689 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12690 p[0], p[1], p[2]);
12691#endif
12692 sp->ts_state = STATE_UNROT3R;
12693 ++depth;
12694 p = fword + sp->ts_fidx;
12695#ifdef FEAT_MBYTE
12696 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000012697 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012698 n = mb_cptr2len(p);
12699 n += mb_cptr2len(p + n);
12700 c = mb_ptr2char(p + n);
12701 tl = mb_cptr2len(p + n);
12702 mch_memmove(p + tl, p, n);
12703 mb_char2bytes(c, p);
12704 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
Bram Moolenaar0c405862005-06-22 22:26:26 +000012705 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012706 else
12707#endif
12708 {
12709 c = p[2];
12710 p[2] = p[1];
12711 p[1] = *p;
12712 *p = c;
12713 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12714 }
12715 }
12716 else
12717 sp->ts_state = STATE_REP_INI;
12718 break;
12719
12720 case STATE_UNROT3R:
12721 /* Undo ROT3R: "312" -> "123" */
12722 p = fword + sp->ts_fidx;
12723#ifdef FEAT_MBYTE
12724 if (has_mbyte)
12725 {
12726 c = mb_ptr2char(p);
12727 tl = MB_BYTE2LEN(*p);
12728 n = MB_BYTE2LEN(p[tl]);
12729 n += MB_BYTE2LEN(p[tl + n]);
12730 mch_memmove(p, p + tl, n);
12731 mb_char2bytes(c, p + n);
12732 }
12733 else
12734#endif
12735 {
12736 c = *p;
12737 *p = p[1];
12738 p[1] = p[2];
12739 p[2] = c;
12740 }
12741 /*FALLTHROUGH*/
12742
12743 case STATE_REP_INI:
12744 /* Check if matching with REP items from the .aff file would work.
12745 * Quickly skip if:
12746 * - there are no REP items and we are not in the soundfold trie
12747 * - the score is going to be too high anyway
12748 * - already applied a REP item or swapped here */
12749 if ((lp->lp_replang == NULL && !soundfold)
12750 || sp->ts_score + SCORE_REP >= su->su_maxscore
12751 || sp->ts_fidx < sp->ts_fidxtry)
12752 {
12753 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012754 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012755 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012756
Bram Moolenaar4770d092006-01-12 23:22:24 +000012757 /* Use the first byte to quickly find the first entry that may
12758 * match. If the index is -1 there is none. */
12759 if (soundfold)
12760 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
12761 else
12762 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012763
Bram Moolenaar4770d092006-01-12 23:22:24 +000012764 if (sp->ts_curi < 0)
12765 {
12766 sp->ts_state = STATE_FINAL;
12767 break;
12768 }
12769
12770 sp->ts_state = STATE_REP;
12771 /*FALLTHROUGH*/
12772
12773 case STATE_REP:
12774 /* Try matching with REP items from the .aff file. For each match
12775 * replace the characters and check if the resulting word is
12776 * valid. */
12777 p = fword + sp->ts_fidx;
12778
12779 if (soundfold)
12780 gap = &slang->sl_repsal;
12781 else
12782 gap = &lp->lp_replang->sl_rep;
12783 while (sp->ts_curi < gap->ga_len)
12784 {
12785 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
12786 if (*ftp->ft_from != *p)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012787 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012788 /* past possible matching entries */
12789 sp->ts_curi = gap->ga_len;
12790 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012791 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012792 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
12793 && TRY_DEEPER(su, stack, depth, SCORE_REP))
12794 {
12795 go_deeper(stack, depth, SCORE_REP);
12796#ifdef DEBUG_TRIEWALK
12797 sprintf(changename[depth], "%.*s-%s: replace %s with %s",
12798 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12799 ftp->ft_from, ftp->ft_to);
12800#endif
12801 /* Need to undo this afterwards. */
12802 sp->ts_state = STATE_REP_UNDO;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012803
Bram Moolenaar4770d092006-01-12 23:22:24 +000012804 /* Change the "from" to the "to" string. */
12805 ++depth;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000012806 fl = (int)STRLEN(ftp->ft_from);
12807 tl = (int)STRLEN(ftp->ft_to);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012808 if (fl != tl)
12809 {
Bram Moolenaara7241f52008-06-24 20:39:31 +000012810 STRMOVE(p + tl, p + fl);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012811 repextra += tl - fl;
12812 }
12813 mch_memmove(p, ftp->ft_to, tl);
12814 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
12815#ifdef FEAT_MBYTE
12816 stack[depth].ts_tcharlen = 0;
12817#endif
12818 break;
12819 }
12820 }
12821
12822 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
12823 /* No (more) matches. */
12824 sp->ts_state = STATE_FINAL;
12825
12826 break;
12827
12828 case STATE_REP_UNDO:
12829 /* Undo a REP replacement and continue with the next one. */
12830 if (soundfold)
12831 gap = &slang->sl_repsal;
12832 else
12833 gap = &lp->lp_replang->sl_rep;
12834 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000012835 fl = (int)STRLEN(ftp->ft_from);
12836 tl = (int)STRLEN(ftp->ft_to);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012837 p = fword + sp->ts_fidx;
12838 if (fl != tl)
12839 {
Bram Moolenaara7241f52008-06-24 20:39:31 +000012840 STRMOVE(p + fl, p + tl);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012841 repextra -= tl - fl;
12842 }
12843 mch_memmove(p, ftp->ft_from, fl);
12844 sp->ts_state = STATE_REP;
12845 break;
12846
12847 default:
12848 /* Did all possible states at this level, go up one level. */
12849 --depth;
12850
12851 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
12852 {
12853 /* Continue in or go back to the prefix tree. */
12854 byts = pbyts;
12855 idxs = pidxs;
12856 }
12857
12858 /* Don't check for CTRL-C too often, it takes time. */
12859 if (--breakcheckcount == 0)
12860 {
12861 ui_breakcheck();
12862 breakcheckcount = 1000;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012863 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012864 }
12865 }
12866}
12867
Bram Moolenaar4770d092006-01-12 23:22:24 +000012868
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012869/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000012870 * Go one level deeper in the tree.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012871 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000012872 static void
12873go_deeper(stack, depth, score_add)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012874 trystate_T *stack;
12875 int depth;
12876 int score_add;
12877{
Bram Moolenaarea424162005-06-16 21:51:00 +000012878 stack[depth + 1] = stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012879 stack[depth + 1].ts_state = STATE_START;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012880 stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012881 stack[depth + 1].ts_curi = 1; /* start just after length byte */
Bram Moolenaard12a1322005-08-21 22:08:24 +000012882 stack[depth + 1].ts_flags = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012883}
12884
#ifdef FEAT_MBYTE
/*
 * Case-folding may change the byte length of a character.  Count the number
 * of characters in the first "flen" bytes of "fword" and return the number of
 * bytes that the same number of characters occupy in "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u	*fword;
    int		flen;
    char_u	*word;
{
    char_u	*fend = fword + flen;
    char_u	*s;
    int		nchars = 0;

    /* Count the characters in the folded text. */
    s = fword;
    while (s < fend)
    {
	mb_ptr_adv(s);
	++nchars;
    }

    /* Skip over that many characters in the unfolded text. */
    s = word;
    while (nchars > 0)
    {
	mb_ptr_adv(s);
	--nchars;
    }

    return (int)(s - word);
}
#endif
12906
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012907/*
12908 * "fword" is a good word with case folded. Find the matching keep-case
12909 * words and put it in "kword".
12910 * Theoretically there could be several keep-case words that result in the
12911 * same case-folded word, but we only find one...
12912 */
12913 static void
12914find_keepcap_word(slang, fword, kword)
12915 slang_T *slang;
12916 char_u *fword;
12917 char_u *kword;
12918{
12919 char_u uword[MAXWLEN]; /* "fword" in upper-case */
12920 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012921 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012922
12923 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012924 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012925 int round[MAXWLEN];
12926 int fwordidx[MAXWLEN];
12927 int uwordidx[MAXWLEN];
12928 int kwordlen[MAXWLEN];
12929
12930 int flen, ulen;
12931 int l;
12932 int len;
12933 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012934 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012935 char_u *p;
12936 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012937 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012938
12939 if (byts == NULL)
12940 {
12941 /* array is empty: "cannot happen" */
12942 *kword = NUL;
12943 return;
12944 }
12945
12946 /* Make an all-cap version of "fword". */
12947 allcap_copy(fword, uword);
12948
12949 /*
12950 * Each character needs to be tried both case-folded and upper-case.
12951 * All this gets very complicated if we keep in mind that changing case
12952 * may change the byte length of a multi-byte character...
12953 */
12954 depth = 0;
12955 arridx[0] = 0;
12956 round[0] = 0;
12957 fwordidx[0] = 0;
12958 uwordidx[0] = 0;
12959 kwordlen[0] = 0;
12960 while (depth >= 0)
12961 {
12962 if (fword[fwordidx[depth]] == NUL)
12963 {
12964 /* We are at the end of "fword". If the tree allows a word to end
12965 * here we have found a match. */
12966 if (byts[arridx[depth] + 1] == 0)
12967 {
12968 kword[kwordlen[depth]] = NUL;
12969 return;
12970 }
12971
12972 /* kword is getting too long, continue one level up */
12973 --depth;
12974 }
12975 else if (++round[depth] > 2)
12976 {
12977 /* tried both fold-case and upper-case character, continue one
12978 * level up */
12979 --depth;
12980 }
12981 else
12982 {
12983 /*
12984 * round[depth] == 1: Try using the folded-case character.
12985 * round[depth] == 2: Try using the upper-case character.
12986 */
12987#ifdef FEAT_MBYTE
12988 if (has_mbyte)
12989 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012990 flen = mb_cptr2len(fword + fwordidx[depth]);
12991 ulen = mb_cptr2len(uword + uwordidx[depth]);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012992 }
12993 else
12994#endif
12995 ulen = flen = 1;
12996 if (round[depth] == 1)
12997 {
12998 p = fword + fwordidx[depth];
12999 l = flen;
13000 }
13001 else
13002 {
13003 p = uword + uwordidx[depth];
13004 l = ulen;
13005 }
13006
13007 for (tryidx = arridx[depth]; l > 0; --l)
13008 {
13009 /* Perform a binary search in the list of accepted bytes. */
13010 len = byts[tryidx++];
13011 c = *p++;
13012 lo = tryidx;
13013 hi = tryidx + len - 1;
13014 while (lo < hi)
13015 {
13016 m = (lo + hi) / 2;
13017 if (byts[m] > c)
13018 hi = m - 1;
13019 else if (byts[m] < c)
13020 lo = m + 1;
13021 else
13022 {
13023 lo = hi = m;
13024 break;
13025 }
13026 }
13027
13028 /* Stop if there is no matching byte. */
13029 if (hi < lo || byts[lo] != c)
13030 break;
13031
13032 /* Continue at the child (if there is one). */
13033 tryidx = idxs[lo];
13034 }
13035
13036 if (l == 0)
13037 {
13038 /*
13039 * Found the matching char. Copy it to "kword" and go a
13040 * level deeper.
13041 */
13042 if (round[depth] == 1)
13043 {
13044 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
13045 flen);
13046 kwordlen[depth + 1] = kwordlen[depth] + flen;
13047 }
13048 else
13049 {
13050 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
13051 ulen);
13052 kwordlen[depth + 1] = kwordlen[depth] + ulen;
13053 }
13054 fwordidx[depth + 1] = fwordidx[depth] + flen;
13055 uwordidx[depth + 1] = uwordidx[depth] + ulen;
13056
13057 ++depth;
13058 arridx[depth] = tryidx;
13059 round[depth] = 0;
13060 }
13061 }
13062 }
13063
13064 /* Didn't find it: "cannot happen". */
13065 *kword = NUL;
13066}
13067
13068/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013069 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
13070 * su->su_sga.
13071 */
13072 static void
13073score_comp_sal(su)
13074 suginfo_T *su;
13075{
13076 langp_T *lp;
13077 char_u badsound[MAXWLEN];
13078 int i;
13079 suggest_T *stp;
13080 suggest_T *sstp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013081 int score;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000013082 int lpi;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013083
13084 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
13085 return;
13086
13087 /* Use the sound-folding of the first language that supports it. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013088 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000013089 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013090 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013091 if (lp->lp_slang->sl_sal.ga_len > 0)
13092 {
13093 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013094 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013095
13096 for (i = 0; i < su->su_ga.ga_len; ++i)
13097 {
13098 stp = &SUG(su->su_ga, i);
13099
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013100 /* Case-fold the suggested word, sound-fold it and compute the
13101 * sound-a-like score. */
13102 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013103 if (score < SCORE_MAXMAX)
13104 {
13105 /* Add the suggestion. */
13106 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
13107 sstp->st_word = vim_strsave(stp->st_word);
13108 if (sstp->st_word != NULL)
13109 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013110 sstp->st_wordlen = stp->st_wordlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013111 sstp->st_score = score;
13112 sstp->st_altscore = 0;
13113 sstp->st_orglen = stp->st_orglen;
13114 ++su->su_sga.ga_len;
13115 }
13116 }
13117 }
13118 break;
13119 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000013120 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013121}
13122
13123/*
13124 * Combine the list of suggestions in su->su_ga and su->su_sga.
13125 * They are intwined.
13126 */
13127 static void
13128score_combine(su)
13129 suginfo_T *su;
13130{
13131 int i;
13132 int j;
13133 garray_T ga;
13134 garray_T *gap;
13135 langp_T *lp;
13136 suggest_T *stp;
13137 char_u *p;
13138 char_u badsound[MAXWLEN];
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013139 int round;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000013140 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013141 slang_T *slang = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013142
13143 /* Add the alternate score to su_ga. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013144 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013145 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013146 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013147 if (lp->lp_slang->sl_sal.ga_len > 0)
13148 {
13149 /* soundfold the bad word */
Bram Moolenaar4770d092006-01-12 23:22:24 +000013150 slang = lp->lp_slang;
13151 spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013152
13153 for (i = 0; i < su->su_ga.ga_len; ++i)
13154 {
13155 stp = &SUG(su->su_ga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013156 stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013157 if (stp->st_altscore == SCORE_MAXMAX)
13158 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
13159 else
13160 stp->st_score = (stp->st_score * 3
13161 + stp->st_altscore) / 4;
13162 stp->st_salscore = FALSE;
13163 }
13164 break;
13165 }
13166 }
13167
Bram Moolenaarf193fff2006-04-27 00:02:13 +000013168 if (slang == NULL) /* Using "double" without sound folding. */
13169 {
13170 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
13171 su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013172 return;
Bram Moolenaarf193fff2006-04-27 00:02:13 +000013173 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000013174
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013175 /* Add the alternate score to su_sga. */
13176 for (i = 0; i < su->su_sga.ga_len; ++i)
13177 {
13178 stp = &SUG(su->su_sga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013179 stp->st_altscore = spell_edit_score(slang,
13180 su->su_badword, stp->st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013181 if (stp->st_score == SCORE_MAXMAX)
13182 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
13183 else
13184 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
13185 stp->st_salscore = TRUE;
13186 }
13187
Bram Moolenaar4770d092006-01-12 23:22:24 +000013188 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
13189 * for both lists. */
13190 check_suggestions(su, &su->su_ga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013191 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013192 check_suggestions(su, &su->su_sga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013193 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
13194
13195 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
13196 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
13197 return;
13198
13199 stp = &SUG(ga, 0);
13200 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
13201 {
13202 /* round 1: get a suggestion from su_ga
13203 * round 2: get a suggestion from su_sga */
13204 for (round = 1; round <= 2; ++round)
13205 {
13206 gap = round == 1 ? &su->su_ga : &su->su_sga;
13207 if (i < gap->ga_len)
13208 {
13209 /* Don't add a word if it's already there. */
13210 p = SUG(*gap, i).st_word;
13211 for (j = 0; j < ga.ga_len; ++j)
13212 if (STRCMP(stp[j].st_word, p) == 0)
13213 break;
13214 if (j == ga.ga_len)
13215 stp[ga.ga_len++] = SUG(*gap, i);
13216 else
13217 vim_free(p);
13218 }
13219 }
13220 }
13221
13222 ga_clear(&su->su_ga);
13223 ga_clear(&su->su_sga);
13224
13225 /* Truncate the list to the number of suggestions that will be displayed. */
13226 if (ga.ga_len > su->su_maxcount)
13227 {
13228 for (i = su->su_maxcount; i < ga.ga_len; ++i)
13229 vim_free(stp[i].st_word);
13230 ga.ga_len = su->su_maxcount;
13231 }
13232
13233 su->su_ga = ga;
13234}
13235
13236/*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013237 * For the goodword in "stp" compute the soundalike score compared to the
13238 * badword.
13239 */
13240 static int
13241stp_sal_score(stp, su, slang, badsound)
13242 suggest_T *stp;
13243 suginfo_T *su;
13244 slang_T *slang;
13245 char_u *badsound; /* sound-folded badword */
13246{
13247 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013248 char_u *pbad;
13249 char_u *pgood;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013250 char_u badsound2[MAXWLEN];
13251 char_u fword[MAXWLEN];
13252 char_u goodsound[MAXWLEN];
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013253 char_u goodword[MAXWLEN];
13254 int lendiff;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013255
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013256 lendiff = (int)(su->su_badlen - stp->st_orglen);
13257 if (lendiff >= 0)
13258 pbad = badsound;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013259 else
13260 {
13261 /* soundfold the bad word with more characters following */
13262 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
13263
13264 /* When joining two words the sound often changes a lot. E.g., "t he"
13265 * sounds like "t h" while "the" sounds like "@". Avoid that by
13266 * removing the space. Don't do it when the good word also contains a
13267 * space. */
13268 if (vim_iswhite(su->su_badptr[su->su_badlen])
13269 && *skiptowhite(stp->st_word) == NUL)
13270 for (p = fword; *(p = skiptowhite(p)) != NUL; )
Bram Moolenaara7241f52008-06-24 20:39:31 +000013271 STRMOVE(p, p + 1);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013272
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013273 spell_soundfold(slang, fword, TRUE, badsound2);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013274 pbad = badsound2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013275 }
13276
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013277 if (lendiff > 0)
13278 {
13279 /* Add part of the bad word to the good word, so that we soundfold
13280 * what replaces the bad word. */
13281 STRCPY(goodword, stp->st_word);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013282 vim_strncpy(goodword + stp->st_wordlen,
13283 su->su_badptr + su->su_badlen - lendiff, lendiff);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013284 pgood = goodword;
13285 }
13286 else
13287 pgood = stp->st_word;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013288
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013289 /* Sound-fold the word and compute the score for the difference. */
13290 spell_soundfold(slang, pgood, FALSE, goodsound);
13291
13292 return soundalike_score(goodsound, pbad);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013293}
13294
Bram Moolenaar4770d092006-01-12 23:22:24 +000013295/* structure used to store soundfolded words that add_sound_suggest() has
13296 * handled already. */
13297typedef struct
13298{
13299 short sft_score; /* lowest score used */
13300 char_u sft_word[1]; /* soundfolded word, actually longer */
13301} sftword_T;
13302
13303static sftword_T dumsft;
13304#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
13305#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key)
13306
13307/*
13308 * Prepare for calling suggest_try_soundalike().
13309 */
13310 static void
13311suggest_try_soundalike_prep()
13312{
13313 langp_T *lp;
13314 int lpi;
13315 slang_T *slang;
13316
13317 /* Do this for all languages that support sound folding and for which a
13318 * .sug file has been loaded. */
13319 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13320 {
13321 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13322 slang = lp->lp_slang;
13323 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
13324 /* prepare the hashtable used by add_sound_suggest() */
13325 hash_init(&slang->sl_sounddone);
13326 }
13327}
13328
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013329/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013330 * Find suggestions by comparing the word in a sound-a-like form.
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013331 * Note: This doesn't support postponed prefixes.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013332 */
13333 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000013334suggest_try_soundalike(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013335 suginfo_T *su;
13336{
13337 char_u salword[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013338 langp_T *lp;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000013339 int lpi;
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013340 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013341
Bram Moolenaar4770d092006-01-12 23:22:24 +000013342 /* Do this for all languages that support sound folding and for which a
13343 * .sug file has been loaded. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013344 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013345 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013346 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13347 slang = lp->lp_slang;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013348 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013349 {
13350 /* soundfold the bad word */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013351 spell_soundfold(slang, su->su_fbadword, TRUE, salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013352
Bram Moolenaar4770d092006-01-12 23:22:24 +000013353 /* try all kinds of inserts/deletes/swaps/etc. */
13354 /* TODO: also soundfold the next words, so that we can try joining
13355 * and splitting */
13356 suggest_trie_walk(su, lp, salword, TRUE);
13357 }
13358 }
13359}
13360
13361/*
13362 * Finish up after calling suggest_try_soundalike().
13363 */
13364 static void
13365suggest_try_soundalike_finish()
13366{
13367 langp_T *lp;
13368 int lpi;
13369 slang_T *slang;
13370 int todo;
13371 hashitem_T *hi;
13372
13373 /* Do this for all languages that support sound folding and for which a
13374 * .sug file has been loaded. */
13375 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13376 {
13377 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13378 slang = lp->lp_slang;
13379 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
13380 {
13381 /* Free the info about handled words. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000013382 todo = (int)slang->sl_sounddone.ht_used;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013383 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
13384 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013385 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013386 vim_free(HI2SFT(hi));
13387 --todo;
13388 }
Bram Moolenaar6417da62007-03-08 13:49:53 +000013389
13390 /* Clear the hashtable, it may also be used by another region. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000013391 hash_clear(&slang->sl_sounddone);
Bram Moolenaar6417da62007-03-08 13:49:53 +000013392 hash_init(&slang->sl_sounddone);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013393 }
13394 }
13395}
13396
13397/*
13398 * A match with a soundfolded word is found. Add the good word(s) that
13399 * produce this soundfolded word.
13400 */
13401 static void
13402add_sound_suggest(su, goodword, score, lp)
13403 suginfo_T *su;
13404 char_u *goodword;
13405 int score; /* soundfold score */
13406 langp_T *lp;
13407{
13408 slang_T *slang = lp->lp_slang; /* language for sound folding */
13409 int sfwordnr;
13410 char_u *nrline;
13411 int orgnr;
13412 char_u theword[MAXWLEN];
13413 int i;
13414 int wlen;
13415 char_u *byts;
13416 idx_T *idxs;
13417 int n;
13418 int wordcount;
13419 int wc;
13420 int goodscore;
13421 hash_T hash;
13422 hashitem_T *hi;
13423 sftword_T *sft;
13424 int bc, gc;
13425 int limit;
13426
13427 /*
13428 * It's very well possible that the same soundfold word is found several
13429 * times with different scores. Since the following is quite slow only do
13430 * the words that have a better score than before. Use a hashtable to
13431 * remember the words that have been done.
13432 */
13433 hash = hash_hash(goodword);
13434 hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
13435 if (HASHITEM_EMPTY(hi))
13436 {
Bram Moolenaarf193fff2006-04-27 00:02:13 +000013437 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T)
13438 + STRLEN(goodword)));
Bram Moolenaar4770d092006-01-12 23:22:24 +000013439 if (sft != NULL)
13440 {
13441 sft->sft_score = score;
13442 STRCPY(sft->sft_word, goodword);
13443 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
13444 }
13445 }
13446 else
13447 {
13448 sft = HI2SFT(hi);
13449 if (score >= sft->sft_score)
13450 return;
13451 sft->sft_score = score;
13452 }
13453
13454 /*
13455 * Find the word nr in the soundfold tree.
13456 */
13457 sfwordnr = soundfold_find(slang, goodword);
13458 if (sfwordnr < 0)
13459 {
13460 EMSG2(_(e_intern2), "add_sound_suggest()");
13461 return;
13462 }
13463
13464 /*
13465 * go over the list of good words that produce this soundfold word
13466 */
13467 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
13468 orgnr = 0;
13469 while (*nrline != NUL)
13470 {
13471 /* The wordnr was stored in a minimal nr of bytes as an offset to the
13472 * previous wordnr. */
13473 orgnr += bytes2offset(&nrline);
13474
13475 byts = slang->sl_fbyts;
13476 idxs = slang->sl_fidxs;
13477
13478 /* Lookup the word "orgnr" one of the two tries. */
13479 n = 0;
13480 wlen = 0;
13481 wordcount = 0;
13482 for (;;)
13483 {
13484 i = 1;
13485 if (wordcount == orgnr && byts[n + 1] == NUL)
13486 break; /* found end of word */
13487
13488 if (byts[n + 1] == NUL)
13489 ++wordcount;
13490
13491 /* skip over the NUL bytes */
13492 for ( ; byts[n + i] == NUL; ++i)
13493 if (i > byts[n]) /* safety check */
13494 {
13495 STRCPY(theword + wlen, "BAD");
13496 goto badword;
13497 }
13498
13499 /* One of the siblings must have the word. */
13500 for ( ; i < byts[n]; ++i)
13501 {
13502 wc = idxs[idxs[n + i]]; /* nr of words under this byte */
13503 if (wordcount + wc > orgnr)
13504 break;
13505 wordcount += wc;
13506 }
13507
13508 theword[wlen++] = byts[n + i];
13509 n = idxs[n + i];
13510 }
13511badword:
13512 theword[wlen] = NUL;
13513
13514 /* Go over the possible flags and regions. */
13515 for (; i <= byts[n] && byts[n + i] == NUL; ++i)
13516 {
13517 char_u cword[MAXWLEN];
13518 char_u *p;
13519 int flags = (int)idxs[n + i];
13520
Bram Moolenaare1438bb2006-03-01 22:01:55 +000013521 /* Skip words with the NOSUGGEST flag */
13522 if (flags & WF_NOSUGGEST)
13523 continue;
13524
Bram Moolenaar4770d092006-01-12 23:22:24 +000013525 if (flags & WF_KEEPCAP)
13526 {
13527 /* Must find the word in the keep-case tree. */
13528 find_keepcap_word(slang, theword, cword);
13529 p = cword;
13530 }
13531 else
13532 {
13533 flags |= su->su_badflags;
13534 if ((flags & WF_CAPMASK) != 0)
13535 {
13536 /* Need to fix case according to "flags". */
13537 make_case_word(theword, cword, flags);
13538 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013539 }
13540 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000013541 p = theword;
13542 }
13543
13544 /* Add the suggestion. */
13545 if (sps_flags & SPS_DOUBLE)
13546 {
13547 /* Add the suggestion if the score isn't too bad. */
13548 if (score <= su->su_maxscore)
13549 add_suggestion(su, &su->su_sga, p, su->su_badlen,
13550 score, 0, FALSE, slang, FALSE);
13551 }
13552 else
13553 {
13554 /* Add a penalty for words in another region. */
13555 if ((flags & WF_REGION)
13556 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
13557 goodscore = SCORE_REGION;
13558 else
13559 goodscore = 0;
13560
13561 /* Add a small penalty for changing the first letter from
13562 * lower to upper case. Helps for "tath" -> "Kath", which is
13563 * less common thatn "tath" -> "path". Don't do it when the
13564 * letter is the same, that has already been counted. */
13565 gc = PTR2CHAR(p);
13566 if (SPELL_ISUPPER(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013567 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013568 bc = PTR2CHAR(su->su_badword);
13569 if (!SPELL_ISUPPER(bc)
13570 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
13571 goodscore += SCORE_ICASE / 2;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013572 }
13573
Bram Moolenaar4770d092006-01-12 23:22:24 +000013574 /* Compute the score for the good word. This only does letter
13575 * insert/delete/swap/replace. REP items are not considered,
13576 * which may make the score a bit higher.
13577 * Use a limit for the score to make it work faster. Use
13578 * MAXSCORE(), because RESCORE() will change the score.
13579 * If the limit is very high then the iterative method is
13580 * inefficient, using an array is quicker. */
13581 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
13582 if (limit > SCORE_LIMITMAX)
13583 goodscore += spell_edit_score(slang, su->su_badword, p);
13584 else
13585 goodscore += spell_edit_score_limit(slang, su->su_badword,
13586 p, limit);
13587
13588 /* When going over the limit don't bother to do the rest. */
13589 if (goodscore < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013590 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013591 /* Give a bonus to words seen before. */
13592 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013593
Bram Moolenaar4770d092006-01-12 23:22:24 +000013594 /* Add the suggestion if the score isn't too bad. */
13595 goodscore = RESCORE(goodscore, score);
13596 if (goodscore <= su->su_sfmaxscore)
13597 add_suggestion(su, &su->su_ga, p, su->su_badlen,
13598 goodscore, score, TRUE, slang, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013599 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013600 }
13601 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000013602 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013603 }
13604}
13605
13606/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000013607 * Find word "word" in fold-case tree for "slang" and return the word number.
13608 */
13609 static int
13610soundfold_find(slang, word)
13611 slang_T *slang;
13612 char_u *word;
13613{
13614 idx_T arridx = 0;
13615 int len;
13616 int wlen = 0;
13617 int c;
13618 char_u *ptr = word;
13619 char_u *byts;
13620 idx_T *idxs;
13621 int wordnr = 0;
13622
13623 byts = slang->sl_sbyts;
13624 idxs = slang->sl_sidxs;
13625
13626 for (;;)
13627 {
13628 /* First byte is the number of possible bytes. */
13629 len = byts[arridx++];
13630
13631 /* If the first possible byte is a zero the word could end here.
13632 * If the word ends we found the word. If not skip the NUL bytes. */
13633 c = ptr[wlen];
13634 if (byts[arridx] == NUL)
13635 {
13636 if (c == NUL)
13637 break;
13638
13639 /* Skip over the zeros, there can be several. */
13640 while (len > 0 && byts[arridx] == NUL)
13641 {
13642 ++arridx;
13643 --len;
13644 }
13645 if (len == 0)
13646 return -1; /* no children, word should have ended here */
13647 ++wordnr;
13648 }
13649
13650 /* If the word ends we didn't find it. */
13651 if (c == NUL)
13652 return -1;
13653
13654 /* Perform a binary search in the list of accepted bytes. */
13655 if (c == TAB) /* <Tab> is handled like <Space> */
13656 c = ' ';
13657 while (byts[arridx] < c)
13658 {
13659 /* The word count is in the first idxs[] entry of the child. */
13660 wordnr += idxs[idxs[arridx]];
13661 ++arridx;
13662 if (--len == 0) /* end of the bytes, didn't find it */
13663 return -1;
13664 }
13665 if (byts[arridx] != c) /* didn't find the byte */
13666 return -1;
13667
13668 /* Continue at the child (if there is one). */
13669 arridx = idxs[arridx];
13670 ++wlen;
13671
13672 /* One space in the good word may stand for several spaces in the
13673 * checked word. */
13674 if (c == ' ')
13675 while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
13676 ++wlen;
13677 }
13678
13679 return wordnr;
13680}
13681
13682/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013683 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013684 */
13685 static void
13686make_case_word(fword, cword, flags)
13687 char_u *fword;
13688 char_u *cword;
13689 int flags;
13690{
13691 if (flags & WF_ALLCAP)
13692 /* Make it all upper-case */
13693 allcap_copy(fword, cword);
13694 else if (flags & WF_ONECAP)
13695 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013696 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013697 else
13698 /* Use goodword as-is. */
13699 STRCPY(cword, fword);
13700}
13701
Bram Moolenaarea424162005-06-16 21:51:00 +000013702/*
13703 * Use map string "map" for languages "lp".
13704 */
13705 static void
13706set_map_str(lp, map)
13707 slang_T *lp;
13708 char_u *map;
13709{
13710 char_u *p;
13711 int headc = 0;
13712 int c;
13713 int i;
13714
13715 if (*map == NUL)
13716 {
13717 lp->sl_has_map = FALSE;
13718 return;
13719 }
13720 lp->sl_has_map = TRUE;
13721
Bram Moolenaar4770d092006-01-12 23:22:24 +000013722 /* Init the array and hash tables empty. */
Bram Moolenaarea424162005-06-16 21:51:00 +000013723 for (i = 0; i < 256; ++i)
13724 lp->sl_map_array[i] = 0;
13725#ifdef FEAT_MBYTE
13726 hash_init(&lp->sl_map_hash);
13727#endif
13728
13729 /*
13730 * The similar characters are stored separated with slashes:
13731 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
13732 * before the same slash. For characters above 255 sl_map_hash is used.
13733 */
13734 for (p = map; *p != NUL; )
13735 {
13736#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000013737 c = mb_cptr2char_adv(&p);
Bram Moolenaarea424162005-06-16 21:51:00 +000013738#else
13739 c = *p++;
13740#endif
13741 if (c == '/')
13742 headc = 0;
13743 else
13744 {
13745 if (headc == 0)
13746 headc = c;
13747
13748#ifdef FEAT_MBYTE
13749 /* Characters above 255 don't fit in sl_map_array[], put them in
13750 * the hash table. Each entry is the char, a NUL the headchar and
13751 * a NUL. */
13752 if (c >= 256)
13753 {
13754 int cl = mb_char2len(c);
13755 int headcl = mb_char2len(headc);
13756 char_u *b;
13757 hash_T hash;
13758 hashitem_T *hi;
13759
13760 b = alloc((unsigned)(cl + headcl + 2));
13761 if (b == NULL)
13762 return;
13763 mb_char2bytes(c, b);
13764 b[cl] = NUL;
13765 mb_char2bytes(headc, b + cl + 1);
13766 b[cl + 1 + headcl] = NUL;
13767 hash = hash_hash(b);
13768 hi = hash_lookup(&lp->sl_map_hash, b, hash);
13769 if (HASHITEM_EMPTY(hi))
13770 hash_add_item(&lp->sl_map_hash, hi, b, hash);
13771 else
13772 {
13773 /* This should have been checked when generating the .spl
13774 * file. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000013775 EMSG(_("E783: duplicate char in MAP entry"));
Bram Moolenaarea424162005-06-16 21:51:00 +000013776 vim_free(b);
13777 }
13778 }
13779 else
13780#endif
13781 lp->sl_map_array[c] = headc;
13782 }
13783 }
13784}
13785
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013786/*
13787 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
13788 * lines in the .aff file.
13789 */
13790 static int
13791similar_chars(slang, c1, c2)
13792 slang_T *slang;
13793 int c1;
13794 int c2;
13795{
Bram Moolenaarea424162005-06-16 21:51:00 +000013796 int m1, m2;
13797#ifdef FEAT_MBYTE
13798 char_u buf[MB_MAXBYTES];
13799 hashitem_T *hi;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013800
Bram Moolenaarea424162005-06-16 21:51:00 +000013801 if (c1 >= 256)
13802 {
13803 buf[mb_char2bytes(c1, buf)] = 0;
13804 hi = hash_find(&slang->sl_map_hash, buf);
13805 if (HASHITEM_EMPTY(hi))
13806 m1 = 0;
13807 else
13808 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13809 }
13810 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013811#endif
Bram Moolenaarea424162005-06-16 21:51:00 +000013812 m1 = slang->sl_map_array[c1];
13813 if (m1 == 0)
13814 return FALSE;
13815
13816
13817#ifdef FEAT_MBYTE
13818 if (c2 >= 256)
13819 {
13820 buf[mb_char2bytes(c2, buf)] = 0;
13821 hi = hash_find(&slang->sl_map_hash, buf);
13822 if (HASHITEM_EMPTY(hi))
13823 m2 = 0;
13824 else
13825 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13826 }
13827 else
13828#endif
13829 m2 = slang->sl_map_array[c2];
13830
13831 return m1 == m2;
13832}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013833
13834/*
13835 * Add a suggestion to the list of suggestions.
Bram Moolenaar4770d092006-01-12 23:22:24 +000013836 * For a suggestion that is already in the list the lowest score is remembered.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013837 */
13838 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000013839add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus,
13840 slang, maxsf)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013841 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013842 garray_T *gap; /* either su_ga or su_sga */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013843 char_u *goodword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013844 int badlenarg; /* len of bad word replaced with "goodword" */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013845 int score;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013846 int altscore;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013847 int had_bonus; /* value for st_had_bonus */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013848 slang_T *slang; /* language for sound folding */
Bram Moolenaar4770d092006-01-12 23:22:24 +000013849 int maxsf; /* su_maxscore applies to soundfold score,
13850 su_sfmaxscore to the total score. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013851{
Bram Moolenaar4770d092006-01-12 23:22:24 +000013852 int goodlen; /* len of goodword changed */
13853 int badlen; /* len of bad word changed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013854 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013855 suggest_T new_sug;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013856 int i;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013857 char_u *pgood, *pbad;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013858
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013859 /* Minimize "badlen" for consistency. Avoids that changing "the the" to
13860 * "thee the" is added next to changing the first "the" the "thee". */
13861 pgood = goodword + STRLEN(goodword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013862 pbad = su->su_badptr + badlenarg;
13863 for (;;)
Bram Moolenaar0c405862005-06-22 22:26:26 +000013864 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000013865 goodlen = (int)(pgood - goodword);
13866 badlen = (int)(pbad - su->su_badptr);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013867 if (goodlen <= 0 || badlen <= 0)
13868 break;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013869 mb_ptr_back(goodword, pgood);
13870 mb_ptr_back(su->su_badptr, pbad);
13871#ifdef FEAT_MBYTE
13872 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000013873 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013874 if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
13875 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013876 }
13877 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013878#endif
13879 if (*pgood != *pbad)
13880 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013881 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000013882
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013883 if (badlen == 0 && goodlen == 0)
13884 /* goodword doesn't change anything; may happen for "the the" changing
13885 * the first "the" to itself. */
13886 return;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013887
Bram Moolenaar89d40322006-08-29 15:30:07 +000013888 if (gap->ga_len == 0)
13889 i = -1;
13890 else
13891 {
13892 /* Check if the word is already there. Also check the length that is
13893 * being replaced "thes," -> "these" is a different suggestion from
13894 * "thes" -> "these". */
13895 stp = &SUG(*gap, 0);
13896 for (i = gap->ga_len; --i >= 0; ++stp)
13897 if (stp->st_wordlen == goodlen
13898 && stp->st_orglen == badlen
13899 && STRNCMP(stp->st_word, goodword, goodlen) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013900 {
Bram Moolenaar89d40322006-08-29 15:30:07 +000013901 /*
13902 * Found it. Remember the word with the lowest score.
13903 */
13904 if (stp->st_slang == NULL)
13905 stp->st_slang = slang;
13906
13907 new_sug.st_score = score;
13908 new_sug.st_altscore = altscore;
13909 new_sug.st_had_bonus = had_bonus;
13910
13911 if (stp->st_had_bonus != had_bonus)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013912 {
Bram Moolenaar89d40322006-08-29 15:30:07 +000013913 /* Only one of the two had the soundalike score computed.
13914 * Need to do that for the other one now, otherwise the
13915 * scores can't be compared. This happens because
13916 * suggest_try_change() doesn't compute the soundalike
13917 * word to keep it fast, while some special methods set
13918 * the soundalike score to zero. */
13919 if (had_bonus)
13920 rescore_one(su, stp);
13921 else
13922 {
13923 new_sug.st_word = stp->st_word;
13924 new_sug.st_wordlen = stp->st_wordlen;
13925 new_sug.st_slang = stp->st_slang;
13926 new_sug.st_orglen = badlen;
13927 rescore_one(su, &new_sug);
13928 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013929 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013930
Bram Moolenaar89d40322006-08-29 15:30:07 +000013931 if (stp->st_score > new_sug.st_score)
13932 {
13933 stp->st_score = new_sug.st_score;
13934 stp->st_altscore = new_sug.st_altscore;
13935 stp->st_had_bonus = new_sug.st_had_bonus;
13936 }
13937 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013938 }
Bram Moolenaar89d40322006-08-29 15:30:07 +000013939 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013940
Bram Moolenaar4770d092006-01-12 23:22:24 +000013941 if (i < 0 && ga_grow(gap, 1) == OK)
13942 {
13943 /* Add a suggestion. */
13944 stp = &SUG(*gap, gap->ga_len);
13945 stp->st_word = vim_strnsave(goodword, goodlen);
13946 if (stp->st_word != NULL)
13947 {
13948 stp->st_wordlen = goodlen;
13949 stp->st_score = score;
13950 stp->st_altscore = altscore;
13951 stp->st_had_bonus = had_bonus;
13952 stp->st_orglen = badlen;
13953 stp->st_slang = slang;
13954 ++gap->ga_len;
13955
13956 /* If we have too many suggestions now, sort the list and keep
13957 * the best suggestions. */
13958 if (gap->ga_len > SUG_MAX_COUNT(su))
13959 {
13960 if (maxsf)
13961 su->su_sfmaxscore = cleanup_suggestions(gap,
13962 su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
13963 else
13964 {
13965 i = su->su_maxscore;
13966 su->su_maxscore = cleanup_suggestions(gap,
13967 su->su_maxscore, SUG_CLEAN_COUNT(su));
13968 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013969 }
13970 }
13971 }
13972}
13973
13974/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000013975 * Suggestions may in fact be flagged as errors. Esp. for banned words and
13976 * for split words, such as "the the". Remove these from the list here.
13977 */
13978 static void
13979check_suggestions(su, gap)
13980 suginfo_T *su;
13981 garray_T *gap; /* either su_ga or su_sga */
13982{
13983 suggest_T *stp;
13984 int i;
13985 char_u longword[MAXWLEN + 1];
13986 int len;
13987 hlf_T attr;
13988
13989 stp = &SUG(*gap, 0);
13990 for (i = gap->ga_len - 1; i >= 0; --i)
13991 {
13992 /* Need to append what follows to check for "the the". */
13993 STRCPY(longword, stp[i].st_word);
13994 len = stp[i].st_wordlen;
13995 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
13996 MAXWLEN - len);
13997 attr = HLF_COUNT;
13998 (void)spell_check(curwin, longword, &attr, NULL, FALSE);
13999 if (attr != HLF_COUNT)
14000 {
14001 /* Remove this entry. */
14002 vim_free(stp[i].st_word);
14003 --gap->ga_len;
14004 if (i < gap->ga_len)
14005 mch_memmove(stp + i, stp + i + 1,
14006 sizeof(suggest_T) * (gap->ga_len - i));
14007 }
14008 }
14009}
14010
14011
14012/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014013 * Add a word to be banned.
14014 */
14015 static void
14016add_banned(su, word)
14017 suginfo_T *su;
14018 char_u *word;
14019{
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000014020 char_u *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014021 hash_T hash;
14022 hashitem_T *hi;
14023
Bram Moolenaar4770d092006-01-12 23:22:24 +000014024 hash = hash_hash(word);
14025 hi = hash_lookup(&su->su_banned, word, hash);
14026 if (HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014027 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014028 s = vim_strsave(word);
14029 if (s != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014030 hash_add_item(&su->su_banned, hi, s, hash);
14031 }
14032}
14033
14034/*
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014035 * Recompute the score for all suggestions if sound-folding is possible. This
14036 * is slow, thus only done for the final results.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014037 */
14038 static void
14039rescore_suggestions(su)
14040 suginfo_T *su;
14041{
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014042 int i;
14043
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014044 if (su->su_sallang != NULL)
Bram Moolenaar8b96d642005-09-05 22:05:30 +000014045 for (i = 0; i < su->su_ga.ga_len; ++i)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014046 rescore_one(su, &SUG(su->su_ga, i));
14047}
14048
14049/*
14050 * Recompute the score for one suggestion if sound-folding is possible.
14051 */
14052 static void
14053rescore_one(su, stp)
Bram Moolenaar4effc802005-09-30 21:12:02 +000014054 suginfo_T *su;
14055 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014056{
14057 slang_T *slang = stp->st_slang;
14058 char_u sal_badword[MAXWLEN];
Bram Moolenaar4effc802005-09-30 21:12:02 +000014059 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014060
14061 /* Only rescore suggestions that have no sal score yet and do have a
14062 * language. */
14063 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
14064 {
14065 if (slang == su->su_sallang)
Bram Moolenaar4effc802005-09-30 21:12:02 +000014066 p = su->su_sal_badword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014067 else
Bram Moolenaar8b96d642005-09-05 22:05:30 +000014068 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014069 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
Bram Moolenaar4effc802005-09-30 21:12:02 +000014070 p = sal_badword;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014071 }
Bram Moolenaar4effc802005-09-30 21:12:02 +000014072
14073 stp->st_altscore = stp_sal_score(stp, su, slang, p);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000014074 if (stp->st_altscore == SCORE_MAXMAX)
14075 stp->st_altscore = SCORE_BIG;
14076 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
14077 stp->st_had_bonus = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014078 }
14079}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014080
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014081static int
14082#ifdef __BORLANDC__
14083_RTLENTRYF
14084#endif
14085sug_compare __ARGS((const void *s1, const void *s2));
14086
14087/*
14088 * Function given to qsort() to sort the suggestions on st_score.
Bram Moolenaar6b730e12005-09-16 21:47:57 +000014089 * First on "st_score", then "st_altscore" then alphabetically.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014090 */
14091 static int
14092#ifdef __BORLANDC__
14093_RTLENTRYF
14094#endif
14095sug_compare(s1, s2)
14096 const void *s1;
14097 const void *s2;
14098{
14099 suggest_T *p1 = (suggest_T *)s1;
14100 suggest_T *p2 = (suggest_T *)s2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014101 int n = p1->st_score - p2->st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014102
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014103 if (n == 0)
Bram Moolenaar6b730e12005-09-16 21:47:57 +000014104 {
14105 n = p1->st_altscore - p2->st_altscore;
14106 if (n == 0)
14107 n = STRICMP(p1->st_word, p2->st_word);
14108 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014109 return n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014110}
14111
14112/*
14113 * Cleanup the suggestions:
14114 * - Sort on score.
14115 * - Remove words that won't be displayed.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014116 * Returns the maximum score in the list or "maxscore" unmodified.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014117 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014118 static int
14119cleanup_suggestions(gap, maxscore, keep)
14120 garray_T *gap;
14121 int maxscore;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014122 int keep; /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014123{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014124 suggest_T *stp = &SUG(*gap, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014125 int i;
14126
14127 /* Sort the list. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014128 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014129
14130 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014131 if (gap->ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014132 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014133 for (i = keep; i < gap->ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014134 vim_free(stp[i].st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014135 gap->ga_len = keep;
14136 return stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014137 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014138 return maxscore;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014139}
14140
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * Uses the first language of the current buffer that has SAL items, but only
 * when 'spell' is set in the current window and 'spelllang' is not empty.
 * Otherwise a copy of "word" itself is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(word)
    char_u      *word;
{
    langp_T     *lp;
    char_u      sound[MAXWLEN];
    int         lpi;

    /* Spell checking is not active: return the word as-is. */
    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
        return vim_strsave(word);

    for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
    {
        lp = LANGP_ENTRY(curbuf->b_langp, lpi);
        if (lp->lp_slang->sl_sal.ga_len <= 0)
            continue;           /* this language can't do sound folding */

        /* soundfold the word */
        spell_soundfold(lp->lp_slang, word, FALSE, sound);
        return vim_strsave(sound);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
14171
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014172/*
14173 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
Bram Moolenaard12a1322005-08-21 22:08:24 +000014174 *
14175 * There are many ways to turn a word into a sound-a-like representation. The
14176 * oldest is Soundex (1918!). A nice overview can be found in "Approximate
14177 * swedish name matching - survey and test of different algorithms" by Klas
14178 * Erikson.
14179 *
14180 * We support two methods:
14181 * 1. SOFOFROM/SOFOTO do a simple character mapping.
14182 * 2. SAL items define a more advanced sound-folding (and much slower).
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014183 */
14184 static void
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014185spell_soundfold(slang, inword, folded, res)
14186 slang_T *slang;
14187 char_u *inword;
14188 int folded; /* "inword" is already case-folded */
14189 char_u *res;
14190{
14191 char_u fword[MAXWLEN];
14192 char_u *word;
14193
14194 if (slang->sl_sofo)
14195 /* SOFOFROM and SOFOTO used */
14196 spell_soundfold_sofo(slang, inword, res);
14197 else
14198 {
14199 /* SAL items used. Requires the word to be case-folded. */
14200 if (folded)
14201 word = inword;
14202 else
14203 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000014204 (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014205 word = fword;
14206 }
14207
14208#ifdef FEAT_MBYTE
14209 if (has_mbyte)
14210 spell_soundfold_wsal(slang, word, res);
14211 else
14212#endif
14213 spell_soundfold_sal(slang, word, res);
14214 }
14215}
14216
14217/*
14218 * Perform sound folding of "inword" into "res" according to SOFOFROM and
14219 * SOFOTO lines.
14220 */
14221 static void
14222spell_soundfold_sofo(slang, inword, res)
14223 slang_T *slang;
14224 char_u *inword;
14225 char_u *res;
14226{
14227 char_u *s;
14228 int ri = 0;
14229 int c;
14230
14231#ifdef FEAT_MBYTE
14232 if (has_mbyte)
14233 {
14234 int prevc = 0;
14235 int *ip;
14236
14237 /* The sl_sal_first[] table contains the translation for chars up to
14238 * 255, sl_sal the rest. */
14239 for (s = inword; *s != NUL; )
14240 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000014241 c = mb_cptr2char_adv(&s);
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014242 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
14243 c = ' ';
14244 else if (c < 256)
14245 c = slang->sl_sal_first[c];
14246 else
14247 {
14248 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
14249 if (ip == NULL) /* empty list, can't match */
14250 c = NUL;
14251 else
14252 for (;;) /* find "c" in the list */
14253 {
14254 if (*ip == 0) /* not found */
14255 {
14256 c = NUL;
14257 break;
14258 }
14259 if (*ip == c) /* match! */
14260 {
14261 c = ip[1];
14262 break;
14263 }
14264 ip += 2;
14265 }
14266 }
14267
14268 if (c != NUL && c != prevc)
14269 {
14270 ri += mb_char2bytes(c, res + ri);
14271 if (ri + MB_MAXBYTES > MAXWLEN)
14272 break;
14273 prevc = c;
14274 }
14275 }
14276 }
14277 else
14278#endif
14279 {
14280 /* The sl_sal_first[] table contains the translation. */
14281 for (s = inword; (c = *s) != NUL; ++s)
14282 {
14283 if (vim_iswhite(c))
14284 c = ' ';
14285 else
14286 c = slang->sl_sal_first[c];
14287 if (c != NUL && (ri == 0 || res[ri - 1] != c))
14288 res[ri++] = c;
14289 }
14290 }
14291
14292 res[ri] = NUL;
14293}
14294
14295 static void
14296spell_soundfold_sal(slang, inword, res)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014297 slang_T *slang;
14298 char_u *inword;
14299 char_u *res;
14300{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014301 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014302 char_u word[MAXWLEN];
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014303 char_u *s = inword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014304 char_u *t;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014305 char_u *pf;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014306 int i, j, z;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014307 int reslen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014308 int n, k = 0;
14309 int z0;
14310 int k0;
14311 int n0;
14312 int c;
14313 int pri;
14314 int p0 = -333;
14315 int c0;
14316
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014317 /* Remove accents, if wanted. We actually remove all non-word characters.
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014318 * But keep white space. We need a copy, the word may be changed here. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014319 if (slang->sl_rem_accents)
14320 {
14321 t = word;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014322 while (*s != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014323 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014324 if (vim_iswhite(*s))
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014325 {
14326 *t++ = ' ';
14327 s = skipwhite(s);
14328 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014329 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014330 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014331 if (spell_iswordp_nmw(s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014332 *t++ = *s;
14333 ++s;
14334 }
14335 }
14336 *t = NUL;
14337 }
14338 else
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014339 STRCPY(word, s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014340
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014341 smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014342
14343 /*
14344 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014345 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014346 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014347 i = reslen = z = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014348 while ((c = word[i]) != NUL)
14349 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014350 /* Start with the first rule that has the character in the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014351 n = slang->sl_sal_first[c];
14352 z0 = 0;
14353
14354 if (n >= 0)
14355 {
14356 /* check all rules for the same letter */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014357 for (; (s = smp[n].sm_lead)[0] == c; ++n)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014358 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014359 /* Quickly skip entries that don't match the word. Most
14360 * entries are less then three chars, optimize for that. */
14361 k = smp[n].sm_leadlen;
14362 if (k > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014363 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014364 if (word[i + 1] != s[1])
14365 continue;
14366 if (k > 2)
14367 {
14368 for (j = 2; j < k; ++j)
14369 if (word[i + j] != s[j])
14370 break;
14371 if (j < k)
14372 continue;
14373 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014374 }
14375
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014376 if ((pf = smp[n].sm_oneof) != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014377 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014378 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014379 while (*pf != NUL && *pf != word[i + k])
14380 ++pf;
14381 if (*pf == NUL)
14382 continue;
14383 ++k;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014384 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014385 s = smp[n].sm_rules;
14386 pri = 5; /* default priority */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014387
14388 p0 = *s;
14389 k0 = k;
14390 while (*s == '-' && k > 1)
14391 {
14392 k--;
14393 s++;
14394 }
14395 if (*s == '<')
14396 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014397 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014398 {
14399 /* determine priority */
14400 pri = *s - '0';
14401 s++;
14402 }
14403 if (*s == '^' && *(s + 1) == '^')
14404 s++;
14405
14406 if (*s == NUL
14407 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014408 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014409 || spell_iswordp(word + i - 1, curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014410 && (*(s + 1) != '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014411 || (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014412 || (*s == '$' && i > 0
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014413 && spell_iswordp(word + i - 1, curbuf)
14414 && (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014415 {
14416 /* search for followup rules, if: */
14417 /* followup and k > 1 and NO '-' in searchstring */
14418 c0 = word[i + k - 1];
14419 n0 = slang->sl_sal_first[c0];
14420
14421 if (slang->sl_followup && k > 1 && n0 >= 0
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014422 && p0 != '-' && word[i + k] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014423 {
14424 /* test follow-up rule for "word[i + k]" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014425 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014426 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014427 /* Quickly skip entries that don't match the word.
14428 * */
14429 k0 = smp[n0].sm_leadlen;
14430 if (k0 > 1)
14431 {
14432 if (word[i + k] != s[1])
14433 continue;
14434 if (k0 > 2)
14435 {
14436 pf = word + i + k + 1;
14437 for (j = 2; j < k0; ++j)
14438 if (*pf++ != s[j])
14439 break;
14440 if (j < k0)
14441 continue;
14442 }
14443 }
14444 k0 += k - 1;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014445
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014446 if ((pf = smp[n0].sm_oneof) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014447 {
14448 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014449 * "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014450 while (*pf != NUL && *pf != word[i + k0])
14451 ++pf;
14452 if (*pf == NUL)
14453 continue;
14454 ++k0;
14455 }
14456
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014457 p0 = 5;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014458 s = smp[n0].sm_rules;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014459 while (*s == '-')
14460 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014461 /* "k0" gets NOT reduced because
14462 * "if (k0 == k)" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014463 s++;
14464 }
14465 if (*s == '<')
14466 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014467 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014468 {
14469 p0 = *s - '0';
14470 s++;
14471 }
14472
14473 if (*s == NUL
14474 /* *s == '^' cuts */
14475 || (*s == '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014476 && !spell_iswordp(word + i + k0,
14477 curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014478 {
14479 if (k0 == k)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014480 /* this is just a piece of the string */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014481 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014482
14483 if (p0 < pri)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014484 /* priority too low */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014485 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014486 /* rule fits; stop search */
14487 break;
14488 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014489 }
14490
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014491 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014492 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014493 }
14494
14495 /* replace string */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014496 s = smp[n].sm_to;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014497 if (s == NULL)
14498 s = (char_u *)"";
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014499 pf = smp[n].sm_rules;
14500 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014501 if (p0 == 1 && z == 0)
14502 {
14503 /* rule with '<' is used */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014504 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
14505 || res[reslen - 1] == *s))
14506 reslen--;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014507 z0 = 1;
14508 z = 1;
14509 k0 = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014510 while (*s != NUL && word[i + k0] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014511 {
14512 word[i + k0] = *s;
14513 k0++;
14514 s++;
14515 }
14516 if (k > k0)
Bram Moolenaara7241f52008-06-24 20:39:31 +000014517 STRMOVE(word + i + k0, word + i + k);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014518
14519 /* new "actual letter" */
14520 c = word[i];
14521 }
14522 else
14523 {
14524 /* no '<' rule used */
14525 i += k - 1;
14526 z = 0;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014527 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014528 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014529 if (reslen == 0 || res[reslen - 1] != *s)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014530 res[reslen++] = *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014531 s++;
14532 }
14533 /* new "actual letter" */
14534 c = *s;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014535 if (strstr((char *)pf, "^^") != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014536 {
14537 if (c != NUL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014538 res[reslen++] = c;
Bram Moolenaara7241f52008-06-24 20:39:31 +000014539 STRMOVE(word, word + i + 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014540 i = 0;
14541 z0 = 1;
14542 }
14543 }
14544 break;
14545 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014546 }
14547 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014548 else if (vim_iswhite(c))
14549 {
14550 c = ' ';
14551 k = 1;
14552 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014553
14554 if (z0 == 0)
14555 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014556 if (k && !p0 && reslen < MAXWLEN && c != NUL
14557 && (!slang->sl_collapse || reslen == 0
14558 || res[reslen - 1] != c))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014559 /* condense only double letters */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014560 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014561
14562 i++;
14563 z = 0;
14564 k = 0;
14565 }
14566 }
14567
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014568 res[reslen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014569}
14570
#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
 * Multi-byte version of spell_soundfold().
 *
 * The word is first converted to an array of character values, then the SAL
 * items in "slang->sl_sal" are applied to it and the result is converted back
 * to a multi-byte string.  Items are found through "sl_sal_first", which is
 * indexed with the lowest byte of the character, thus the full character
 * must be compared as well.
 */
    static void
spell_soundfold_wsal(slang, inword, res)
    slang_T     *slang;
    char_u      *inword;
    char_u      *res;
{
    salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
    int         word[MAXWLEN];
    int         wres[MAXWLEN];
    int         l;
    char_u      *s;
    int         *ws;
    char_u      *t;
    int         *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;
    int         did_white = FALSE;

    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     * Stop when "word" is full, one entry is needed for the terminating NUL.
     */
    n = 0;
    for (s = inword; *s != NUL && n < MAXWLEN - 1; )
    {
        t = s;
        c = mb_cptr2char_adv(&s);
        if (slang->sl_rem_accents)
        {
            if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
            {
                /* A sequence of white space becomes one space. */
                if (did_white)
                    continue;
                c = ' ';
                did_white = TRUE;
            }
            else
            {
                did_white = FALSE;
                if (!spell_iswordp_nmw(t))
                    continue;
            }
        }
        word[n++] = c;
    }
    word[n] = NUL;

    /*
     * This comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c & 0xff];
        z0 = 0;

        if (n >= 0)
        {
            /* check all rules for the same index byte */
            for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are less than three chars, optimize for that. */
                if (c != ws[0])
                    continue;
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != ws[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != ws[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof_w) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                /* "p0" remembers whether the rules start with a '-', "k0"
                 * the matched length before the '-' reduction. */
                p0 = *s;
                k0 = k;
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                    || spell_iswordp_w(word + i - 1, curbuf)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp_w(word + i + k0, curbuf))))
                        || (*s == '$' && i > 0
                            && spell_iswordp_w(word + i - 1, curbuf)
                            && (!spell_iswordp_w(word + i + k0, curbuf))))
                {
                    /* search for followup rules, if:    */
                    /* followup and k > 1  and  NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0 & 0xff];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                                           && p0 != '-' && word[i + k] != NUL)
                    {
                        /* Test follow-up rule for "word[i + k]"; loop over
                         * all entries with the same index byte. */
                        for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
                                                         == (c0 & 0xff); ++n0)
                        {
                            /* Quickly skip entries that don't match the
                             * word. */
                            if (c0 != ws[0])
                                continue;
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != ws[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != ws[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof_w) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                         && !spell_iswordp_w(word + i + k0,
                                                                     curbuf)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* A follow-up rule with at least the same priority
                         * was found: skip the current rule. */
                        if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
                                                               == (c0 & 0xff))
                            continue;
                    }

                    /* replace string */
                    ws = smp[n].sm_to_w;
                    s = smp[n].sm_rules;
                    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used */
                        if (reslen > 0 && ws != NULL && *ws != NUL
                                && (wres[reslen - 1] == c
                                                  || wres[reslen - 1] == *ws))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        if (ws != NULL)
                            while (*ws != NUL && word[i + k0] != NUL)
                            {
                                word[i + k0] = *ws;
                                k0++;
                                ws++;
                            }
                        if (k > k0)
                        {
                            /* Find the terminating NUL of the int array.
                             * STRLEN() cannot be used for this, it counts
                             * bytes and stops at the first zero byte. */
                            for (j = i + k; word[j] != NUL; ++j)
                                ;
                            mch_memmove(word + i + k0, word + i + k,
                                         sizeof(int) * (j - (i + k) + 1));
                        }

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        if (ws != NULL)
                            while (*ws != NUL && ws[1] != NUL
                                                          && reslen < MAXWLEN)
                            {
                                if (reslen == 0 || wres[reslen - 1] != *ws)
                                    wres[reslen++] = *ws;
                                ws++;
                            }
                        /* new "actual letter" */
                        if (ws == NULL)
                            c = NUL;
                        else
                            c = *ws;
                        if (strstr((char *)s, "^^") != NULL)
                        {
                            /* Same limit as for all other stores in
                             * "wres". */
                            if (c != NUL && reslen < MAXWLEN)
                                wres[reslen++] = c;
                            /* Move the rest of the word, including the NUL,
                             * to the start.  Count ints, not bytes. */
                            for (j = i + 1; word[j] != NUL; ++j)
                                ;
                            mch_memmove(word, word + i + 1,
                                         sizeof(int) * (j - (i + 1) + 1));
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (vim_iswhite(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                                                    || wres[reslen - 1] != c))
                /* condense only double letters */
                wres[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res".
     * Stop when there might not be room for another character. */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
        l += mb_char2bytes(wres[n], res + l);
        if (l + MB_MAXBYTES > MAXWLEN)
            break;
    }
    res[l] = NUL;
}
#endif
14883
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014884/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014885 * Compute a score for two sound-a-like words.
14886 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
14887 * Instead of a generic loop we write out the code. That keeps it fast by
14888 * avoiding checks that will not be possible.
14889 */
14890 static int
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014891soundalike_score(goodstart, badstart)
14892 char_u *goodstart; /* sound-folded good word */
14893 char_u *badstart; /* sound-folded bad word */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014894{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014895 char_u *goodsound = goodstart;
14896 char_u *badsound = badstart;
14897 int goodlen;
14898 int badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014899 int n;
14900 char_u *pl, *ps;
14901 char_u *pl2, *ps2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014902 int score = 0;
14903
14904 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
14905 * counted so much, vowels halfway the word aren't counted at all. */
14906 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
14907 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014908 if (badsound[1] == goodsound[1]
14909 || (badsound[1] != NUL
14910 && goodsound[1] != NUL
14911 && badsound[2] == goodsound[2]))
14912 {
14913 /* handle like a substitute */
14914 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014915 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000014916 {
14917 score = 2 * SCORE_DEL / 3;
14918 if (*badsound == '*')
14919 ++badsound;
14920 else
14921 ++goodsound;
14922 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014923 }
14924
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000014925 goodlen = (int)STRLEN(goodsound);
14926 badlen = (int)STRLEN(badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014927
Bram Moolenaarf711faf2007-05-10 16:48:19 +000014928 /* Return quickly if the lengths are too different to be fixed by two
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014929 * changes. */
14930 n = goodlen - badlen;
14931 if (n < -2 || n > 2)
14932 return SCORE_MAXMAX;
14933
14934 if (n > 0)
14935 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014936 pl = goodsound; /* goodsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014937 ps = badsound;
14938 }
14939 else
14940 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014941 pl = badsound; /* badsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014942 ps = goodsound;
14943 }
14944
14945 /* Skip over the identical part. */
14946 while (*pl == *ps && *pl != NUL)
14947 {
14948 ++pl;
14949 ++ps;
14950 }
14951
14952 switch (n)
14953 {
14954 case -2:
14955 case 2:
14956 /*
14957 * Must delete two characters from "pl".
14958 */
14959 ++pl; /* first delete */
14960 while (*pl == *ps)
14961 {
14962 ++pl;
14963 ++ps;
14964 }
14965 /* strings must be equal after second delete */
14966 if (STRCMP(pl + 1, ps) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014967 return score + SCORE_DEL * 2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014968
14969 /* Failed to compare. */
14970 break;
14971
14972 case -1:
14973 case 1:
14974 /*
14975 * Minimal one delete from "pl" required.
14976 */
14977
14978 /* 1: delete */
14979 pl2 = pl + 1;
14980 ps2 = ps;
14981 while (*pl2 == *ps2)
14982 {
14983 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014984 return score + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014985 ++pl2;
14986 ++ps2;
14987 }
14988
14989 /* 2: delete then swap, then rest must be equal */
14990 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14991 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014992 return score + SCORE_DEL + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014993
14994 /* 3: delete then substitute, then the rest must be equal */
14995 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014996 return score + SCORE_DEL + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014997
14998 /* 4: first swap then delete */
14999 if (pl[0] == ps[1] && pl[1] == ps[0])
15000 {
15001 pl2 = pl + 2; /* swap, skip two chars */
15002 ps2 = ps + 2;
15003 while (*pl2 == *ps2)
15004 {
15005 ++pl2;
15006 ++ps2;
15007 }
15008 /* delete a char and then strings must be equal */
15009 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015010 return score + SCORE_SWAP + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015011 }
15012
15013 /* 5: first substitute then delete */
15014 pl2 = pl + 1; /* substitute, skip one char */
15015 ps2 = ps + 1;
15016 while (*pl2 == *ps2)
15017 {
15018 ++pl2;
15019 ++ps2;
15020 }
15021 /* delete a char and then strings must be equal */
15022 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015023 return score + SCORE_SUBST + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015024
15025 /* Failed to compare. */
15026 break;
15027
15028 case 0:
15029 /*
Bram Moolenaar6ae167a2009-02-11 16:58:49 +000015030 * Lengths are equal, thus changes must result in same length: An
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015031 * insert is only possible in combination with a delete.
15032 * 1: check if for identical strings
15033 */
15034 if (*pl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015035 return score;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015036
15037 /* 2: swap */
15038 if (pl[0] == ps[1] && pl[1] == ps[0])
15039 {
15040 pl2 = pl + 2; /* swap, skip two chars */
15041 ps2 = ps + 2;
15042 while (*pl2 == *ps2)
15043 {
15044 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015045 return score + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015046 ++pl2;
15047 ++ps2;
15048 }
15049 /* 3: swap and swap again */
15050 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
15051 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015052 return score + SCORE_SWAP + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015053
15054 /* 4: swap and substitute */
15055 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015056 return score + SCORE_SWAP + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015057 }
15058
15059 /* 5: substitute */
15060 pl2 = pl + 1;
15061 ps2 = ps + 1;
15062 while (*pl2 == *ps2)
15063 {
15064 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015065 return score + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015066 ++pl2;
15067 ++ps2;
15068 }
15069
15070 /* 6: substitute and swap */
15071 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
15072 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015073 return score + SCORE_SUBST + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015074
15075 /* 7: substitute and substitute */
15076 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015077 return score + SCORE_SUBST + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015078
15079 /* 8: insert then delete */
15080 pl2 = pl;
15081 ps2 = ps + 1;
15082 while (*pl2 == *ps2)
15083 {
15084 ++pl2;
15085 ++ps2;
15086 }
15087 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015088 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015089
15090 /* 9: delete then insert */
15091 pl2 = pl + 1;
15092 ps2 = ps;
15093 while (*pl2 == *ps2)
15094 {
15095 ++pl2;
15096 ++ps2;
15097 }
15098 if (STRCMP(pl2, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015099 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015100
15101 /* Failed to compare. */
15102 break;
15103 }
15104
15105 return SCORE_MAXMAX;
15106}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015107
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015108/*
15109 * Compute the "edit distance" to turn "badword" into "goodword". The less
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015110 * deletes/inserts/substitutes/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015111 *
Bram Moolenaard12a1322005-08-21 22:08:24 +000015112 * The algorithm is described by Du and Chang, 1992.
15113 * The implementation of the algorithm comes from Aspell editdist.cpp,
15114 * edit_distance(). It has been converted from C++ to C and modified to
15115 * support multi-byte characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015116 */
15117 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +000015118spell_edit_score(slang, badword, goodword)
15119 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015120 char_u *badword;
15121 char_u *goodword;
15122{
15123 int *cnt;
Bram Moolenaarf711faf2007-05-10 16:48:19 +000015124 int badlen, goodlen; /* lengths including NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015125 int j, i;
15126 int t;
15127 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015128 int pbc, pgc;
15129#ifdef FEAT_MBYTE
15130 char_u *p;
15131 int wbadword[MAXWLEN];
15132 int wgoodword[MAXWLEN];
15133
15134 if (has_mbyte)
15135 {
15136 /* Get the characters from the multi-byte strings and put them in an
15137 * int array for easy access. */
15138 for (p = badword, badlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000015139 wbadword[badlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000015140 wbadword[badlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015141 for (p = goodword, goodlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000015142 wgoodword[goodlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000015143 wgoodword[goodlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015144 }
15145 else
15146#endif
15147 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000015148 badlen = (int)STRLEN(badword) + 1;
15149 goodlen = (int)STRLEN(goodword) + 1;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015150 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015151
15152 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
15153#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015154 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
15155 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015156 if (cnt == NULL)
15157 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015158
15159 CNT(0, 0) = 0;
15160 for (j = 1; j <= goodlen; ++j)
Bram Moolenaar4770d092006-01-12 23:22:24 +000015161 CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015162
15163 for (i = 1; i <= badlen; ++i)
15164 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000015165 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015166 for (j = 1; j <= goodlen; ++j)
15167 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015168#ifdef FEAT_MBYTE
15169 if (has_mbyte)
15170 {
15171 bc = wbadword[i - 1];
15172 gc = wgoodword[j - 1];
15173 }
15174 else
15175#endif
15176 {
15177 bc = badword[i - 1];
15178 gc = goodword[j - 1];
15179 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015180 if (bc == gc)
15181 CNT(i, j) = CNT(i - 1, j - 1);
15182 else
15183 {
15184 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015185 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015186 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
15187 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000015188 {
15189 /* For a similar character use SCORE_SIMILAR. */
15190 if (slang != NULL
15191 && slang->sl_has_map
15192 && similar_chars(slang, gc, bc))
15193 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
15194 else
15195 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
15196 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015197
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015198 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015199 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000015200#ifdef FEAT_MBYTE
15201 if (has_mbyte)
15202 {
15203 pbc = wbadword[i - 2];
15204 pgc = wgoodword[j - 2];
15205 }
15206 else
15207#endif
15208 {
15209 pbc = badword[i - 2];
15210 pgc = goodword[j - 2];
15211 }
15212 if (bc == pgc && pbc == gc)
15213 {
15214 t = SCORE_SWAP + CNT(i - 2, j - 2);
15215 if (t < CNT(i, j))
15216 CNT(i, j) = t;
15217 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015218 }
15219 t = SCORE_DEL + CNT(i - 1, j);
15220 if (t < CNT(i, j))
15221 CNT(i, j) = t;
15222 t = SCORE_INS + CNT(i, j - 1);
15223 if (t < CNT(i, j))
15224 CNT(i, j) = t;
15225 }
15226 }
15227 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000015228
15229 i = CNT(badlen - 1, goodlen - 1);
15230 vim_free(cnt);
15231 return i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000015232}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000015233
/*
 * One entry of the stack that spell_edit_score_limit() uses for the edits
 * that are still to be tried.
 */
typedef struct
{
    int         badi;       /* presumably the index in the bad word */
    int         goodi;      /* presumably the index in the good word */
    int         score;      /* presumably the score reached at that point */
} limitscore_T;
15240
15241/*
15242 * Like spell_edit_score(), but with a limit on the score to make it faster.
15243 * May return SCORE_MAXMAX when the score is higher than "limit".
15244 *
15245 * This uses a stack for the edits still to be tried.
15246 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support
15247 * for multi-byte characters.
15248 */
15249 static int
15250spell_edit_score_limit(slang, badword, goodword, limit)
15251 slang_T *slang;
15252 char_u *badword;
15253 char_u *goodword;
15254 int limit;
15255{
15256 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
15257 int stackidx;
15258 int bi, gi;
15259 int bi2, gi2;
15260 int bc, gc;
15261 int score;
15262 int score_off;
15263 int minscore;
15264 int round;
15265
15266#ifdef FEAT_MBYTE
15267 /* Multi-byte characters require a bit more work, use a different function
15268 * to avoid testing "has_mbyte" quite often. */
15269 if (has_mbyte)
15270 return spell_edit_score_limit_w(slang, badword, goodword, limit);
15271#endif
15272
15273 /*
15274 * The idea is to go from start to end over the words. So long as
15275 * characters are equal just continue, this always gives the lowest score.
15276 * When there is a difference try several alternatives. Each alternative
15277 * increases "score" for the edit distance. Some of the alternatives are
15278 * pushed unto a stack and tried later, some are tried right away. At the
15279 * end of the word the score for one alternative is known. The lowest
15280 * possible score is stored in "minscore".
15281 */
15282 stackidx = 0;
15283 bi = 0;
15284 gi = 0;
15285 score = 0;
15286 minscore = limit + 1;
15287
15288 for (;;)
15289 {
15290 /* Skip over an equal part, score remains the same. */
15291 for (;;)
15292 {
15293 bc = badword[bi];
15294 gc = goodword[gi];
15295 if (bc != gc) /* stop at a char that's different */
15296 break;
15297 if (bc == NUL) /* both words end */
15298 {
15299 if (score < minscore)
15300 minscore = score;
15301 goto pop; /* do next alternative */
15302 }
15303 ++bi;
15304 ++gi;
15305 }
15306
15307 if (gc == NUL) /* goodword ends, delete badword chars */
15308 {
15309 do
15310 {
15311 if ((score += SCORE_DEL) >= minscore)
15312 goto pop; /* do next alternative */
15313 } while (badword[++bi] != NUL);
15314 minscore = score;
15315 }
15316 else if (bc == NUL) /* badword ends, insert badword chars */
15317 {
15318 do
15319 {
15320 if ((score += SCORE_INS) >= minscore)
15321 goto pop; /* do next alternative */
15322 } while (goodword[++gi] != NUL);
15323 minscore = score;
15324 }
15325 else /* both words continue */
15326 {
15327 /* If not close to the limit, perform a change. Only try changes
15328 * that may lead to a lower score than "minscore".
15329 * round 0: try deleting a char from badword
15330 * round 1: try inserting a char in badword */
15331 for (round = 0; round <= 1; ++round)
15332 {
15333 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
15334 if (score_off < minscore)
15335 {
15336 if (score_off + SCORE_EDIT_MIN >= minscore)
15337 {
15338 /* Near the limit, rest of the words must match. We
15339 * can check that right now, no need to push an item
15340 * onto the stack. */
15341 bi2 = bi + 1 - round;
15342 gi2 = gi + round;
15343 while (goodword[gi2] == badword[bi2])
15344 {
15345 if (goodword[gi2] == NUL)
15346 {
15347 minscore = score_off;
15348 break;
15349 }
15350 ++bi2;
15351 ++gi2;
15352 }
15353 }
15354 else
15355 {
15356 /* try deleting/inserting a character later */
15357 stack[stackidx].badi = bi + 1 - round;
15358 stack[stackidx].goodi = gi + round;
15359 stack[stackidx].score = score_off;
15360 ++stackidx;
15361 }
15362 }
15363 }
15364
15365 if (score + SCORE_SWAP < minscore)
15366 {
15367 /* If swapping two characters makes a match then the
15368 * substitution is more expensive, thus there is no need to
15369 * try both. */
15370 if (gc == badword[bi + 1] && bc == goodword[gi + 1])
15371 {
15372 /* Swap two characters, that is: skip them. */
15373 gi += 2;
15374 bi += 2;
15375 score += SCORE_SWAP;
15376 continue;
15377 }
15378 }
15379
15380 /* Substitute one character for another which is the same
15381 * thing as deleting a character from both goodword and badword.
15382 * Use a better score when there is only a case difference. */
15383 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
15384 score += SCORE_ICASE;
15385 else
15386 {
15387 /* For a similar character use SCORE_SIMILAR. */
15388 if (slang != NULL
15389 && slang->sl_has_map
15390 && similar_chars(slang, gc, bc))
15391 score += SCORE_SIMILAR;
15392 else
15393 score += SCORE_SUBST;
15394 }
15395
15396 if (score < minscore)
15397 {
15398 /* Do the substitution. */
15399 ++gi;
15400 ++bi;
15401 continue;
15402 }
15403 }
15404pop:
15405 /*
15406 * Get here to try the next alternative, pop it from the stack.
15407 */
15408 if (stackidx == 0) /* stack is empty, finished */
15409 break;
15410
15411 /* pop an item from the stack */
15412 --stackidx;
15413 gi = stack[stackidx].goodi;
15414 bi = stack[stackidx].badi;
15415 score = stack[stackidx].score;
15416 }
15417
15418 /* When the score goes over "limit" it may actually be much higher.
15419 * Return a very large number to avoid going below the limit when giving a
15420 * bonus. */
15421 if (minscore > limit)
15422 return SCORE_MAXMAX;
15423 return minscore;
15424}
15425
15426#ifdef FEAT_MBYTE
15427/*
15428 * Multi-byte version of spell_edit_score_limit().
15429 * Keep it in sync with the above!
15430 */
15431 static int
15432spell_edit_score_limit_w(slang, badword, goodword, limit)
15433 slang_T *slang;
15434 char_u *badword;
15435 char_u *goodword;
15436 int limit;
15437{
15438 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
15439 int stackidx;
15440 int bi, gi;
15441 int bi2, gi2;
15442 int bc, gc;
15443 int score;
15444 int score_off;
15445 int minscore;
15446 int round;
15447 char_u *p;
15448 int wbadword[MAXWLEN];
15449 int wgoodword[MAXWLEN];
15450
15451 /* Get the characters from the multi-byte strings and put them in an
15452 * int array for easy access. */
15453 bi = 0;
15454 for (p = badword; *p != NUL; )
15455 wbadword[bi++] = mb_cptr2char_adv(&p);
15456 wbadword[bi++] = 0;
15457 gi = 0;
15458 for (p = goodword; *p != NUL; )
15459 wgoodword[gi++] = mb_cptr2char_adv(&p);
15460 wgoodword[gi++] = 0;
15461
15462 /*
15463 * The idea is to go from start to end over the words. So long as
15464 * characters are equal just continue, this always gives the lowest score.
15465 * When there is a difference try several alternatives. Each alternative
15466 * increases "score" for the edit distance. Some of the alternatives are
15467 * pushed unto a stack and tried later, some are tried right away. At the
15468 * end of the word the score for one alternative is known. The lowest
15469 * possible score is stored in "minscore".
15470 */
15471 stackidx = 0;
15472 bi = 0;
15473 gi = 0;
15474 score = 0;
15475 minscore = limit + 1;
15476
15477 for (;;)
15478 {
15479 /* Skip over an equal part, score remains the same. */
15480 for (;;)
15481 {
15482 bc = wbadword[bi];
15483 gc = wgoodword[gi];
15484
15485 if (bc != gc) /* stop at a char that's different */
15486 break;
15487 if (bc == NUL) /* both words end */
15488 {
15489 if (score < minscore)
15490 minscore = score;
15491 goto pop; /* do next alternative */
15492 }
15493 ++bi;
15494 ++gi;
15495 }
15496
15497 if (gc == NUL) /* goodword ends, delete badword chars */
15498 {
15499 do
15500 {
15501 if ((score += SCORE_DEL) >= minscore)
15502 goto pop; /* do next alternative */
15503 } while (wbadword[++bi] != NUL);
15504 minscore = score;
15505 }
15506 else if (bc == NUL) /* badword ends, insert badword chars */
15507 {
15508 do
15509 {
15510 if ((score += SCORE_INS) >= minscore)
15511 goto pop; /* do next alternative */
15512 } while (wgoodword[++gi] != NUL);
15513 minscore = score;
15514 }
15515 else /* both words continue */
15516 {
15517 /* If not close to the limit, perform a change. Only try changes
15518 * that may lead to a lower score than "minscore".
15519 * round 0: try deleting a char from badword
15520 * round 1: try inserting a char in badword */
15521 for (round = 0; round <= 1; ++round)
15522 {
15523 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
15524 if (score_off < minscore)
15525 {
15526 if (score_off + SCORE_EDIT_MIN >= minscore)
15527 {
15528 /* Near the limit, rest of the words must match. We
15529 * can check that right now, no need to push an item
15530 * onto the stack. */
15531 bi2 = bi + 1 - round;
15532 gi2 = gi + round;
15533 while (wgoodword[gi2] == wbadword[bi2])
15534 {
15535 if (wgoodword[gi2] == NUL)
15536 {
15537 minscore = score_off;
15538 break;
15539 }
15540 ++bi2;
15541 ++gi2;
15542 }
15543 }
15544 else
15545 {
15546 /* try deleting a character from badword later */
15547 stack[stackidx].badi = bi + 1 - round;
15548 stack[stackidx].goodi = gi + round;
15549 stack[stackidx].score = score_off;
15550 ++stackidx;
15551 }
15552 }
15553 }
15554
15555 if (score + SCORE_SWAP < minscore)
15556 {
15557 /* If swapping two characters makes a match then the
15558 * substitution is more expensive, thus there is no need to
15559 * try both. */
15560 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
15561 {
15562 /* Swap two characters, that is: skip them. */
15563 gi += 2;
15564 bi += 2;
15565 score += SCORE_SWAP;
15566 continue;
15567 }
15568 }
15569
15570 /* Substitute one character for another which is the same
15571 * thing as deleting a character from both goodword and badword.
15572 * Use a better score when there is only a case difference. */
15573 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
15574 score += SCORE_ICASE;
15575 else
15576 {
15577 /* For a similar character use SCORE_SIMILAR. */
15578 if (slang != NULL
15579 && slang->sl_has_map
15580 && similar_chars(slang, gc, bc))
15581 score += SCORE_SIMILAR;
15582 else
15583 score += SCORE_SUBST;
15584 }
15585
15586 if (score < minscore)
15587 {
15588 /* Do the substitution. */
15589 ++gi;
15590 ++bi;
15591 continue;
15592 }
15593 }
15594pop:
15595 /*
15596 * Get here to try the next alternative, pop it from the stack.
15597 */
15598 if (stackidx == 0) /* stack is empty, finished */
15599 break;
15600
15601 /* pop an item from the stack */
15602 --stackidx;
15603 gi = stack[stackidx].goodi;
15604 bi = stack[stackidx].badi;
15605 score = stack[stackidx].score;
15606 }
15607
15608 /* When the score goes over "limit" it may actually be much higher.
15609 * Return a very large number to avoid going below the limit when giving a
15610 * bonus. */
15611 if (minscore > limit)
15612 return SCORE_MAXMAX;
15613 return minscore;
15614}
15615#endif
15616
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015617/*
15618 * ":spellinfo"
15619 */
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015620 void
15621ex_spellinfo(eap)
Bram Moolenaar2c4278f2009-05-17 11:33:22 +000015622 exarg_T *eap UNUSED;
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015623{
15624 int lpi;
15625 langp_T *lp;
15626 char_u *p;
15627
15628 if (no_spell_checking(curwin))
15629 return;
15630
15631 msg_start();
15632 for (lpi = 0; lpi < curbuf->b_langp.ga_len && !got_int; ++lpi)
15633 {
15634 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
15635 msg_puts((char_u *)"file: ");
15636 msg_puts(lp->lp_slang->sl_fname);
15637 msg_putchar('\n');
15638 p = lp->lp_slang->sl_info;
15639 if (p != NULL)
15640 {
15641 msg_puts(p);
15642 msg_putchar('\n');
15643 }
15644 }
15645 msg_end();
15646}
15647
/* Bit flags for the "dumpflags" argument of spell_dump_compl(), dump_word()
 * and dump_prefixes(). */
Bram Moolenaar4770d092006-01-12 23:22:24 +000015648#define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */
15649#define DUMPFLAG_COUNT 2 /* include word count */
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015650#define DUMPFLAG_ICASE 4 /* ignore case when finding matches */
Bram Moolenaard0131a82006-03-04 21:46:13 +000015651#define DUMPFLAG_ONECAP 8 /* pattern starts with capital */
15652#define DUMPFLAG_ALLCAP 16 /* pattern is all capitals */
Bram Moolenaar4770d092006-01-12 23:22:24 +000015653
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015654/*
15655 * ":spelldump"
15656 */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015657 void
15658ex_spelldump(eap)
15659 exarg_T *eap;
15660{
15661 buf_T *buf = curbuf;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015662
15663 if (no_spell_checking(curwin))
15664 return;
15665
15666 /* Create a new empty buffer by splitting the window. */
15667 do_cmdline_cmd((char_u *)"new");
15668 if (!bufempty() || !buf_valid(buf))
15669 return;
15670
15671 spell_dump_compl(buf, NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
15672
15673 /* Delete the empty line that we started with. */
15674 if (curbuf->b_ml.ml_line_count > 1)
15675 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
15676
15677 redraw_later(NOT_VALID);
15678}
15679
15680/*
 * Go through all possible words of the languages used for "buf" and:
 * 1. When "pat" is NULL: dump a list of all words by appending lines with
 *    ml_append().  "ic" and "dir" are not used.
 * 2. When "pat" is not NULL: add matching words to insert mode completion.
 *
 * "dumpflags_arg" holds DUMPFLAG_* values.  DUMPFLAG_ICASE, DUMPFLAG_ONECAP
 * and DUMPFLAG_ALLCAP are added here, depending on "ic" and the case of
 * "pat".  DUMPFLAG_KEEPCASE is set or reset for each tree that is walked.
 * The walk stops early when "got_int" is set, and when "pat" is not NULL
 * also when "compl_interrupted" is set.
 */
15686 void
15687spell_dump_compl(buf, pat, ic, dir, dumpflags_arg)
15688 buf_T *buf; /* buffer with spell checking */
15689 char_u *pat; /* leading part of the word */
15690 int ic; /* ignore case */
15691 int *dir; /* direction for adding matches */
15692 int dumpflags_arg; /* DUMPFLAG_* */
15693{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015694 langp_T *lp;
15695 slang_T *slang;
15696 idx_T arridx[MAXWLEN]; /* per depth: index of the node in byts/idxs */
15697 int curi[MAXWLEN]; /* per depth: offset of next byte to try, starts at 1 */
15698 char_u word[MAXWLEN]; /* bytes collected while going down the tree */
15699 int c;
15700 char_u *byts;
15701 idx_T *idxs;
15702 linenr_T lnum = 0; /* only incremented when "pat" is NULL */
15703 int round;
15704 int depth;
15705 int n;
15706 int flags;
Bram Moolenaar7887d882005-07-01 22:33:52 +000015707 char_u *region_names = NULL; /* region names being used */
15708 int do_region = TRUE; /* dump region names and numbers */
15709 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015710 int lpi;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015711 int dumpflags = dumpflags_arg;
15712 int patlen;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015713
Bram Moolenaard0131a82006-03-04 21:46:13 +000015714 /* When ignoring case or when the pattern starts with capital pass this on
15715 * to dump_word(). */
15716 if (pat != NULL)
15717 {
15718 if (ic)
15719 dumpflags |= DUMPFLAG_ICASE;
15720 else
15721 {
15722 n = captype(pat, NULL);
15723 if (n == WF_ONECAP)
15724 dumpflags |= DUMPFLAG_ONECAP;
/* An all-caps pattern only counts as such when it is longer than its
 * first character. */
15725 else if (n == WF_ALLCAP
15726#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015727 && (int)STRLEN(pat) > mb_ptr2len(pat)
Bram Moolenaard0131a82006-03-04 21:46:13 +000015728#else
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015729 && (int)STRLEN(pat) > 1
Bram Moolenaard0131a82006-03-04 21:46:13 +000015730#endif
15731 )
15732 dumpflags |= DUMPFLAG_ALLCAP;
15733 }
15734 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015735
Bram Moolenaar7887d882005-07-01 22:33:52 +000015736 /* Find out if we can support regions: All languages must support the same
15737 * regions or none at all. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015738 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaar7887d882005-07-01 22:33:52 +000015739 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015740 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaar7887d882005-07-01 22:33:52 +000015741 p = lp->lp_slang->sl_regions;
15742 if (p[0] != 0)
15743 {
15744 if (region_names == NULL) /* first language with regions */
15745 region_names = p;
15746 else if (STRCMP(region_names, p) != 0)
15747 {
15748 do_region = FALSE; /* region names are different */
15749 break;
15750 }
15751 }
15752 }
15753
/* When regions are used and dumping, the first line lists the region names. */
15754 if (do_region && region_names != NULL)
15755 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015756 if (pat == NULL)
15757 {
15758 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
15759 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15760 }
Bram Moolenaar7887d882005-07-01 22:33:52 +000015761 }
15762 else
15763 do_region = FALSE;
15764
15765 /*
15766 * Loop over all files loaded for the entries in 'spelllang'.
15767 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015768 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015769 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015770 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015771 slang = lp->lp_slang;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015772 if (slang->sl_fbyts == NULL) /* reloading failed */
15773 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015774
/* When dumping, start each spell file with a line that has its name. */
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015775 if (pat == NULL)
15776 {
15777 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
15778 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15779 }
15780
15781 /* When matching with a pattern and there are no prefixes only use
15782 * parts of the tree that match "pat". */
15783 if (pat != NULL && slang->sl_pbyts == NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000015784 patlen = (int)STRLEN(pat);
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015785 else
Bram Moolenaareb3593b2006-04-22 22:33:57 +000015786 patlen = -1; /* "depth <= patlen" below is then never true */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015787
15788 /* round 1: case-folded tree
15789 * round 2: keep-case tree */
15790 for (round = 1; round <= 2; ++round)
15791 {
15792 if (round == 1)
15793 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015794 dumpflags &= ~DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015795 byts = slang->sl_fbyts;
15796 idxs = slang->sl_fidxs;
15797 }
15798 else
15799 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015800 dumpflags |= DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015801 byts = slang->sl_kbyts;
15802 idxs = slang->sl_kidxs;
15803 }
15804 if (byts == NULL)
15805 continue; /* array is empty */
15806
/* Walk the tree depth-first, starting at the first byte of the root node. */
15807 depth = 0;
15808 arridx[0] = 0;
15809 curi[0] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015810 while (depth >= 0 && !got_int
15811 && (pat == NULL || !compl_interrupted))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015812 {
15813 if (curi[depth] > byts[arridx[depth]])
15814 {
15815 /* Done all bytes at this node, go up one level. */
15816 --depth;
15817 line_breakcheck();
Bram Moolenaara2031822006-03-07 22:29:51 +000015818 ins_compl_check_keys(50);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015819 }
15820 else
15821 {
15822 /* Do one more byte at this node. */
15823 n = arridx[depth] + curi[depth];
15824 ++curi[depth];
15825 c = byts[n];
15826 if (c == 0)
15827 {
15828 /* End of word, deal with the word.
15829 * Don't use keep-case words in the fold-case tree,
15830 * they will appear in the keep-case tree.
15831 * Only use the word when the region matches. */
15832 flags = (int)idxs[n];
15833 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015834 && (flags & WF_NEEDCOMP) == 0
Bram Moolenaar7887d882005-07-01 22:33:52 +000015835 && (do_region
15836 || (flags & WF_REGION) == 0
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015837 || (((unsigned)flags >> 16)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015838 & lp->lp_region) != 0))
15839 {
15840 word[depth] = NUL;
Bram Moolenaar7887d882005-07-01 22:33:52 +000015841 if (!do_region)
15842 flags &= ~WF_REGION;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015843
15844 /* Dump the basic word if there is no prefix or
15845 * when it's the first one. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015846 c = (unsigned)flags >> 24;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015847 if (c == 0 || curi[depth] == 2)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015848 {
15849 dump_word(slang, word, pat, dir,
15850 dumpflags, flags, lnum);
15851 if (pat == NULL)
15852 ++lnum;
15853 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015854
15855 /* Apply the prefix, if there is one. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015856 if (c != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015857 lnum = dump_prefixes(slang, word, pat, dir,
15858 dumpflags, flags, lnum);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015859 }
15860 }
15861 else
15862 {
15863 /* Normal char, go one level deeper. */
15864 word[depth++] = c;
15865 arridx[depth] = idxs[n];
15866 curi[depth] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015867
15868 /* Check if this character matches with the pattern.
 * If not skip the whole tree below it.
 * Always ignore case here, dump_word() will check
 * proper case later. This isn't exactly right when
 * length changes for multi-byte characters with
 * ignore case... */
15874 if (depth <= patlen
15875 && MB_STRNICMP(word, pat, depth) != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015876 --depth;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015877 }
15878 }
15879 }
15880 }
15881 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015882}
15883
15884/*
15885 * Dump one word: apply case modifications and append a line to the buffer.
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015886 * When "lnum" is zero add insert mode completion.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015887 */
15888 static void
Bram Moolenaard0131a82006-03-04 21:46:13 +000015889dump_word(slang, word, pat, dir, dumpflags, wordflags, lnum)
Bram Moolenaar4770d092006-01-12 23:22:24 +000015890 slang_T *slang;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015891 char_u *word;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015892 char_u *pat;
15893 int *dir;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015894 int dumpflags;
Bram Moolenaard0131a82006-03-04 21:46:13 +000015895 int wordflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015896 linenr_T lnum;
15897{
15898 int keepcap = FALSE;
15899 char_u *p;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015900 char_u *tw;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015901 char_u cword[MAXWLEN];
Bram Moolenaar7887d882005-07-01 22:33:52 +000015902 char_u badword[MAXWLEN + 10];
15903 int i;
Bram Moolenaard0131a82006-03-04 21:46:13 +000015904 int flags = wordflags;
15905
15906 if (dumpflags & DUMPFLAG_ONECAP)
15907 flags |= WF_ONECAP;
15908 if (dumpflags & DUMPFLAG_ALLCAP)
15909 flags |= WF_ALLCAP;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015910
Bram Moolenaar4770d092006-01-12 23:22:24 +000015911 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015912 {
15913 /* Need to fix case according to "flags". */
15914 make_case_word(word, cword, flags);
15915 p = cword;
15916 }
15917 else
15918 {
15919 p = word;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015920 if ((dumpflags & DUMPFLAG_KEEPCASE)
15921 && ((captype(word, NULL) & WF_KEEPCAP) == 0
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000015922 || (flags & WF_FIXCAP) != 0))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015923 keepcap = TRUE;
15924 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000015925 tw = p;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015926
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015927 if (pat == NULL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015928 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015929 /* Add flags and regions after a slash. */
15930 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
Bram Moolenaar4770d092006-01-12 23:22:24 +000015931 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015932 STRCPY(badword, p);
15933 STRCAT(badword, "/");
15934 if (keepcap)
15935 STRCAT(badword, "=");
15936 if (flags & WF_BANNED)
15937 STRCAT(badword, "!");
15938 else if (flags & WF_RARE)
15939 STRCAT(badword, "?");
15940 if (flags & WF_REGION)
15941 for (i = 0; i < 7; ++i)
15942 if (flags & (0x10000 << i))
15943 sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
15944 p = badword;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015945 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000015946
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015947 if (dumpflags & DUMPFLAG_COUNT)
15948 {
15949 hashitem_T *hi;
15950
15951 /* Include the word count for ":spelldump!". */
15952 hi = hash_find(&slang->sl_wordcount, tw);
15953 if (!HASHITEM_EMPTY(hi))
15954 {
15955 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
15956 tw, HI2WC(hi)->wc_count);
15957 p = IObuff;
15958 }
15959 }
15960
15961 ml_append(lnum, p, (colnr_T)0, FALSE);
15962 }
Bram Moolenaard0131a82006-03-04 21:46:13 +000015963 else if (((dumpflags & DUMPFLAG_ICASE)
15964 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
15965 : STRNCMP(p, pat, STRLEN(pat)) == 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015966 && ins_compl_add_infercase(p, (int)STRLEN(p),
Bram Moolenaare8c3a142006-08-29 14:30:35 +000015967 p_ic, NULL, *dir, 0) == OK)
Bram Moolenaard0131a82006-03-04 21:46:13 +000015968 /* if dir was BACKWARD then honor it just once */
15969 *dir = FORWARD;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015970}
15971
15972/*
 * For ":spelldump" and completion: Find matching prefixes for "word".
 * Prepend each to "word" and pass the result to dump_word(), once for the
 * word as given and, when "word" starts with a lower-case letter, once more
 * for the word with its first letter in upper-case.
 * "lnum" is only advanced when it is not zero.  spell_dump_compl() leaves
 * it at zero when "pat" is not NULL.
 * Return the updated line number.
 */
15978 static linenr_T
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015979dump_prefixes(slang, word, pat, dir, dumpflags, flags, startlnum)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015980 slang_T *slang;
15981 char_u *word; /* case-folded word */
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015982 char_u *pat;
15983 int *dir;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015984 int dumpflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015985 int flags; /* flags with prefix ID */
15986 linenr_T startlnum;
15987{
15988 idx_T arridx[MAXWLEN]; /* per depth: index of the node in byts/idxs */
15989 int curi[MAXWLEN]; /* per depth: offset of next byte to try, starts at 1 */
15990 char_u prefix[MAXWLEN]; /* prefix bytes collected so far, then the word */
Bram Moolenaar53805d12005-08-01 07:08:33 +000015991 char_u word_up[MAXWLEN];
15992 int has_word_up = FALSE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015993 int c;
15994 char_u *byts;
15995 idx_T *idxs;
15996 linenr_T lnum = startlnum;
15997 int depth;
15998 int n;
15999 int len;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016000 int i;
16001
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000016002 /* If the word starts with a lower-case letter make the word with an
 * upper-case letter in word_up[]. */
16004 c = PTR2CHAR(word);
16005 if (SPELL_TOUPPER(c) != c)
16006 {
16007 onecap_copy(word, word_up, TRUE);
16008 has_word_up = TRUE;
16009 }
16010
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016011 byts = slang->sl_pbyts;
16012 idxs = slang->sl_pidxs;
16013 if (byts != NULL) /* array is not empty */
16014 {
16015 /*
 * Loop over all prefixes, building them byte-by-byte in prefix[].
 * When at the end of a prefix check that it supports "flags".
 */
16019 depth = 0;
16020 arridx[0] = 0;
16021 curi[0] = 1;
16022 while (depth >= 0 && !got_int)
16023 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000016024 n = arridx[depth];
16025 len = byts[n];
16026 if (curi[depth] > len)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016027 {
16028 /* Done all bytes at this node, go up one level. */
16029 --depth;
16030 line_breakcheck();
16031 }
16032 else
16033 {
16034 /* Do one more byte at this node. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000016035 n += curi[depth];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016036 ++curi[depth];
16037 c = byts[n];
16038 if (c == 0)
16039 {
16040 /* End of prefix, find out how many IDs there are. */
16041 for (i = 1; i < len; ++i)
16042 if (byts[n + i] != 0)
16043 break;
/* Skip the other zero bytes, they are all handled here. */
16044 curi[depth] += i - 1;
16045
Bram Moolenaar53805d12005-08-01 07:08:33 +000016046 c = valid_word_prefix(i, n, flags, word, slang, FALSE);
16047 if (c != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016048 {
/* Put the word after the prefix; the copy is limited to what
 * fits in prefix[]. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +000016049 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +000016050 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000016051 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000016052 : flags, lnum);
16053 if (lnum != 0)
16054 ++lnum;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016055 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000016056
16057 /* Check for prefix that matches the word when the
16058 * first letter is upper-case, but only if the prefix has
16059 * a condition. */
16060 if (has_word_up)
16061 {
16062 c = valid_word_prefix(i, n, flags, word_up, slang,
16063 TRUE);
16064 if (c != 0)
16065 {
16066 vim_strncpy(prefix + depth, word_up,
16067 MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +000016068 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000016069 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000016070 : flags, lnum);
16071 if (lnum != 0)
16072 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +000016073 }
16074 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000016075 }
16076 else
16077 {
16078 /* Normal char, go one level deeper. */
16079 prefix[depth++] = c;
16080 arridx[depth] = idxs[n];
16081 curi[depth] = 1;
16082 }
16083 }
16084 }
16085 }
16086
16087 return lnum;
16088}
16089
Bram Moolenaar95529562005-08-25 21:21:38 +000016090/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000016091 * Move "p" to the end of word "start".
16092 * Uses the spell-checking word characters.
Bram Moolenaar95529562005-08-25 21:21:38 +000016093 */
16094 char_u *
16095spell_to_word_end(start, buf)
16096 char_u *start;
16097 buf_T *buf;
16098{
16099 char_u *p = start;
16100
16101 while (*p != NUL && spell_iswordp(p, buf))
16102 mb_ptr_adv(p);
16103 return p;
16104}
16105
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016106#if defined(FEAT_INS_EXPAND) || defined(PROTO)
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016107/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000016108 * For Insert mode completion CTRL-X s:
16109 * Find start of the word in front of column "startcol".
16110 * We don't check if it is badly spelled, with completion we can only change
16111 * the word in front of the cursor.
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016112 * Returns the column number of the word.
16113 */
16114 int
16115spell_word_start(startcol)
16116 int startcol;
16117{
16118 char_u *line;
16119 char_u *p;
16120 int col = 0;
16121
Bram Moolenaar95529562005-08-25 21:21:38 +000016122 if (no_spell_checking(curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016123 return startcol;
16124
16125 /* Find a word character before "startcol". */
16126 line = ml_get_curline();
16127 for (p = line + startcol; p > line; )
16128 {
16129 mb_ptr_back(line, p);
16130 if (spell_iswordp_nmw(p))
16131 break;
16132 }
16133
16134 /* Go back to start of the word. */
16135 while (p > line)
16136 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000016137 col = (int)(p - line);
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016138 mb_ptr_back(line, p);
16139 if (!spell_iswordp(p, curbuf))
16140 break;
16141 col = 0;
16142 }
16143
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016144 return col;
16145}
16146
16147/*
Bram Moolenaar4effc802005-09-30 21:12:02 +000016148 * Need to check for 'spellcapcheck' now, the word is removed before
16149 * expand_spelling() is called. Therefore the ugly global variable.
16150 */
16151static int spell_expand_need_cap;
16152
16153 void
16154spell_expand_check_cap(col)
16155 colnr_T col;
16156{
16157 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
16158}
16159
16160/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016161 * Get list of spelling suggestions.
16162 * Used for Insert mode completion CTRL-X ?.
16163 * Returns the number of matches. The matches are in "matchp[]", array of
16164 * allocated strings.
16165 */
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016166 int
Bram Moolenaar5fd0ca72009-05-13 16:56:33 +000016167expand_spelling(lnum, pat, matchp)
Bram Moolenaar2c4278f2009-05-17 11:33:22 +000016168 linenr_T lnum UNUSED;
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016169 char_u *pat;
16170 char_u ***matchp;
16171{
16172 garray_T ga;
16173
Bram Moolenaar4770d092006-01-12 23:22:24 +000016174 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
Bram Moolenaar8b59de92005-08-11 19:59:29 +000016175 *matchp = ga.ga_data;
16176 return ga.ga_len;
16177}
16178#endif
16179
Bram Moolenaarf71a3db2006-03-12 21:50:18 +000016180#endif /* FEAT_SPELL */