blob: 8c82a7ea95133379a7fd67089bf31d032d0b8381 [file] [log] [blame]
Bram Moolenaare19defe2005-03-21 08:23:33 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spell.c: code for spell checking
Bram Moolenaarfc735152005-03-22 22:54:12 +000012 *
Bram Moolenaar51485f02005-06-04 21:55:20 +000013 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
14 * has a list of bytes that can appear (siblings). For each byte there is a
15 * pointer to the node with the byte that follows in the word (child).
Bram Moolenaar9f30f502005-06-14 22:01:04 +000016 *
17 * A NUL byte is used where the word may end. The bytes are sorted, so that
18 * binary searching can be used and the NUL bytes are at the start. The
19 * number of possible bytes is stored before the list of bytes.
20 *
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
22 * either the next index or flags. The tree starts at index 0. For example,
23 * to lookup "vi" this sequence is followed:
24 * i = 0
25 * len = byts[i]
26 * n = where "v" appears in byts[i + 1] to byts[i + len]
27 * i = idxs[n]
28 * len = byts[i]
29 * n = where "i" appears in byts[i + 1] to byts[i + len]
30 * i = idxs[n]
31 * len = byts[i]
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
Bram Moolenaar51485f02005-06-04 21:55:20 +000033 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +000034 * There are two word trees: one with case-folded words and one with words in
Bram Moolenaar51485f02005-06-04 21:55:20 +000035 * original case. The second one is only used for keep-case words and is
36 * usually small.
37 *
Bram Moolenaarae5bce12005-08-15 21:41:48 +000038 * There is one additional tree for when not all prefixes are applied when
Bram Moolenaar1d73c882005-06-19 22:48:47 +000039 * generating the .spl file. This tree stores all the possible prefixes, as
40 * if they were words. At each word (prefix) end the prefix nr is stored, the
41 * following word must support this prefix nr. And the condition nr is
42 * stored, used to lookup the condition that the word must match with.
43 *
Bram Moolenaar51485f02005-06-04 21:55:20 +000044 * Thanks to Olaf Seibert for providing an example implementation of this tree
45 * and the compression mechanism.
Bram Moolenaar4770d092006-01-12 23:22:24 +000046 * LZ trie ideas:
47 * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
48 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000049 *
50 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000051 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +000052 * Why doesn't Vim use aspell/ispell/myspell/etc.?
53 * See ":help develop-spell".
54 */
55
/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
 * Only use it for small word lists!
 * Disabled by default; change "#if 0" to "#if 1" to enable it. */
#if 0
# define SPELL_PRINTTREE
#endif

/* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
 * specific word.
 * Disabled by default; change "#if 0" to "#if 1" to enable it. */
#if 0
# define DEBUG_TRIEWALK
#endif
67
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the weighted average (3 * word_score + sound_score) / 4,
 * using integer division.  Both arguments are parenthesized so that
 * expressions such as "a + b" can be passed safely.
 */
#define RESCORE(word_score, sound_score) ((3 * (word_score) + (sound_score)) / 4)
77
/*
 * Do the opposite: based on a maximum end score and a known sound score,
 * compute the maximum word score that can be used.
 *
 * The result is (4 * word_score - sound_score) / 3, using integer division.
 * Both arguments are parenthesized so that expressions such as "a + b" can
 * be passed safely.
 */
#define MAXSCORE(word_score, sound_score) ((4 * (word_score) - (sound_score)) / 3)
83
84/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +000085 * Vim spell file format: <HEADER>
Bram Moolenaar5195e452005-08-19 20:32:47 +000086 * <SECTIONS>
Bram Moolenaar1d73c882005-06-19 22:48:47 +000087 * <LWORDTREE>
88 * <KWORDTREE>
89 * <PREFIXTREE>
Bram Moolenaar51485f02005-06-04 21:55:20 +000090 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000091 * <HEADER>: <fileID> <versionnr>
Bram Moolenaar51485f02005-06-04 21:55:20 +000092 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000093 * <fileID> 8 bytes "VIMspell"
94 * <versionnr> 1 byte VIMSPELLVERSION
95 *
96 *
97 * Sections make it possible to add information to the .spl file without
98 * making it incompatible with previous versions. There are two kinds of
99 * sections:
100 * 1. Not essential for correct spell checking. E.g. for making suggestions.
101 * These are skipped when not supported.
102 * 2. Optional information, but essential for spell checking when present.
103 * E.g. conditions for affixes. When this section is present but not
104 * supported an error message is given.
105 *
106 * <SECTIONS>: <section> ... <sectionend>
107 *
108 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
109 *
110 * <sectionID> 1 byte number from 0 to 254 identifying the section
111 *
112 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
113 * spell checking
114 *
115 * <sectionlen> 4 bytes length of section contents, MSB first
116 *
117 * <sectionend> 1 byte SN_END
118 *
119 *
120 * sectionID == SN_REGION: <regionname> ...
121 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000122 * First <regionname> is region 1.
123 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000124 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
125 * <folcharslen> <folchars>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000126 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
127 * <charflags> N bytes List of flags (first one is for character 128):
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000128 * 0x01 word character CF_WORD
129 * 0x02 upper-case character CF_UPPER
Bram Moolenaar5195e452005-08-19 20:32:47 +0000130 * <folcharslen> 2 bytes Number of bytes in <folchars>.
131 * <folchars> N bytes Folded characters, first one is for character 128.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000132 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000133 * sectionID == SN_MIDWORD: <midword>
134 * <midword> N bytes Characters that are word characters only when used
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000135 * in the middle of a word.
136 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000137 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000138 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000139 * <prefcond> : <condlen> <condstr>
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000140 * <condlen> 1 byte Length of <condstr>.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000141 * <condstr> N bytes Condition for the prefix.
142 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000143 * sectionID == SN_REP: <repcount> <rep> ...
144 * <repcount> 2 bytes number of <rep> items, MSB first.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000145 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000146 * <repfromlen> 1 byte length of <repfrom>
147 * <repfrom> N bytes "from" part of replacement
148 * <reptolen> 1 byte length of <repto>
149 * <repto> N bytes "to" part of replacement
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000150 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000151 * sectionID == SN_REPSAL: <repcount> <rep> ...
152 * just like SN_REP but for soundfolded words
153 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000154 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
155 * <salflags> 1 byte flags for soundsalike conversion:
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000156 * SAL_F0LLOWUP
157 * SAL_COLLAPSE
158 * SAL_REM_ACCENTS
Bram Moolenaar5195e452005-08-19 20:32:47 +0000159 * <salcount> 2 bytes number of <sal> items following
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000160 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000161 * <salfromlen> 1 byte length of <salfrom>
162 * <salfrom> N bytes "from" part of soundsalike
163 * <saltolen> 1 byte length of <salto>
164 * <salto> N bytes "to" part of soundsalike
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000165 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000166 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
167 * <sofofromlen> 2 bytes length of <sofofrom>
168 * <sofofrom> N bytes "from" part of soundfold
169 * <sofotolen> 2 bytes length of <sofoto>
170 * <sofoto> N bytes "to" part of soundfold
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000171 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000172 * sectionID == SN_SUGFILE: <timestamp>
173 * <timestamp> 8 bytes time in seconds that must match with .sug file
174 *
175 * sectionID == SN_WORDS: <word> ...
176 * <word> N bytes NUL terminated common word
177 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000178 * sectionID == SN_MAP: <mapstr>
179 * <mapstr> N bytes String with sequences of similar characters,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000180 * separated by slashes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000181 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000182 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags>
183 * <compmax> 1 byte Maximum nr of words in compound word.
184 * <compminlen> 1 byte Minimal word length for compounding.
185 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
186 * <compflags> N bytes Flags from COMPOUNDFLAGS items, separated by
187 * slashes.
188 *
Bram Moolenaar78622822005-08-23 21:00:13 +0000189 * sectionID == SN_NOBREAK: (empty, its presence is enough)
190 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000191 * sectionID == SN_SYLLABLE: <syllable>
192 * <syllable> N bytes String from SYLLABLE item.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000193 *
194 * <LWORDTREE>: <wordtree>
195 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000196 * <KWORDTREE>: <wordtree>
197 *
198 * <PREFIXTREE>: <wordtree>
199 *
200 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000201 * <wordtree>: <nodecount> <nodedata> ...
202 *
203 * <nodecount> 4 bytes Number of nodes following. MSB first.
204 *
205 * <nodedata>: <siblingcount> <sibling> ...
206 *
207 * <siblingcount> 1 byte Number of siblings in this node. The siblings
208 * follow in sorted order.
209 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000210 * <sibling>: <byte> [ <nodeidx> <xbyte>
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000211 * | <flags> [<flags2>] [<region>] [<affixID>]
212 * | [<pflags>] <affixID> <prefcondnr> ]
Bram Moolenaar51485f02005-06-04 21:55:20 +0000213 *
214 * <byte> 1 byte Byte value of the sibling. Special cases:
215 * BY_NOFLAGS: End of word without flags and for all
216 * regions.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000217 * For PREFIXTREE <affixID> and
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000218 * <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000219 * BY_FLAGS: End of word, <flags> follow.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000220 * For PREFIXTREE <pflags>, <affixID>
Bram Moolenaar53805d12005-08-01 07:08:33 +0000221 * and <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000222 * BY_FLAGS2: End of word, <flags> and <flags2>
223 * follow. Not used in PREFIXTREE.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000224 * BY_INDEX: Child of sibling is shared, <nodeidx>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000225 * and <xbyte> follow.
226 *
227 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
228 *
229 * <xbyte> 1 byte byte value of the sibling.
230 *
231 * <flags> 1 byte bitmask of:
232 * WF_ALLCAP word must have only capitals
233 * WF_ONECAP first char of word must be capital
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000234 * WF_KEEPCAP keep-case word
235 * WF_FIXCAP keep-case word, all caps not allowed
Bram Moolenaar51485f02005-06-04 21:55:20 +0000236 * WF_RARE rare word
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000237 * WF_BANNED bad word
Bram Moolenaar51485f02005-06-04 21:55:20 +0000238 * WF_REGION <region> follows
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000239 * WF_AFX <affixID> follows
Bram Moolenaar51485f02005-06-04 21:55:20 +0000240 *
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000241 * <flags2> 1 byte Bitmask of:
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000242 * WF_HAS_AFF >> 8 word includes affix
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000243 * WF_NEEDCOMP >> 8 word only valid in compound
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000244 *
Bram Moolenaar53805d12005-08-01 07:08:33 +0000245 * <pflags> 1 byte bitmask of:
246 * WFP_RARE rare prefix
247 * WFP_NC non-combining prefix
248 * WFP_UP letter after prefix made upper case
249 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000250 * <region> 1 byte Bitmask for regions in which word is valid. When
251 * omitted it's valid in all regions.
252 * Lowest bit is for region 1.
253 *
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000254 * <affixID> 1 byte ID of affix that can be used with this word. In
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000255 * PREFIXTREE used for the required prefix ID.
256 *
257 * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
 *			    from the SN_PREFCOND section.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000259 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000260 * All text characters are in 'encoding', but stored as single bytes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000261 */
262
Bram Moolenaar4770d092006-01-12 23:22:24 +0000263/*
264 * Vim .sug file format: <SUGHEADER>
265 * <SUGWORDTREE>
266 * <SUGTABLE>
267 *
268 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
269 *
270 * <fileID> 6 bytes "VIMsug"
271 * <versionnr> 1 byte VIMSUGVERSION
272 * <timestamp> 8 bytes timestamp that must match with .spl file
273 *
274 *
275 * <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
276 *
277 *
278 * <SUGTABLE>: <sugwcount> <sugline> ...
279 *
280 * <sugwcount> 4 bytes number of <sugline> following
281 *
282 * <sugline>: <sugnr> ... NUL
283 *
284 * <sugnr>: X bytes word number that results in this soundfolded word,
285 * stored as an offset to the previous number in as
 *			    few bytes as possible, see offset2bytes()
287 */
288
Bram Moolenaare19defe2005-03-21 08:23:33 +0000289#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
290# include <io.h> /* for lseek(), must be before vim.h */
291#endif
292
293#include "vim.h"
294
295#if defined(FEAT_SYN_HL) || defined(PROTO)
296
297#ifdef HAVE_FCNTL_H
298# include <fcntl.h>
299#endif
300
Bram Moolenaar4770d092006-01-12 23:22:24 +0000301#ifndef UNIX /* it's in os_unix.h for Unix */
302# include <time.h> /* for time_t */
303#endif
304
#define MAXWLEN 250		/* Assume max. word len is this many bytes.
				   Some places assume a word length fits in a
				   byte, thus it can't be above 255. */
Bram Moolenaarfc735152005-03-22 22:54:12 +0000308
/* Type used for indexes in the word tree; it needs to be at least 4 bytes.
 * "int" is used when SIZEOF_INT says it is big enough, otherwise "long".
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT > 3
typedef int idx_T;
#else
typedef long idx_T;
#endif
316
/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it. */
#define WF_REGION   0x01	/* region byte follows */
#define WF_ONECAP   0x02	/* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04	/* word must be all capitals */
#define WF_RARE	    0x08	/* rare word */
#define WF_BANNED   0x10	/* bad word */
#define WF_AFX	    0x20	/* affix ID follows */
#define WF_FIXCAP   0x40	/* keep-case word, allcap not allowed */
#define WF_KEEPCAP  0x80	/* keep-case word */

/* for <flags2>, shifted up one byte to be used in wn_flags */
#define WF_HAS_AFF  0x0100	/* word includes affix */
#define WF_NEEDCOMP 0x0200	/* word only valid in compound */

/* only used for su_badflags; note that it has the same value as WF_AFX */
#define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */

/* All the flags that say something about the case of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000336
/* flags for <pflags> */
#define WFP_RARE    0x01	/* rare prefix */
#define WFP_NC	    0x02	/* prefix is not combining */
#define WFP_UP	    0x04	/* to-upper prefix */

/* Flags for postponed prefixes.  Must be above affixID (one byte)
 * and prefcondnr (two bytes), thus the <pflags> values are shifted up by
 * 24 bits. */
#define WF_RAREPFX  (WFP_RARE << 24)	/* in sl_pidxs: flag for rare
					 * postponed prefix */
#define WF_PFX_NC   (WFP_NC << 24)	/* in sl_pidxs: flag for non-combining
					 * postponed prefix */
#define WF_PFX_UP   (WFP_UP << 24)	/* in sl_pidxs: flag for to-upper
					 * postponed prefix */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000350
/* Special byte values for <byte>.  Some are only used in the tree for
 * postponed prefixes, some only in the other trees.  This is a bit messy... */
#define BY_NOFLAGS	0	/* end of word without flags or region; for
				 * postponed prefix: no <pflags> */
#define BY_INDEX	1	/* child is shared, index follows */
#define BY_FLAGS	2	/* end of word, <flags> byte follows; for
				 * postponed prefix: <pflags> follows */
#define BY_FLAGS2	3	/* end of word, <flags> and <flags2> bytes
				 * follow; never used in prefix tree */
#define BY_SPECIAL  BY_FLAGS2	/* highest special byte value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000361
Bram Moolenaar4770d092006-01-12 23:22:24 +0000362/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
363 * si_repsal, sl_rep, and si_sal. Not for sl_sal!
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000364 * One replacement: from "ft_from" to "ft_to". */
365typedef struct fromto_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000366{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000367 char_u *ft_from;
368 char_u *ft_to;
369} fromto_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000370
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000371/* Info from "SAL" entries in ".aff" file used in sl_sal.
372 * The info is split for quick processing by spell_soundfold().
373 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
374typedef struct salitem_S
375{
376 char_u *sm_lead; /* leading letters */
377 int sm_leadlen; /* length of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000378 char_u *sm_oneof; /* letters from () or NULL */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000379 char_u *sm_rules; /* rules like ^, $, priority */
380 char_u *sm_to; /* replacement. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000381#ifdef FEAT_MBYTE
382 int *sm_lead_w; /* wide character copy of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000383 int *sm_oneof_w; /* wide character copy of "sm_oneof" */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000384 int *sm_to_w; /* wide character copy of "sm_to" */
385#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000386} salitem_T;
387
/* Type of the items in "sl_sal_first": "int" when compiled with FEAT_MBYTE,
 * "short" otherwise. */
#ifdef FEAT_MBYTE
typedef int salfirst_T;
#else
typedef short salfirst_T;
#endif
393
/* Values for SP_*ERROR are negative, positive values are used by
 * read_cnt_string().  The values are parenthesized so that they can be used
 * safely inside any expression. */
#define	SP_TRUNCERROR	(-1)	/* spell file truncated error */
#define	SP_FORMERROR	(-2)	/* format error in spell file */
#define SP_OTHERERROR	(-3)	/* other error while reading spell file */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000399
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000400/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000401 * Structure used to store words and other info for one language, loaded from
402 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000403 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
404 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
405 *
406 * The "byts" array stores the possible bytes in each tree node, preceded by
407 * the number of possible bytes, sorted on byte value:
408 * <len> <byte1> <byte2> ...
409 * The "idxs" array stores the index of the child node corresponding to the
410 * byte in "byts".
411 * Exception: when the byte is zero, the word may end here and "idxs" holds
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000412 * the flags, region mask and affixID for the word. There may be several
413 * zeros in sequence for alternative flag/region/affixID combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000414 */
415typedef struct slang_S slang_T;
416struct slang_S
417{
418 slang_T *sl_next; /* next language */
419 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000420 char_u *sl_fname; /* name of .spl file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000421 int sl_add; /* TRUE if it's a .add file. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000422
Bram Moolenaar51485f02005-06-04 21:55:20 +0000423 char_u *sl_fbyts; /* case-folded word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000424 idx_T *sl_fidxs; /* case-folded word indexes */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000425 char_u *sl_kbyts; /* keep-case word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000426 idx_T *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000427 char_u *sl_pbyts; /* prefix tree word bytes */
428 idx_T *sl_pidxs; /* prefix tree word indexes */
429
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000430 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000431
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000432 char_u *sl_midword; /* MIDWORD string or NULL */
433
Bram Moolenaar4770d092006-01-12 23:22:24 +0000434 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */
435
Bram Moolenaar5195e452005-08-19 20:32:47 +0000436 int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000437 int sl_compminlen; /* COMPOUNDMIN (default: 0) */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000438 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
439 regprog_T *sl_compprog; /* COMPOUNDFLAGS turned into a regexp progrm
440 * (NULL when no compounding) */
441 char_u *sl_compstartflags; /* flags for first compound word */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000442 char_u *sl_compallflags; /* all flags for compound words */
Bram Moolenaar78622822005-08-23 21:00:13 +0000443 char_u sl_nobreak; /* When TRUE: no spaces between words */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000444 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
445 garray_T sl_syl_items; /* syllable items */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000446
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000447 int sl_prefixcnt; /* number of items in "sl_prefprog" */
448 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
449
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000450 garray_T sl_rep; /* list of fromto_T entries from REP lines */
451 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
452 there is none */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000453 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000454 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000455 there is none */
456 int sl_followup; /* SAL followup */
457 int sl_collapse; /* SAL collapse_result */
458 int sl_rem_accents; /* SAL remove_accents */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000459 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items:
460 * "sl_sal_first" maps chars, when has_mbyte
461 * "sl_sal" is a list of wide char lists. */
462 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */
463 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */
464
465 /* Info from the .sug file. Loaded on demand. */
466 time_t sl_sugtime; /* timestamp for .sug file */
467 char_u *sl_sbyts; /* soundfolded word bytes */
468 idx_T *sl_sidxs; /* soundfolded word indexes */
469 buf_T *sl_sugbuf; /* buffer with word number table */
470 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to
471 load */
472
Bram Moolenaarea424162005-06-16 21:51:00 +0000473 int sl_has_map; /* TRUE if there is a MAP line */
474#ifdef FEAT_MBYTE
475 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
476 int sl_map_array[256]; /* MAP for first 256 chars */
477#else
478 char_u sl_map_array[256]; /* MAP for first 256 chars */
479#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000480 hashtab_T sl_sounddone; /* table with soundfolded words that have
481 handled, see add_sound_suggest() */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000482};
483
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000484/* First language that is loaded, start of the linked list of loaded
485 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000486static slang_T *first_lang = NULL;
487
/* Flags used in .spl file for soundsalike flags (the <salflags> byte). */
#define SAL_F0LLOWUP		1
#define SAL_COLLAPSE		2
#define SAL_REM_ACCENTS		4
492
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000493/*
494 * Structure used in "b_langp", filled from 'spelllang'.
495 */
496typedef struct langp_S
497{
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000498 slang_T *lp_slang; /* info for this language */
499 slang_T *lp_sallang; /* language used for sound folding or NULL */
500 slang_T *lp_replang; /* language used for REP items or NULL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000501 int lp_region; /* bitmask for region or REGION_ALL */
502} langp_T;
503
504#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
505
#define REGION_ALL 0xff		/* word valid in all regions */

#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 8	  /* length of VIMSPELLMAGIC */
#define VIMSPELLVERSION 50	  /* value of <versionnr> in a .spl file */

#define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
#define VIMSUGMAGICL 6		/* length of VIMSUGMAGIC */
#define VIMSUGVERSION 1		/* value of <versionnr> in a .sug file */
515
/* Section IDs.  Only renumber them when VIMSPELLVERSION changes! */
#define SN_REGION	0	/* <regionname> section */
#define SN_CHARFLAGS	1	/* charflags section */
#define SN_MIDWORD	2	/* <midword> section */
#define SN_PREFCOND	3	/* <prefcond> section */
#define SN_REP		4	/* REP items section */
#define SN_SAL		5	/* SAL items section */
#define SN_SOFO		6	/* soundfolding section */
#define SN_MAP		7	/* MAP items section */
#define SN_COMPOUND	8	/* compound words section */
#define SN_SYLLABLE	9	/* syllable section */
#define SN_NOBREAK	10	/* NOBREAK section */
#define SN_SUGFILE	11	/* timestamp for .sug file */
#define SN_REPSAL	12	/* REPSAL items section */
#define SN_WORDS	13	/* common words */
#define SN_END		255	/* end of sections */

#define SNF_REQUIRED	1	/* <sectionflags>: required section */
534
/* Result values.  Lower number is accepted over higher one.
 * SP_BANNED is parenthesized so that it can be used safely inside any
 * expression. */
#define SP_BANNED	(-1)
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000541
Bram Moolenaar7887d882005-07-01 22:33:52 +0000542/* file used for "zG" and "zW" */
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000543static char_u *int_wordlist = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +0000544
Bram Moolenaar4770d092006-01-12 23:22:24 +0000545typedef struct wordcount_S
546{
547 short_u wc_count; /* nr of times word was seen */
548 char_u wc_word[1]; /* word, actually longer */
549} wordcount_T;
550
551static wordcount_T dumwc;
552#define WC_KEY_OFF (dumwc.wc_word - (char_u *)&dumwc)
553#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
554#define MAXWORDCOUNT 0xffff
555
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000556/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000557 * Information used when looking for suggestions.
558 */
559typedef struct suginfo_S
560{
561 garray_T su_ga; /* suggestions, contains "suggest_T" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000562 int su_maxcount; /* max. number of suggestions displayed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000563 int su_maxscore; /* maximum score for adding to su_ga */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000564 int su_sfmaxscore; /* idem, for when doing soundfold words */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000565 garray_T su_sga; /* like su_ga, sound-folded scoring */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000566 char_u *su_badptr; /* start of bad word in line */
567 int su_badlen; /* length of detected bad word in line */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000568 int su_badflags; /* caps flags for bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000569 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
570 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000571 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000572 hashtab_T su_banned; /* table with banned words */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000573 slang_T *su_sallang; /* default language for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000574} suginfo_T;
575
576/* One word suggestion. Used in "si_ga". */
577typedef struct suggest_S
578{
579 char_u *st_word; /* suggested word, allocated string */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000580 int st_wordlen; /* STRLEN(st_word) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000581 int st_orglen; /* length of replaced text */
582 int st_score; /* lower is better */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000583 int st_altscore; /* used when st_score compares equal */
584 int st_salscore; /* st_score is for soundalike */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000585 int st_had_bonus; /* bonus already included in score */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000586 slang_T *st_slang; /* language used for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000587} suggest_T;
588
/* Access entry "i" of growarray "ga" as a suggest_T.  "ga" is the growarray
 * itself, not a pointer to it. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* TRUE if a word appears in the list of banned words.
 * "su" is a pointer to a suginfo_T; it is parenthesized so that any pointer
 * expression can be passed. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later.
 * Note: "su" is evaluated more than once. */
#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000602
/* score for various changes */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With fewer than two changes it's slightly faster but we miss
 * a few good suggestions.  In rare cases we need to try three or four
 * changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* The replacement list is parenthesized so that the macro behaves as a single
 * value next to operators that bind tighter than or equal to "*", e.g.
 * "x / SCORE_BIG". */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999	/* accept any score */
#define SCORE_LIMITMAX	350	/* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000647
648/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000649 * Structure to store info for word matching.
650 */
651typedef struct matchinf_S
652{
653 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000654
655 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000656 char_u *mi_word; /* start of word being checked */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000657 char_u *mi_end; /* end of matching word so far */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000658 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000659 char_u *mi_cend; /* char after what was used for
660 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000661
662 /* case-folded text */
663 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000664 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000665
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000666 /* for when checking word after a prefix */
667 int mi_prefarridx; /* index in sl_pidxs with list of
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000668 affixID/condition */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000669 int mi_prefcnt; /* number of entries at mi_prefarridx */
670 int mi_prefixlen; /* byte length of prefix */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000671#ifdef FEAT_MBYTE
672 int mi_cprefixlen; /* byte length of prefix in original
673 case */
674#else
675# define mi_cprefixlen mi_prefixlen /* it's the same value */
676#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000677
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000678 /* for when checking a compound word */
679 int mi_compoff; /* start of following word offset */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000680 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */
681 int mi_complen; /* nr of compound words used */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000682
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000683 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000684 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000685 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000686 buf_T *mi_buf; /* buffer being checked */
Bram Moolenaar78622822005-08-23 21:00:13 +0000687
688 /* for NOBREAK */
689 int mi_result2; /* "mi_resul" without following word */
690 char_u *mi_end2; /* "mi_end" without following word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000691} matchinf_T;
692
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000693/*
694 * The tables used for recognizing word characters according to spelling.
695 * These are only used for the first 256 characters of 'encoding'.
696 */
697typedef struct spelltab_S
698{
699 char_u st_isw[256]; /* flags: is word char */
700 char_u st_isu[256]; /* flags: is uppercase char */
701 char_u st_fold[256]; /* chars: folded case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000702 char_u st_upper[256]; /* chars: upper case */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000703} spelltab_T;
704
705static spelltab_T spelltab;
706static int did_set_spelltab;
707
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000708#define CF_WORD 0x01
709#define CF_UPPER 0x02
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000710
711static void clear_spell_chartab __ARGS((spelltab_T *sp));
712static int set_spell_finish __ARGS((spelltab_T *new_st));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000713static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
714static int spell_iswordp_nmw __ARGS((char_u *p));
715#ifdef FEAT_MBYTE
716static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
717#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000718static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000719
/*
 * For finding suggestions: At each node in the tree these states are tried.
 * The enumerators are listed in the order given here, starting at zero.
 */
typedef enum
{
    STATE_START = 0,	/* At start of node check for NUL bytes (goodword
			 * ends); if badword ends there is a match, otherwise
			 * try splitting word. */
    STATE_NOPREFIX,	/* try without prefix */
    STATE_SPLITUNDO,	/* Undo splitting. */
    STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
    STATE_PLAIN,	/* Use each byte of the node. */
    STATE_DEL,		/* Delete a byte from the bad word. */
    STATE_INS_PREP,	/* Prepare for inserting bytes. */
    STATE_INS,		/* Insert a byte in the bad word. */
    STATE_SWAP,		/* Swap two bytes. */
    STATE_UNSWAP,	/* Undo swap two characters. */
    STATE_SWAP3,	/* Swap two characters over three. */
    STATE_UNSWAP3,	/* Undo swap two characters over three. */
    STATE_UNROT3L,	/* Undo rotate three characters left */
    STATE_UNROT3R,	/* Undo rotate three characters right */
    STATE_REP_INI,	/* Prepare for using REP items. */
    STATE_REP,		/* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,	/* Undo a REP item replacement. */
    STATE_FINAL		/* End of this node. */
} state_T;
746
747/*
Bram Moolenaar0c405862005-06-22 22:26:26 +0000748 * Struct to keep the state at each level in suggest_try_change().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000749 */
750typedef struct trystate_S
751{
Bram Moolenaarea424162005-06-16 21:51:00 +0000752 state_T ts_state; /* state at this level, STATE_ */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000753 int ts_score; /* score */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000754 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaarea424162005-06-16 21:51:00 +0000755 short ts_curi; /* index in list of child nodes */
756 char_u ts_fidx; /* index in fword[], case-folded bad word */
757 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
758 char_u ts_twordlen; /* valid length of tword[] */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000759 char_u ts_prefixdepth; /* stack depth for end of prefix or
Bram Moolenaard12a1322005-08-21 22:08:24 +0000760 * PFD_PREFIXTREE or PFD_NOPREFIX */
761 char_u ts_flags; /* TSF_ flags */
Bram Moolenaarea424162005-06-16 21:51:00 +0000762#ifdef FEAT_MBYTE
763 char_u ts_tcharlen; /* number of bytes in tword character */
764 char_u ts_tcharidx; /* current byte index in tword character */
765 char_u ts_isdiff; /* DIFF_ values */
766 char_u ts_fcharstart; /* index in fword where badword char started */
767#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000768 char_u ts_prewordlen; /* length of word in "preword[]" */
769 char_u ts_splitoff; /* index in "tword" after last split */
Bram Moolenaar78622822005-08-23 21:00:13 +0000770 char_u ts_splitfidx; /* "ts_fidx" at word split */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000771 char_u ts_complen; /* nr of compound words used */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000772 char_u ts_compsplit; /* index for "compflags" where word was spit */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000773 char_u ts_save_badflags; /* su_badflags saved here */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000774 char_u ts_delidx; /* index in fword for char that was deleted,
775 valid when "ts_flags" has TSF_DIDDEL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000776} trystate_T;
777
/* values for ts_isdiff */
#define DIFF_NONE	0	/* no different byte (yet) */
#define DIFF_YES	1	/* different byte found */
#define DIFF_INSERT	2	/* inserting character */

/* values for ts_flags; distinct bits, so they can be combined */
#define TSF_PREFIXOK	1	/* already checked that prefix is OK */
#define TSF_DIDSPLIT	2	/* tried split at this point */
#define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX	0xff	/* not using prefixes */
#define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
#define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD	    0	/* find word case-folded */
#define FIND_KEEPWORD	    1	/* find keep-case word */
#define FIND_PREFIX	    2	/* find word after prefix */
#define FIND_COMPOUND	    3	/* find case-folded compound word */
#define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000799
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000800static slang_T *slang_alloc __ARGS((char_u *lang));
801static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000802static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000803static void slang_clear_sug __ARGS((slang_T *lp));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000804static void find_word __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000805static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000806static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
Bram Moolenaard12a1322005-08-21 22:08:24 +0000807static void find_prefix __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000808static int fold_more __ARGS((matchinf_T *mip));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000809static int spell_valid_case __ARGS((int wordflags, int treeflags));
Bram Moolenaar95529562005-08-25 21:21:38 +0000810static int no_spell_checking __ARGS((win_T *wp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000811static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000812static char_u *spell_enc __ARGS((void));
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000813static void int_wordlist_spl __ARGS((char_u *fname));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000814static void spell_load_cb __ARGS((char_u *fname, void *cookie));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000815static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000816static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000817static char_u *read_string __ARGS((FILE *fd, int cnt));
818static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
819static int read_charflags_section __ARGS((FILE *fd));
820static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000821static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000822static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000823static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len));
824static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count));
825static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000826static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
827static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
Bram Moolenaar6de68532005-08-24 22:08:48 +0000828static int byte_in_str __ARGS((char_u *str, int byte));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000829static int init_syl_tab __ARGS((slang_T *slang));
830static int count_syllables __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar7887d882005-07-01 22:33:52 +0000831static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
832static void set_sal_first __ARGS((slang_T *lp));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000833#ifdef FEAT_MBYTE
834static int *mb_str2wide __ARGS((char_u *s));
835#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000836static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt));
837static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000838static void clear_midword __ARGS((buf_T *buf));
839static void use_midword __ARGS((slang_T *lp, buf_T *buf));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000840static int find_region __ARGS((char_u *rp, char_u *region));
841static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000842static int badword_captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000843static void spell_reload_one __ARGS((char_u *fname, int added_word));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000844static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000845static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000846static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
Bram Moolenaar8b59de92005-08-11 19:59:29 +0000847static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
Bram Moolenaar66fa2712006-01-22 23:22:22 +0000848static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000849#ifdef FEAT_EVAL
850static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
851#endif
852static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000853static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive));
854static void suggest_load_files __ARGS((void));
855static void tree_count_words __ARGS((char_u *byts, idx_T *idxs));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000856static void spell_find_cleanup __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000857static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000858static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000859static void suggest_try_special __ARGS((suginfo_T *su));
860static void suggest_try_change __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000861static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold));
862static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000863#ifdef FEAT_MBYTE
864static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
865#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000866static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000867static void score_comp_sal __ARGS((suginfo_T *su));
868static void score_combine __ARGS((suginfo_T *su));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000869static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000870static void suggest_try_soundalike_prep __ARGS((void));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000871static void suggest_try_soundalike __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000872static void suggest_try_soundalike_finish __ARGS((void));
873static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp));
874static int soundfold_find __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000875static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
Bram Moolenaarea424162005-06-16 21:51:00 +0000876static void set_map_str __ARGS((slang_T *lp, char_u *map));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000877static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000878static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf));
879static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000880static void add_banned __ARGS((suginfo_T *su, char_u *word));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000881static void rescore_suggestions __ARGS((suginfo_T *su));
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000882static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000883static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000884static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
885static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
886static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000887#ifdef FEAT_MBYTE
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000888static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000889#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000890static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000891static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword));
892static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
893#ifdef FEAT_MBYTE
894static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
895#endif
896static void dump_word __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T lnum));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000897static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000898static buf_T *open_spellbuf __ARGS((void));
899static void close_spellbuf __ARGS((buf_T *buf));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000900
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with negative number!
 * The argument is evaluated more than once, thus it must not have side
 * effects.
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation: table lookup below 256, otherwise the
 * character is returned unchanged (or FALSE for the "is upper" test). */
# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
# if defined(HAVE_WCHAR_H)
#  include <wchar.h>	    /* for towupper() and towlower() */
# endif
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
	    : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
	    : (c) < 256 ? spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
	    : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
	    : (c) < 256 ? spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
	    : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
	    : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
# endif
#endif
942
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000943
944static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar7887d882005-07-01 22:33:52 +0000945static char *e_spell_trunc = N_("E758: Truncated spell file");
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000946static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
Bram Moolenaar6de68532005-08-24 22:08:48 +0000947static char *e_affname = N_("Affix name too long in %s line %d: %s");
948static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
949static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
Bram Moolenaar329cc7e2005-08-10 07:51:35 +0000950static char *msg_compressing = N_("Compressing word tree...");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000951
Bram Moolenaara40ceaf2006-01-13 22:35:40 +0000952/* Remember what "z?" replaced. */
953static char_u *repl_from = NULL;
954static char_u *repl_to = NULL;
955
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000956/*
957 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000958 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000959 * "*attrp" is set to the highlight index for a badly spelled word. For a
960 * non-word or when it's OK it remains unchanged.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000961 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000962 *
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000963 * "capcol" is used to check for a Capitalised word after the end of a
964 * sentence. If it's zero then perform the check. Return the column where to
965 * check next, or -1 when no sentence end was found. If it's NULL then don't
966 * worry.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000967 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000968 * Returns the length of the word in bytes, also when it's OK, so that the
969 * caller can skip over the word.
970 */
971 int
Bram Moolenaar4770d092006-01-12 23:22:24 +0000972spell_check(wp, ptr, attrp, capcol, docount)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000973 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000974 char_u *ptr;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000975 hlf_T *attrp;
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000976 int *capcol; /* column to check for Capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000977 int docount; /* count good words */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000978{
979 matchinf_T mi; /* Most things are put in "mi" so that it can
980 be passed to functions quickly. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000981 int nrlen = 0; /* found a number first */
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000982 int c;
Bram Moolenaar5195e452005-08-19 20:32:47 +0000983 int wrongcaplen = 0;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000984 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +0000985 int count_word = docount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000986
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000987 /* A word never starts at a space or a control character. Return quickly
988 * then, skipping over the character. */
989 if (*ptr <= ' ')
990 return 1;
Bram Moolenaar5195e452005-08-19 20:32:47 +0000991 vim_memset(&mi, 0, sizeof(matchinf_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000992
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000993 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
Bram Moolenaar43abc522005-12-10 20:15:02 +0000994 * 0X99FF. But always do check spelling to find "3GPP" and "11
995 * julifeest". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000996 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000997 {
Bram Moolenaar3982c542005-06-08 21:56:31 +0000998 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
999 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001000 else
1001 mi.mi_end = skipdigits(ptr);
Bram Moolenaar43abc522005-12-10 20:15:02 +00001002 nrlen = mi.mi_end - ptr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001003 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001004
Bram Moolenaar0c405862005-06-22 22:26:26 +00001005 /* Find the normal end of the word (until the next non-word character). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001006 mi.mi_word = ptr;
Bram Moolenaar43abc522005-12-10 20:15:02 +00001007 mi.mi_fend = ptr;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001008 if (spell_iswordp(mi.mi_fend, wp->w_buffer))
Bram Moolenaar51485f02005-06-04 21:55:20 +00001009 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001010 do
Bram Moolenaar51485f02005-06-04 21:55:20 +00001011 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001012 mb_ptr_adv(mi.mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001013 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001014
1015 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
1016 {
1017 /* Check word starting with capital letter. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00001018 c = PTR2CHAR(ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001019 if (!SPELL_ISUPPER(c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001020 wrongcaplen = (int)(mi.mi_fend - ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001021 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001022 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001023 if (capcol != NULL)
1024 *capcol = -1;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001025
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001026 /* We always use the characters up to the next non-word character,
1027 * also for bad words. */
1028 mi.mi_end = mi.mi_fend;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001029
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001030 /* Check caps type later. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001031 mi.mi_buf = wp->w_buffer;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001032
Bram Moolenaar5195e452005-08-19 20:32:47 +00001033 /* case-fold the word with one non-word character, so that we can check
1034 * for the word end. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001035 if (*mi.mi_fend != NUL)
1036 mb_ptr_adv(mi.mi_fend);
1037
1038 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
1039 MAXWLEN + 1);
1040 mi.mi_fwordlen = STRLEN(mi.mi_fword);
1041
1042 /* The word is bad unless we recognize it. */
1043 mi.mi_result = SP_BAD;
Bram Moolenaar78622822005-08-23 21:00:13 +00001044 mi.mi_result2 = SP_BAD;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001045
1046 /*
1047 * Loop over the languages specified in 'spelllang'.
Bram Moolenaar4770d092006-01-12 23:22:24 +00001048 * We check them all, because a word may be matched longer in another
1049 * language.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001050 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001051 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001052 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001053 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
1054
1055 /* If reloading fails the language is still in the list but everything
1056 * has been cleared. */
1057 if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
1058 continue;
1059
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001060 /* Check for a matching word in case-folded words. */
1061 find_word(&mi, FIND_FOLDWORD);
1062
1063 /* Check for a matching word in keep-case words. */
1064 find_word(&mi, FIND_KEEPWORD);
1065
1066 /* Check for matching prefixes. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001067 find_prefix(&mi, FIND_FOLDWORD);
Bram Moolenaar78622822005-08-23 21:00:13 +00001068
1069 /* For a NOBREAK language, may want to use a word without a following
1070 * word as a backup. */
1071 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
1072 && mi.mi_result2 != SP_BAD)
1073 {
1074 mi.mi_result = mi.mi_result2;
1075 mi.mi_end = mi.mi_end2;
1076 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00001077
1078 /* Count the word in the first language where it's found to be OK. */
1079 if (count_word && mi.mi_result == SP_OK)
1080 {
1081 count_common_word(mi.mi_lp->lp_slang, ptr,
1082 (int)(mi.mi_end - ptr), 1);
1083 count_word = FALSE;
1084 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001085 }
1086
1087 if (mi.mi_result != SP_OK)
1088 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001089 /* If we found a number skip over it. Allows for "42nd". Do flag
1090 * rare and local words, e.g., "3GPP". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001091 if (nrlen > 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001092 {
1093 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1094 return nrlen;
1095 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001096
1097 /* When we are at a non-word character there is no error, just
1098 * skip over the character (try looking for a word after it). */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001099 else if (!spell_iswordp_nmw(ptr))
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00001100 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001101 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
1102 {
1103 regmatch_T regmatch;
1104
1105 /* Check for end of sentence. */
1106 regmatch.regprog = wp->w_buffer->b_cap_prog;
1107 regmatch.rm_ic = FALSE;
1108 if (vim_regexec(&regmatch, ptr, 0))
1109 *capcol = (int)(regmatch.endp[0] - ptr);
1110 }
1111
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001112#ifdef FEAT_MBYTE
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001113 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001114 return (*mb_ptr2len)(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001115#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001116 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001117 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001118 else if (mi.mi_end == ptr)
1119 /* Always include at least one character. Required for when there
1120 * is a mixup in "midword". */
1121 mb_ptr_adv(mi.mi_end);
Bram Moolenaar78622822005-08-23 21:00:13 +00001122 else if (mi.mi_result == SP_BAD
1123 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
1124 {
1125 char_u *p, *fp;
1126 int save_result = mi.mi_result;
1127
1128 /* First language in 'spelllang' is NOBREAK. Find first position
1129 * at which any word would be valid. */
1130 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001131 if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
Bram Moolenaar78622822005-08-23 21:00:13 +00001132 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001133 p = mi.mi_word;
1134 fp = mi.mi_fword;
1135 for (;;)
Bram Moolenaar78622822005-08-23 21:00:13 +00001136 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001137 mb_ptr_adv(p);
1138 mb_ptr_adv(fp);
1139 if (p >= mi.mi_end)
1140 break;
1141 mi.mi_compoff = fp - mi.mi_fword;
1142 find_word(&mi, FIND_COMPOUND);
1143 if (mi.mi_result != SP_BAD)
1144 {
1145 mi.mi_end = p;
1146 break;
1147 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001148 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001149 mi.mi_result = save_result;
Bram Moolenaar78622822005-08-23 21:00:13 +00001150 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001151 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001152
1153 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001154 *attrp = HLF_SPB;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001155 else if (mi.mi_result == SP_RARE)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001156 *attrp = HLF_SPR;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001157 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001158 *attrp = HLF_SPL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001159 }
1160
Bram Moolenaar5195e452005-08-19 20:32:47 +00001161 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1162 {
1163 /* Report SpellCap only when the word isn't badly spelled. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001164 *attrp = HLF_SPC;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001165 return wrongcaplen;
1166 }
1167
Bram Moolenaar51485f02005-06-04 21:55:20 +00001168 return (int)(mi.mi_end - ptr);
1169}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001170
Bram Moolenaar51485f02005-06-04 21:55:20 +00001171/*
1172 * Check if the word at "mip->mi_word" is in the tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001173 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1174 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1175 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1176 * tree.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001177 *
1178 * For a match mip->mi_result is updated.
1179 */
1180 static void
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001181find_word(mip, mode)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001182 matchinf_T *mip;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001183 int mode;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001184{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001185 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001186 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001187 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001188 int endidxcnt = 0;
1189 int len;
1190 int wlen = 0;
1191 int flen;
1192 int c;
1193 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001194 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001195#ifdef FEAT_MBYTE
1196 char_u *s;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001197#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +00001198 char_u *p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001199 int res = SP_BAD;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001200 slang_T *slang = mip->mi_lp->lp_slang;
1201 unsigned flags;
1202 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001203 idx_T *idxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001204 int word_ends;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001205 int prefix_found;
Bram Moolenaar78622822005-08-23 21:00:13 +00001206 int nobreak_result;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001207
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001208 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001209 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001210 /* Check for word with matching case in keep-case tree. */
1211 ptr = mip->mi_word;
1212 flen = 9999; /* no case folding, always enough bytes */
1213 byts = slang->sl_kbyts;
1214 idxs = slang->sl_kidxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001215
1216 if (mode == FIND_KEEPCOMPOUND)
1217 /* Skip over the previously found word(s). */
1218 wlen += mip->mi_compoff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001219 }
1220 else
1221 {
1222 /* Check for case-folded in case-folded tree. */
1223 ptr = mip->mi_fword;
1224 flen = mip->mi_fwordlen; /* available case-folded bytes */
1225 byts = slang->sl_fbyts;
1226 idxs = slang->sl_fidxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001227
1228 if (mode == FIND_PREFIX)
1229 {
1230 /* Skip over the prefix. */
1231 wlen = mip->mi_prefixlen;
1232 flen -= mip->mi_prefixlen;
1233 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001234 else if (mode == FIND_COMPOUND)
1235 {
1236 /* Skip over the previously found word(s). */
1237 wlen = mip->mi_compoff;
1238 flen -= mip->mi_compoff;
1239 }
1240
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001241 }
1242
Bram Moolenaar51485f02005-06-04 21:55:20 +00001243 if (byts == NULL)
1244 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001245
Bram Moolenaar51485f02005-06-04 21:55:20 +00001246 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001247 * Repeat advancing in the tree until:
1248 * - there is a byte that doesn't match,
1249 * - we reach the end of the tree,
1250 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001251 */
1252 for (;;)
1253 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001254 if (flen <= 0 && *mip->mi_fend != NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001255 flen = fold_more(mip);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001256
1257 len = byts[arridx++];
1258
1259 /* If the first possible byte is a zero the word could end here.
1260 * Remember this index, we first check for the longest word. */
1261 if (byts[arridx] == 0)
1262 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001263 if (endidxcnt == MAXWLEN)
1264 {
1265 /* Must be a corrupted spell file. */
1266 EMSG(_(e_format));
1267 return;
1268 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001269 endlen[endidxcnt] = wlen;
1270 endidx[endidxcnt++] = arridx++;
1271 --len;
1272
1273 /* Skip over the zeros, there can be several flag/region
1274 * combinations. */
1275 while (len > 0 && byts[arridx] == 0)
1276 {
1277 ++arridx;
1278 --len;
1279 }
1280 if (len == 0)
1281 break; /* no children, word must end here */
1282 }
1283
1284 /* Stop looking at end of the line. */
1285 if (ptr[wlen] == NUL)
1286 break;
1287
1288 /* Perform a binary search in the list of accepted bytes. */
1289 c = ptr[wlen];
Bram Moolenaar0c405862005-06-22 22:26:26 +00001290 if (c == TAB) /* <Tab> is handled like <Space> */
1291 c = ' ';
Bram Moolenaar51485f02005-06-04 21:55:20 +00001292 lo = arridx;
1293 hi = arridx + len - 1;
1294 while (lo < hi)
1295 {
1296 m = (lo + hi) / 2;
1297 if (byts[m] > c)
1298 hi = m - 1;
1299 else if (byts[m] < c)
1300 lo = m + 1;
1301 else
1302 {
1303 lo = hi = m;
1304 break;
1305 }
1306 }
1307
1308 /* Stop if there is no matching byte. */
1309 if (hi < lo || byts[lo] != c)
1310 break;
1311
1312 /* Continue at the child (if there is one). */
1313 arridx = idxs[lo];
1314 ++wlen;
1315 --flen;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001316
1317 /* One space in the good word may stand for several spaces in the
1318 * checked word. */
1319 if (c == ' ')
1320 {
1321 for (;;)
1322 {
1323 if (flen <= 0 && *mip->mi_fend != NUL)
1324 flen = fold_more(mip);
1325 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1326 break;
1327 ++wlen;
1328 --flen;
1329 }
1330 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001331 }
1332
1333 /*
1334 * Verify that one of the possible endings is valid. Try the longest
1335 * first.
1336 */
1337 while (endidxcnt > 0)
1338 {
1339 --endidxcnt;
1340 arridx = endidx[endidxcnt];
1341 wlen = endlen[endidxcnt];
1342
1343#ifdef FEAT_MBYTE
1344 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1345 continue; /* not at first byte of character */
1346#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001347 if (spell_iswordp(ptr + wlen, mip->mi_buf))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001348 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001349 if (slang->sl_compprog == NULL && !slang->sl_nobreak)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001350 continue; /* next char is a word character */
1351 word_ends = FALSE;
1352 }
1353 else
1354 word_ends = TRUE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001355 /* The prefix flag is before compound flags. Once a valid prefix flag
1356 * has been found we try compound flags. */
1357 prefix_found = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001358
1359#ifdef FEAT_MBYTE
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001360 if (mode != FIND_KEEPWORD && has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001361 {
1362 /* Compute byte length in original word, length may change
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001363 * when folding case. This can be slow, take a shortcut when the
1364 * case-folded word is equal to the keep-case word. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001365 p = mip->mi_word;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001366 if (STRNCMP(ptr, p, wlen) != 0)
1367 {
1368 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1369 mb_ptr_adv(p);
1370 wlen = p - mip->mi_word;
1371 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001372 }
1373#endif
1374
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001375 /* Check flags and region. For FIND_PREFIX check the condition and
1376 * prefix ID.
1377 * Repeat this if there are more flags/region alternatives until there
1378 * is a match. */
1379 res = SP_BAD;
1380 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1381 --len, ++arridx)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001382 {
1383 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001384
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001385 /* For the fold-case tree check that the case of the checked word
1386 * matches with what the word in the tree requires.
1387 * For keep-case tree the case is always right. For prefixes we
1388 * don't bother to check. */
1389 if (mode == FIND_FOLDWORD)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001390 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001391 if (mip->mi_cend != mip->mi_word + wlen)
1392 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001393 /* mi_capflags was set for a different word length, need
1394 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001395 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001396 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001397 }
1398
Bram Moolenaar0c405862005-06-22 22:26:26 +00001399 if (mip->mi_capflags == WF_KEEPCAP
1400 || !spell_valid_case(mip->mi_capflags, flags))
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001401 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001402 }
1403
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001404 /* When mode is FIND_PREFIX the word must support the prefix:
1405 * check the prefix ID and the condition. Do that for the list at
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001406 * mip->mi_prefarridx that find_prefix() filled. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001407 else if (mode == FIND_PREFIX && !prefix_found)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001408 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001409 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001410 flags,
Bram Moolenaar53805d12005-08-01 07:08:33 +00001411 mip->mi_word + mip->mi_cprefixlen, slang,
1412 FALSE);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001413 if (c == 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001414 continue;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001415
1416 /* Use the WF_RARE flag for a rare prefix. */
1417 if (c & WF_RAREPFX)
1418 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001419 prefix_found = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001420 }
1421
Bram Moolenaar78622822005-08-23 21:00:13 +00001422 if (slang->sl_nobreak)
1423 {
1424 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1425 && (flags & WF_BANNED) == 0)
1426 {
1427 /* NOBREAK: found a valid following word. That's all we
1428 * need to know, so return. */
1429 mip->mi_result = SP_OK;
1430 break;
1431 }
1432 }
1433
1434 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1435 || !word_ends))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001436 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00001437 /* If there is no flag or the word is shorter than
1438 * COMPOUNDMIN reject it quickly.
1439 * Makes you wonder why someone puts a compound flag on a word
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001440 * that's too short... Myspell compatibility requires this
1441 * anyway. */
Bram Moolenaare52325c2005-08-22 22:54:29 +00001442 if (((unsigned)flags >> 24) == 0
1443 || wlen - mip->mi_compoff < slang->sl_compminlen)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001444 continue;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001445#ifdef FEAT_MBYTE
1446 /* For multi-byte chars check character length against
1447 * COMPOUNDMIN. */
1448 if (has_mbyte
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001449 && slang->sl_compminlen > 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001450 && mb_charlen_len(mip->mi_word + mip->mi_compoff,
1451 wlen - mip->mi_compoff) < slang->sl_compminlen)
1452 continue;
1453#endif
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001454
Bram Moolenaare52325c2005-08-22 22:54:29 +00001455 /* Limit the number of compound words to COMPOUNDMAX if no
1456 * maximum for syllables is specified. */
1457 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax
1458 && slang->sl_compsylmax == MAXWLEN)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001459 continue;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001460
Bram Moolenaard12a1322005-08-21 22:08:24 +00001461 /* Quickly check if compounding is possible with this flag. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00001462 if (!byte_in_str(mip->mi_complen == 0
Bram Moolenaard12a1322005-08-21 22:08:24 +00001463 ? slang->sl_compstartflags
1464 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +00001465 ((unsigned)flags >> 24)))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001466 continue;
1467
Bram Moolenaare52325c2005-08-22 22:54:29 +00001468 if (mode == FIND_COMPOUND)
1469 {
1470 int capflags;
1471
1472 /* Need to check the caps type of the appended compound
1473 * word. */
1474#ifdef FEAT_MBYTE
1475 if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1476 mip->mi_compoff) != 0)
1477 {
1478 /* case folding may have changed the length */
1479 p = mip->mi_word;
1480 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1481 mb_ptr_adv(p);
1482 }
1483 else
1484#endif
1485 p = mip->mi_word + mip->mi_compoff;
1486 capflags = captype(p, mip->mi_word + wlen);
1487 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1488 && (flags & WF_FIXCAP) != 0))
1489 continue;
1490
1491 if (capflags != WF_ALLCAP)
1492 {
1493 /* When the character before the word is a word
1494 * character we do not accept a Onecap word. We do
1495 * accept a no-caps word, even when the dictionary
1496 * word specifies ONECAP. */
1497 mb_ptr_back(mip->mi_word, p);
1498 if (spell_iswordp_nmw(p)
1499 ? capflags == WF_ONECAP
1500 : (flags & WF_ONECAP) != 0
1501 && capflags != WF_ONECAP)
1502 continue;
1503 }
1504 }
1505
Bram Moolenaar5195e452005-08-19 20:32:47 +00001506 /* If the word ends the sequence of compound flags of the
1507 * words must match with one of the COMPOUNDFLAGS items and
1508 * the number of syllables must not be too large. */
1509 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1510 mip->mi_compflags[mip->mi_complen + 1] = NUL;
1511 if (word_ends)
1512 {
1513 char_u fword[MAXWLEN];
1514
1515 if (slang->sl_compsylmax < MAXWLEN)
1516 {
1517 /* "fword" is only needed for checking syllables. */
1518 if (ptr == mip->mi_word)
1519 (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1520 else
1521 vim_strncpy(fword, ptr, endlen[endidxcnt]);
1522 }
1523 if (!can_compound(slang, fword, mip->mi_compflags))
1524 continue;
1525 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001526 }
1527
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001528 /* Check NEEDCOMPOUND: can't use word without compounding. */
1529 else if (flags & WF_NEEDCOMP)
1530 continue;
1531
Bram Moolenaar78622822005-08-23 21:00:13 +00001532 nobreak_result = SP_OK;
1533
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001534 if (!word_ends)
1535 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001536 int save_result = mip->mi_result;
1537 char_u *save_end = mip->mi_end;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001538 langp_T *save_lp = mip->mi_lp;
1539 int lpi;
Bram Moolenaar78622822005-08-23 21:00:13 +00001540
1541 /* Check that a valid word follows. If there is one and we
1542 * are compounding, it will set "mi_result", thus we are
1543 * always finished here. For NOBREAK we only check that a
1544 * valid word follows.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001545 * Recursive! */
Bram Moolenaar78622822005-08-23 21:00:13 +00001546 if (slang->sl_nobreak)
1547 mip->mi_result = SP_BAD;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001548
1549 /* Find following word in case-folded tree. */
1550 mip->mi_compoff = endlen[endidxcnt];
1551#ifdef FEAT_MBYTE
1552 if (has_mbyte && mode == FIND_KEEPWORD)
1553 {
1554 /* Compute byte length in case-folded word from "wlen":
1555 * byte length in keep-case word. Length may change when
1556 * folding case. This can be slow, take a shortcut when
1557 * the case-folded word is equal to the keep-case word. */
1558 p = mip->mi_fword;
1559 if (STRNCMP(ptr, p, wlen) != 0)
1560 {
1561 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1562 mb_ptr_adv(p);
1563 mip->mi_compoff = p - mip->mi_fword;
1564 }
1565 }
1566#endif
Bram Moolenaard12a1322005-08-21 22:08:24 +00001567 c = mip->mi_compoff;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001568 ++mip->mi_complen;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001569
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001570 /* For NOBREAK we need to try all NOBREAK languages, at least
1571 * to find the ".add" file(s). */
1572 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
Bram Moolenaar78622822005-08-23 21:00:13 +00001573 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001574 if (slang->sl_nobreak)
1575 {
1576 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1577 if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1578 || !mip->mi_lp->lp_slang->sl_nobreak)
1579 continue;
1580 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00001581
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001582 find_word(mip, FIND_COMPOUND);
1583
1584 /* When NOBREAK any word that matches is OK. Otherwise we
1585 * need to find the longest match, thus try with keep-case
1586 * and prefix too. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001587 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1588 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001589 /* Find following word in keep-case tree. */
1590 mip->mi_compoff = wlen;
1591 find_word(mip, FIND_KEEPCOMPOUND);
1592
1593 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1594 {
1595 /* Check for following word with prefix. */
1596 mip->mi_compoff = c;
1597 find_prefix(mip, FIND_COMPOUND);
1598 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001599 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001600
1601 if (!slang->sl_nobreak)
1602 break;
Bram Moolenaar78622822005-08-23 21:00:13 +00001603 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001604 --mip->mi_complen;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001605 mip->mi_lp = save_lp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001606
Bram Moolenaar78622822005-08-23 21:00:13 +00001607 if (slang->sl_nobreak)
1608 {
1609 nobreak_result = mip->mi_result;
1610 mip->mi_result = save_result;
1611 mip->mi_end = save_end;
1612 }
1613 else
1614 {
1615 if (mip->mi_result == SP_OK)
1616 break;
1617 continue;
1618 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001619 }
1620
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001621 if (flags & WF_BANNED)
1622 res = SP_BANNED;
1623 else if (flags & WF_REGION)
1624 {
1625 /* Check region. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001626 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001627 res = SP_OK;
1628 else
1629 res = SP_LOCAL;
1630 }
1631 else if (flags & WF_RARE)
1632 res = SP_RARE;
1633 else
1634 res = SP_OK;
1635
Bram Moolenaar78622822005-08-23 21:00:13 +00001636 /* Always use the longest match and the best result. For NOBREAK
1637 * we separately keep the longest match without a following good
1638 * word as a fall-back. */
1639 if (nobreak_result == SP_BAD)
1640 {
1641 if (mip->mi_result2 > res)
1642 {
1643 mip->mi_result2 = res;
1644 mip->mi_end2 = mip->mi_word + wlen;
1645 }
1646 else if (mip->mi_result2 == res
1647 && mip->mi_end2 < mip->mi_word + wlen)
1648 mip->mi_end2 = mip->mi_word + wlen;
1649 }
1650 else if (mip->mi_result > res)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001651 {
1652 mip->mi_result = res;
1653 mip->mi_end = mip->mi_word + wlen;
1654 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001655 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001656 mip->mi_end = mip->mi_word + wlen;
1657
Bram Moolenaar78622822005-08-23 21:00:13 +00001658 if (mip->mi_result == SP_OK)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001659 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001660 }
1661
Bram Moolenaar78622822005-08-23 21:00:13 +00001662 if (mip->mi_result == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001663 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001664 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001665}
1666
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001667/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001668 * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1669 * does not have too many syllables.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001670 */
1671 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00001672can_compound(slang, word, flags)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001673 slang_T *slang;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001674 char_u *word;
1675 char_u *flags;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001676{
Bram Moolenaar5195e452005-08-19 20:32:47 +00001677 regmatch_T regmatch;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001678#ifdef FEAT_MBYTE
1679 char_u uflags[MAXWLEN * 2];
1680 int i;
1681#endif
1682 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001683
1684 if (slang->sl_compprog == NULL)
1685 return FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001686#ifdef FEAT_MBYTE
1687 if (enc_utf8)
1688 {
1689 /* Need to convert the single byte flags to utf8 characters. */
1690 p = uflags;
1691 for (i = 0; flags[i] != NUL; ++i)
1692 p += mb_char2bytes(flags[i], p);
1693 *p = NUL;
1694 p = uflags;
1695 }
1696 else
1697#endif
1698 p = flags;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001699 regmatch.regprog = slang->sl_compprog;
1700 regmatch.rm_ic = FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001701 if (!vim_regexec(&regmatch, p, 0))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001702 return FALSE;
1703
Bram Moolenaare52325c2005-08-22 22:54:29 +00001704 /* Count the number of syllables. This may be slow, do it last. If there
1705 * are too many syllables AND the number of compound words is above
1706 * COMPOUNDMAX then compounding is not allowed. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001707 if (slang->sl_compsylmax < MAXWLEN
1708 && count_syllables(slang, word) > slang->sl_compsylmax)
Bram Moolenaar6de68532005-08-24 22:08:48 +00001709 return (int)STRLEN(flags) < slang->sl_compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001710 return TRUE;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001711}
1712
1713/*
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001714 * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1715 * ID in "flags" for the word "word".
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001716 * The WF_RAREPFX flag is included in the return value for a rare prefix.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001717 */
1718 static int
Bram Moolenaar53805d12005-08-01 07:08:33 +00001719valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001720 int totprefcnt; /* nr of prefix IDs */
1721 int arridx; /* idx in sl_pidxs[] */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001722 int flags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001723 char_u *word;
1724 slang_T *slang;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001725 int cond_req; /* only use prefixes with a condition */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001726{
1727 int prefcnt;
1728 int pidx;
1729 regprog_T *rp;
1730 regmatch_T regmatch;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001731 int prefid;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001732
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001733 prefid = (unsigned)flags >> 24;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001734 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1735 {
1736 pidx = slang->sl_pidxs[arridx + prefcnt];
1737
1738 /* Check the prefix ID. */
1739 if (prefid != (pidx & 0xff))
1740 continue;
1741
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001742 /* Check if the prefix doesn't combine and the word already has a
1743 * suffix. */
1744 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1745 continue;
1746
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001747 /* Check the condition, if there is one. The condition index is
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001748 * stored in the two bytes above the prefix ID byte. */
1749 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001750 if (rp != NULL)
1751 {
1752 regmatch.regprog = rp;
1753 regmatch.rm_ic = FALSE;
1754 if (!vim_regexec(&regmatch, word, 0))
1755 continue;
1756 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00001757 else if (cond_req)
1758 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001759
Bram Moolenaar53805d12005-08-01 07:08:33 +00001760 /* It's a match! Return the WF_ flags. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001761 return pidx;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001762 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001763 return 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001764}
1765
1766/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001767 * Check if the word at "mip->mi_word" has a matching prefix.
1768 * If it does, then check the following word.
1769 *
Bram Moolenaard12a1322005-08-21 22:08:24 +00001770 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1771 * prefix in a compound word.
1772 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001773 * For a match mip->mi_result is updated.
1774 */
1775 static void
Bram Moolenaard12a1322005-08-21 22:08:24 +00001776find_prefix(mip, mode)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001777 matchinf_T *mip;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001778 int mode;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001779{
1780 idx_T arridx = 0;
1781 int len;
1782 int wlen = 0;
1783 int flen;
1784 int c;
1785 char_u *ptr;
1786 idx_T lo, hi, m;
1787 slang_T *slang = mip->mi_lp->lp_slang;
1788 char_u *byts;
1789 idx_T *idxs;
1790
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001791 byts = slang->sl_pbyts;
1792 if (byts == NULL)
1793 return; /* array is empty */
1794
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001795 /* We use the case-folded word here, since prefixes are always
1796 * case-folded. */
1797 ptr = mip->mi_fword;
1798 flen = mip->mi_fwordlen; /* available case-folded bytes */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001799 if (mode == FIND_COMPOUND)
1800 {
1801 /* Skip over the previously found word(s). */
1802 ptr += mip->mi_compoff;
1803 flen -= mip->mi_compoff;
1804 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001805 idxs = slang->sl_pidxs;
1806
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001807 /*
1808 * Repeat advancing in the tree until:
1809 * - there is a byte that doesn't match,
1810 * - we reach the end of the tree,
1811 * - or we reach the end of the line.
1812 */
1813 for (;;)
1814 {
1815 if (flen == 0 && *mip->mi_fend != NUL)
1816 flen = fold_more(mip);
1817
1818 len = byts[arridx++];
1819
1820 /* If the first possible byte is a zero the prefix could end here.
1821 * Check if the following word matches and supports the prefix. */
1822 if (byts[arridx] == 0)
1823 {
1824 /* There can be several prefixes with different conditions. We
1825 * try them all, since we don't know which one will give the
1826 * longest match. The word is the same each time, pass the list
1827 * of possible prefixes to find_word(). */
1828 mip->mi_prefarridx = arridx;
1829 mip->mi_prefcnt = len;
1830 while (len > 0 && byts[arridx] == 0)
1831 {
1832 ++arridx;
1833 --len;
1834 }
1835 mip->mi_prefcnt -= len;
1836
1837 /* Find the word that comes after the prefix. */
1838 mip->mi_prefixlen = wlen;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001839 if (mode == FIND_COMPOUND)
1840 /* Skip over the previously found word(s). */
1841 mip->mi_prefixlen += mip->mi_compoff;
1842
Bram Moolenaar53805d12005-08-01 07:08:33 +00001843#ifdef FEAT_MBYTE
1844 if (has_mbyte)
1845 {
1846 /* Case-folded length may differ from original length. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001847 mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1848 mip->mi_prefixlen, mip->mi_word);
Bram Moolenaar53805d12005-08-01 07:08:33 +00001849 }
1850 else
Bram Moolenaard12a1322005-08-21 22:08:24 +00001851 mip->mi_cprefixlen = mip->mi_prefixlen;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001852#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001853 find_word(mip, FIND_PREFIX);
1854
1855
1856 if (len == 0)
1857 break; /* no children, word must end here */
1858 }
1859
1860 /* Stop looking at end of the line. */
1861 if (ptr[wlen] == NUL)
1862 break;
1863
1864 /* Perform a binary search in the list of accepted bytes. */
1865 c = ptr[wlen];
1866 lo = arridx;
1867 hi = arridx + len - 1;
1868 while (lo < hi)
1869 {
1870 m = (lo + hi) / 2;
1871 if (byts[m] > c)
1872 hi = m - 1;
1873 else if (byts[m] < c)
1874 lo = m + 1;
1875 else
1876 {
1877 lo = hi = m;
1878 break;
1879 }
1880 }
1881
1882 /* Stop if there is no matching byte. */
1883 if (hi < lo || byts[lo] != c)
1884 break;
1885
1886 /* Continue at the child (if there is one). */
1887 arridx = idxs[lo];
1888 ++wlen;
1889 --flen;
1890 }
1891}
1892
1893/*
1894 * Need to fold at least one more character. Do until next non-word character
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001895 * for efficiency. Include the non-word character too.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001896 * Return the length of the folded chars in bytes.
1897 */
1898 static int
1899fold_more(mip)
1900 matchinf_T *mip;
1901{
1902 int flen;
1903 char_u *p;
1904
1905 p = mip->mi_fend;
1906 do
1907 {
1908 mb_ptr_adv(mip->mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001909 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001910
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001911 /* Include the non-word character so that we can check for the word end. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001912 if (*mip->mi_fend != NUL)
1913 mb_ptr_adv(mip->mi_fend);
1914
1915 (void)spell_casefold(p, (int)(mip->mi_fend - p),
1916 mip->mi_fword + mip->mi_fwordlen,
1917 MAXWLEN - mip->mi_fwordlen);
1918 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
1919 mip->mi_fwordlen += flen;
1920 return flen;
1921}
1922
1923/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001924 * Check case flags for a word. Return TRUE if the word has the requested
1925 * case.
1926 */
1927 static int
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00001928spell_valid_case(wordflags, treeflags)
1929 int wordflags; /* flags for the checked word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001930 int treeflags; /* flags for the word in the spell tree */
1931{
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00001932 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001933 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001934 && ((treeflags & WF_ONECAP) == 0
1935 || (wordflags & WF_ONECAP) != 0)));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001936}
1937
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001938/*
1939 * Return TRUE if spell checking is not enabled.
1940 */
1941 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00001942no_spell_checking(wp)
1943 win_T *wp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001944{
Bram Moolenaar95529562005-08-25 21:21:38 +00001945 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001946 {
1947 EMSG(_("E756: Spell checking is not enabled"));
1948 return TRUE;
1949 }
1950 return FALSE;
1951}
Bram Moolenaar51485f02005-06-04 21:55:20 +00001952
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001953/*
1954 * Move to next spell error.
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001955 * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1956 * "curline" is TRUE to find word under/after cursor in the same line.
Bram Moolenaar5195e452005-08-19 20:32:47 +00001957 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1958 * to after badly spelled word before the cursor.
Bram Moolenaar6de68532005-08-24 22:08:48 +00001959 * Return 0 if not found, length of the badly spelled word otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001960 */
1961 int
Bram Moolenaar95529562005-08-25 21:21:38 +00001962spell_move_to(wp, dir, allwords, curline, attrp)
1963 win_T *wp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001964 int dir; /* FORWARD or BACKWARD */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001965 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001966 int curline;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001967 hlf_T *attrp; /* return: attributes of bad word or NULL
1968 (only when "dir" is FORWARD) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001969{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001970 linenr_T lnum;
1971 pos_T found_pos;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001972 int found_len = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001973 char_u *line;
1974 char_u *p;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001975 char_u *endp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001976 hlf_T attr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001977 int len;
Bram Moolenaar95529562005-08-25 21:21:38 +00001978 int has_syntax = syntax_present(wp->w_buffer);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001979 int col;
1980 int can_spell;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001981 char_u *buf = NULL;
1982 int buflen = 0;
1983 int skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001984 int capcol = -1;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001985 int found_one = FALSE;
1986 int wrapped = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001987
Bram Moolenaar95529562005-08-25 21:21:38 +00001988 if (no_spell_checking(wp))
Bram Moolenaar6de68532005-08-24 22:08:48 +00001989 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001990
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001991 /*
1992 * Start looking for bad word at the start of the line, because we can't
Bram Moolenaar0c405862005-06-22 22:26:26 +00001993 * start halfway a word, we don't know where the it starts or ends.
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001994 *
1995 * When searching backwards, we continue in the line to find the last
1996 * bad word (in the cursor line: before the cursor).
Bram Moolenaar0c405862005-06-22 22:26:26 +00001997 *
1998 * We concatenate the start of the next line, so that wrapped words work
1999 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
2000 * though...
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002001 */
Bram Moolenaar95529562005-08-25 21:21:38 +00002002 lnum = wp->w_cursor.lnum;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002003 found_pos.lnum = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002004
2005 while (!got_int)
2006 {
Bram Moolenaar95529562005-08-25 21:21:38 +00002007 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002008
Bram Moolenaar0c405862005-06-22 22:26:26 +00002009 len = STRLEN(line);
2010 if (buflen < len + MAXWLEN + 2)
2011 {
2012 vim_free(buf);
2013 buflen = len + MAXWLEN + 2;
2014 buf = alloc(buflen);
2015 if (buf == NULL)
2016 break;
2017 }
2018
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002019 /* In first line check first word for Capital. */
2020 if (lnum == 1)
2021 capcol = 0;
2022
2023 /* For checking first word with a capital skip white space. */
2024 if (capcol == 0)
2025 capcol = skipwhite(line) - line;
2026
Bram Moolenaar0c405862005-06-22 22:26:26 +00002027 /* Copy the line into "buf" and append the start of the next line if
2028 * possible. */
2029 STRCPY(buf, line);
Bram Moolenaar95529562005-08-25 21:21:38 +00002030 if (lnum < wp->w_buffer->b_ml.ml_line_count)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002031 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
2032
2033 p = buf + skip;
2034 endp = buf + len;
2035 while (p < endp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002036 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002037 /* When searching backward don't search after the cursor. Unless
2038 * we wrapped around the end of the buffer. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002039 if (dir == BACKWARD
Bram Moolenaar95529562005-08-25 21:21:38 +00002040 && lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002041 && !wrapped
Bram Moolenaar95529562005-08-25 21:21:38 +00002042 && (colnr_T)(p - buf) >= wp->w_cursor.col)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002043 break;
2044
2045 /* start of word */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002046 attr = HLF_COUNT;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002047 len = spell_check(wp, p, &attr, &capcol, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002048
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002049 if (attr != HLF_COUNT)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002050 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002051 /* We found a bad word. Check the attribute. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002052 if (allwords || attr == HLF_SPB)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002053 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002054 found_one = TRUE;
2055
Bram Moolenaar51485f02005-06-04 21:55:20 +00002056 /* When searching forward only accept a bad word after
2057 * the cursor. */
2058 if (dir == BACKWARD
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002059 || lnum != wp->w_cursor.lnum
Bram Moolenaar95529562005-08-25 21:21:38 +00002060 || (lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002061 && (wrapped
2062 || (colnr_T)(curline ? p - buf + len
Bram Moolenaar0c405862005-06-22 22:26:26 +00002063 : p - buf)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002064 > wp->w_cursor.col)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002065 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002066 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002067 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00002068 col = p - buf;
Bram Moolenaar95529562005-08-25 21:21:38 +00002069 (void)syn_get_id(wp, lnum, (colnr_T)col,
Bram Moolenaar51485f02005-06-04 21:55:20 +00002070 FALSE, &can_spell);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002071 }
2072 else
2073 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002074
Bram Moolenaar51485f02005-06-04 21:55:20 +00002075 if (can_spell)
2076 {
2077 found_pos.lnum = lnum;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002078 found_pos.col = p - buf;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002079#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +00002080 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002081#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002082 if (dir == FORWARD)
2083 {
2084 /* No need to search further. */
Bram Moolenaar95529562005-08-25 21:21:38 +00002085 wp->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002086 vim_free(buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00002087 if (attrp != NULL)
2088 *attrp = attr;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002089 return len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002090 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002091 else if (curline)
2092 /* Insert mode completion: put cursor after
2093 * the bad word. */
2094 found_pos.col += len;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002095 found_len = len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002096 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002097 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002098 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002099 }
2100
Bram Moolenaar51485f02005-06-04 21:55:20 +00002101 /* advance to character after the word */
2102 p += len;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002103 capcol -= len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002104 }
2105
Bram Moolenaar5195e452005-08-19 20:32:47 +00002106 if (dir == BACKWARD && found_pos.lnum != 0)
2107 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002108 /* Use the last match in the line (before the cursor). */
Bram Moolenaar95529562005-08-25 21:21:38 +00002109 wp->w_cursor = found_pos;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002110 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002111 return found_len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002112 }
2113
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002114 if (curline)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002115 break; /* only check cursor line */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002116
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002117 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002118 if (dir == BACKWARD)
2119 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002120 /* If we are back at the starting line and searched it again there
2121 * is no match, give up. */
2122 if (lnum == wp->w_cursor.lnum && wrapped)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002123 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002124
2125 if (lnum > 1)
2126 --lnum;
2127 else if (!p_ws)
2128 break; /* at first line and 'nowrapscan' */
2129 else
2130 {
2131 /* Wrap around to the end of the buffer. May search the
2132 * starting line again and accept the last match. */
2133 lnum = wp->w_buffer->b_ml.ml_line_count;
2134 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002135 if (!shortmess(SHM_SEARCH))
2136 give_warning((char_u *)_(top_bot_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002137 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002138 capcol = -1;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002139 }
2140 else
2141 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002142 if (lnum < wp->w_buffer->b_ml.ml_line_count)
2143 ++lnum;
2144 else if (!p_ws)
2145 break; /* at first line and 'nowrapscan' */
2146 else
2147 {
2148 /* Wrap around to the start of the buffer. May search the
2149 * starting line again and accept the first match. */
2150 lnum = 1;
2151 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002152 if (!shortmess(SHM_SEARCH))
2153 give_warning((char_u *)_(bot_top_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002154 }
2155
2156 /* If we are back at the starting line and there is no match then
2157 * give up. */
2158 if (lnum == wp->w_cursor.lnum && !found_one)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002159 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002160
2161 /* Skip the characters at the start of the next line that were
2162 * included in a match crossing line boundaries. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002163 if (attr == HLF_COUNT)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002164 skip = p - endp;
2165 else
2166 skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002167
2168 /* Capscol skips over the inserted space. */
2169 --capcol;
2170
2171 /* But after empty line check first word in next line */
2172 if (*skipwhite(line) == NUL)
2173 capcol = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002174 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002175
2176 line_breakcheck();
2177 }
2178
Bram Moolenaar0c405862005-06-22 22:26:26 +00002179 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002180 return 0;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002181}
2182
2183/*
2184 * For spell checking: concatenate the start of the following line "line" into
2185 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
2186 */
2187 void
2188spell_cat_line(buf, line, maxlen)
2189 char_u *buf;
2190 char_u *line;
2191 int maxlen;
2192{
2193 char_u *p;
2194 int n;
2195
2196 p = skipwhite(line);
2197 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2198 p = skipwhite(p + 1);
2199
2200 if (*p != NUL)
2201 {
2202 *buf = ' ';
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002203 vim_strncpy(buf + 1, line, maxlen - 2);
Bram Moolenaar0c405862005-06-22 22:26:26 +00002204 n = p - line;
2205 if (n >= maxlen)
2206 n = maxlen - 1;
2207 vim_memset(buf + 1, ' ', n);
2208 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002209}
2210
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002211/*
2212 * Structure used for the cookie argument of do_in_runtimepath().
2213 */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002214typedef struct spelload_S
2215{
2216 char_u sl_lang[MAXWLEN + 1]; /* language name */
2217 slang_T *sl_slang; /* resulting slang_T struct */
2218 int sl_nobreak; /* NOBREAK language found */
2219} spelload_T;
2220
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002221/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002222 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00002223 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002224 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002225 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002226spell_load_lang(lang)
2227 char_u *lang;
2228{
Bram Moolenaarb765d632005-06-07 21:00:02 +00002229 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002230 int r;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002231 spelload_T sl;
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002232#ifdef FEAT_AUTOCMD
2233 int round;
2234#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002235
Bram Moolenaarb765d632005-06-07 21:00:02 +00002236 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002237 * It's truncated when an error is detected. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002238 STRCPY(sl.sl_lang, lang);
2239 sl.sl_slang = NULL;
2240 sl.sl_nobreak = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002241
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002242#ifdef FEAT_AUTOCMD
2243 /* We may retry when no spell file is found for the language, an
2244 * autocommand may load it then. */
2245 for (round = 1; round <= 2; ++round)
2246#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002247 {
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002248 /*
2249 * Find the first spell file for "lang" in 'runtimepath' and load it.
2250 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002251 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002252 "spell/%s.%s.spl", lang, spell_enc());
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002253 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002254
2255 if (r == FAIL && *sl.sl_lang != NUL)
2256 {
2257 /* Try loading the ASCII version. */
2258 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
2259 "spell/%s.ascii.spl", lang);
2260 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
2261
2262#ifdef FEAT_AUTOCMD
2263 if (r == FAIL && *sl.sl_lang != NUL && round == 1
2264 && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
2265 curbuf->b_fname, FALSE, curbuf))
2266 continue;
2267 break;
2268#endif
2269 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002270 }
2271
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002272 if (r == FAIL)
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002273 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002274 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
2275 lang, spell_enc(), lang);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002276 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002277 else if (sl.sl_slang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002278 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002279 /* At least one file was loaded, now load ALL the additions. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002280 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002281 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002282 }
2283}
2284
2285/*
2286 * Return the encoding used for spell checking: Use 'encoding', except that we
2287 * use "latin1" for "latin9". And limit to 60 characters (just in case).
2288 */
2289 static char_u *
2290spell_enc()
2291{
2292
2293#ifdef FEAT_MBYTE
2294 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2295 return p_enc;
2296#endif
2297 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002298}
2299
2300/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002301 * Get the name of the .spl file for the internal wordlist into
2302 * "fname[MAXPATHL]".
2303 */
2304 static void
2305int_wordlist_spl(fname)
2306 char_u *fname;
2307{
2308 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2309 int_wordlist, spell_enc());
2310}
2311
2312/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002313 * Allocate a new slang_T for language "lang". "lang" can be NULL.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002314 * Caller must fill "sl_next".
2315 */
2316 static slang_T *
2317slang_alloc(lang)
2318 char_u *lang;
2319{
2320 slang_T *lp;
2321
Bram Moolenaar51485f02005-06-04 21:55:20 +00002322 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002323 if (lp != NULL)
2324 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002325 if (lang != NULL)
2326 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002327 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
Bram Moolenaar4770d092006-01-12 23:22:24 +00002328 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002329 lp->sl_compmax = MAXWLEN;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002330 lp->sl_compsylmax = MAXWLEN;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002331 hash_init(&lp->sl_wordcount);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002332 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002333
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002334 return lp;
2335}
2336
2337/*
2338 * Free the contents of an slang_T and the structure itself.
2339 */
2340 static void
2341slang_free(lp)
2342 slang_T *lp;
2343{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002344 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002345 vim_free(lp->sl_fname);
2346 slang_clear(lp);
2347 vim_free(lp);
2348}
2349
2350/*
2351 * Clear an slang_T so that the file can be reloaded.
2352 */
2353 static void
2354slang_clear(lp)
2355 slang_T *lp;
2356{
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002357 garray_T *gap;
2358 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002359 salitem_T *smp;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002360 int i;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002361 int round;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002362
Bram Moolenaar51485f02005-06-04 21:55:20 +00002363 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002364 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002365 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002366 lp->sl_kbyts = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002367 vim_free(lp->sl_pbyts);
2368 lp->sl_pbyts = NULL;
2369
Bram Moolenaar51485f02005-06-04 21:55:20 +00002370 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002371 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002372 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002373 lp->sl_kidxs = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002374 vim_free(lp->sl_pidxs);
2375 lp->sl_pidxs = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002376
Bram Moolenaar4770d092006-01-12 23:22:24 +00002377 for (round = 1; round <= 2; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002378 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002379 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
2380 while (gap->ga_len > 0)
2381 {
2382 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2383 vim_free(ftp->ft_from);
2384 vim_free(ftp->ft_to);
2385 }
2386 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002387 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002388
2389 gap = &lp->sl_sal;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002390 if (lp->sl_sofo)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002391 {
2392 /* "ga_len" is set to 1 without adding an item for latin1 */
2393 if (gap->ga_data != NULL)
2394 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2395 for (i = 0; i < gap->ga_len; ++i)
2396 vim_free(((int **)gap->ga_data)[i]);
2397 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002398 else
2399 /* SAL items: free salitem_T items */
2400 while (gap->ga_len > 0)
2401 {
2402 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2403 vim_free(smp->sm_lead);
2404 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2405 vim_free(smp->sm_to);
2406#ifdef FEAT_MBYTE
2407 vim_free(smp->sm_lead_w);
2408 vim_free(smp->sm_oneof_w);
2409 vim_free(smp->sm_to_w);
2410#endif
2411 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002412 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002413
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002414 for (i = 0; i < lp->sl_prefixcnt; ++i)
2415 vim_free(lp->sl_prefprog[i]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002416 lp->sl_prefixcnt = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002417 vim_free(lp->sl_prefprog);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002418 lp->sl_prefprog = NULL;
2419
2420 vim_free(lp->sl_midword);
2421 lp->sl_midword = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002422
Bram Moolenaar5195e452005-08-19 20:32:47 +00002423 vim_free(lp->sl_compprog);
2424 vim_free(lp->sl_compstartflags);
Bram Moolenaard12a1322005-08-21 22:08:24 +00002425 vim_free(lp->sl_compallflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002426 lp->sl_compprog = NULL;
2427 lp->sl_compstartflags = NULL;
Bram Moolenaard12a1322005-08-21 22:08:24 +00002428 lp->sl_compallflags = NULL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002429
2430 vim_free(lp->sl_syllable);
2431 lp->sl_syllable = NULL;
2432 ga_clear(&lp->sl_syl_items);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002433
Bram Moolenaar4770d092006-01-12 23:22:24 +00002434 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2435 hash_init(&lp->sl_wordcount);
Bram Moolenaarea424162005-06-16 21:51:00 +00002436
Bram Moolenaar4770d092006-01-12 23:22:24 +00002437#ifdef FEAT_MBYTE
2438 hash_clear_all(&lp->sl_map_hash, 0);
Bram Moolenaarea424162005-06-16 21:51:00 +00002439#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00002440
Bram Moolenaar4770d092006-01-12 23:22:24 +00002441 /* Clear info from .sug file. */
2442 slang_clear_sug(lp);
2443
Bram Moolenaar5195e452005-08-19 20:32:47 +00002444 lp->sl_compmax = MAXWLEN;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002445 lp->sl_compminlen = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002446 lp->sl_compsylmax = MAXWLEN;
2447 lp->sl_regions[0] = NUL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002448}
2449
2450/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002451 * Clear the info from the .sug file in "lp".
2452 */
2453 static void
2454slang_clear_sug(lp)
2455 slang_T *lp;
2456{
2457 vim_free(lp->sl_sbyts);
2458 lp->sl_sbyts = NULL;
2459 vim_free(lp->sl_sidxs);
2460 lp->sl_sidxs = NULL;
2461 close_spellbuf(lp->sl_sugbuf);
2462 lp->sl_sugbuf = NULL;
2463 lp->sl_sugloaded = FALSE;
2464 lp->sl_sugtime = 0;
2465}
2466
2467/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002468 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002469 * Invoked through do_in_runtimepath().
2470 */
2471 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00002472spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002473 char_u *fname;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002474 void *cookie;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002475{
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002476 spelload_T *slp = (spelload_T *)cookie;
2477 slang_T *slang;
2478
2479 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2480 if (slang != NULL)
2481 {
2482 /* When a previously loaded file has NOBREAK also use it for the
2483 * ".add" files. */
2484 if (slp->sl_nobreak && slang->sl_add)
2485 slang->sl_nobreak = TRUE;
2486 else if (slang->sl_nobreak)
2487 slp->sl_nobreak = TRUE;
2488
2489 slp->sl_slang = slang;
2490 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002491}
2492
2493/*
2494 * Load one spell file and store the info into a slang_T.
2495 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00002496 * This is invoked in three ways:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002497 * - From spell_load_cb() to load a spell file for the first time. "lang" is
2498 * the language name, "old_lp" is NULL. Will allocate an slang_T.
2499 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
2500 * points to the existing slang_T.
Bram Moolenaar4770d092006-01-12 23:22:24 +00002501 * - Just after writing a .spl file; it's read back to produce the .sug file.
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002502 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
2503 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002504 * Returns the slang_T the spell file was loaded into. NULL for error.
Bram Moolenaarb765d632005-06-07 21:00:02 +00002505 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002506 static slang_T *
2507spell_load_file(fname, lang, old_lp, silent)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002508 char_u *fname;
2509 char_u *lang;
2510 slang_T *old_lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002511 int silent; /* no error if file doesn't exist */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002512{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002513 FILE *fd;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002514 char_u buf[VIMSPELLMAGICL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002515 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002516 int i;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002517 int n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002518 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002519 char_u *save_sourcing_name = sourcing_name;
2520 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002521 slang_T *lp = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002522 int c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002523 int res;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002524
Bram Moolenaarb765d632005-06-07 21:00:02 +00002525 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002526 if (fd == NULL)
2527 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002528 if (!silent)
2529 EMSG2(_(e_notopen), fname);
2530 else if (p_verbose > 2)
2531 {
2532 verbose_enter();
2533 smsg((char_u *)e_notopen, fname);
2534 verbose_leave();
2535 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002536 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002537 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002538 if (p_verbose > 2)
2539 {
2540 verbose_enter();
2541 smsg((char_u *)_("Reading spell file \"%s\""), fname);
2542 verbose_leave();
2543 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002544
Bram Moolenaarb765d632005-06-07 21:00:02 +00002545 if (old_lp == NULL)
2546 {
2547 lp = slang_alloc(lang);
2548 if (lp == NULL)
2549 goto endFAIL;
2550
2551 /* Remember the file name, used to reload the file when it's updated. */
2552 lp->sl_fname = vim_strsave(fname);
2553 if (lp->sl_fname == NULL)
2554 goto endFAIL;
2555
2556 /* Check for .add.spl. */
2557 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
2558 }
2559 else
2560 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002561
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002562 /* Set sourcing_name, so that error messages mention the file name. */
2563 sourcing_name = fname;
2564 sourcing_lnum = 0;
2565
Bram Moolenaar4770d092006-01-12 23:22:24 +00002566 /*
2567 * <HEADER>: <fileID>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002568 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002569 for (i = 0; i < VIMSPELLMAGICL; ++i)
2570 buf[i] = getc(fd); /* <fileID> */
2571 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2572 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002573 EMSG(_("E757: This does not look like a spell file"));
2574 goto endFAIL;
2575 }
2576 c = getc(fd); /* <versionnr> */
2577 if (c < VIMSPELLVERSION)
2578 {
2579 EMSG(_("E771: Old spell file, needs to be updated"));
2580 goto endFAIL;
2581 }
2582 else if (c > VIMSPELLVERSION)
2583 {
2584 EMSG(_("E772: Spell file is for newer version of Vim"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002585 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002586 }
2587
Bram Moolenaar5195e452005-08-19 20:32:47 +00002588
2589 /*
2590 * <SECTIONS>: <section> ... <sectionend>
2591 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2592 */
2593 for (;;)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002594 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002595 n = getc(fd); /* <sectionID> or <sectionend> */
2596 if (n == SN_END)
2597 break;
2598 c = getc(fd); /* <sectionflags> */
2599 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
2600 /* <sectionlen> */
2601 if (len < 0)
2602 goto truncerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002603
Bram Moolenaar5195e452005-08-19 20:32:47 +00002604 res = 0;
2605 switch (n)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002606 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002607 case SN_REGION:
2608 res = read_region_section(fd, lp, len);
2609 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002610
Bram Moolenaar5195e452005-08-19 20:32:47 +00002611 case SN_CHARFLAGS:
2612 res = read_charflags_section(fd);
2613 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002614
Bram Moolenaar5195e452005-08-19 20:32:47 +00002615 case SN_MIDWORD:
2616 lp->sl_midword = read_string(fd, len); /* <midword> */
2617 if (lp->sl_midword == NULL)
2618 goto endFAIL;
2619 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002620
Bram Moolenaar5195e452005-08-19 20:32:47 +00002621 case SN_PREFCOND:
2622 res = read_prefcond_section(fd, lp);
2623 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002624
Bram Moolenaar5195e452005-08-19 20:32:47 +00002625 case SN_REP:
Bram Moolenaar4770d092006-01-12 23:22:24 +00002626 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
2627 break;
2628
2629 case SN_REPSAL:
2630 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002631 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002632
Bram Moolenaar5195e452005-08-19 20:32:47 +00002633 case SN_SAL:
2634 res = read_sal_section(fd, lp);
2635 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002636
Bram Moolenaar5195e452005-08-19 20:32:47 +00002637 case SN_SOFO:
2638 res = read_sofo_section(fd, lp);
2639 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002640
Bram Moolenaar5195e452005-08-19 20:32:47 +00002641 case SN_MAP:
2642 p = read_string(fd, len); /* <mapstr> */
2643 if (p == NULL)
2644 goto endFAIL;
2645 set_map_str(lp, p);
2646 vim_free(p);
2647 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002648
Bram Moolenaar4770d092006-01-12 23:22:24 +00002649 case SN_WORDS:
2650 res = read_words_section(fd, lp, len);
2651 break;
2652
2653 case SN_SUGFILE:
2654 for (i = 7; i >= 0; --i) /* <timestamp> */
2655 lp->sl_sugtime += getc(fd) << (i * 8);
2656 break;
2657
Bram Moolenaar5195e452005-08-19 20:32:47 +00002658 case SN_COMPOUND:
2659 res = read_compound(fd, lp, len);
2660 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002661
Bram Moolenaar78622822005-08-23 21:00:13 +00002662 case SN_NOBREAK:
2663 lp->sl_nobreak = TRUE;
2664 break;
2665
Bram Moolenaar5195e452005-08-19 20:32:47 +00002666 case SN_SYLLABLE:
2667 lp->sl_syllable = read_string(fd, len); /* <syllable> */
2668 if (lp->sl_syllable == NULL)
2669 goto endFAIL;
2670 if (init_syl_tab(lp) == FAIL)
2671 goto endFAIL;
2672 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002673
Bram Moolenaar5195e452005-08-19 20:32:47 +00002674 default:
2675 /* Unsupported section. When it's required give an error
2676 * message. When it's not required skip the contents. */
2677 if (c & SNF_REQUIRED)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002678 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002679 EMSG(_("E770: Unsupported section in spell file"));
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002680 goto endFAIL;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002681 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002682 while (--len >= 0)
2683 if (getc(fd) < 0)
2684 goto truncerr;
2685 break;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002686 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002687someerror:
Bram Moolenaar5195e452005-08-19 20:32:47 +00002688 if (res == SP_FORMERROR)
2689 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002690 EMSG(_(e_format));
2691 goto endFAIL;
2692 }
2693 if (res == SP_TRUNCERROR)
2694 {
2695truncerr:
2696 EMSG(_(e_spell_trunc));
2697 goto endFAIL;
2698 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00002699 if (res == SP_OTHERERROR)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002700 goto endFAIL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002701 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002702
Bram Moolenaar4770d092006-01-12 23:22:24 +00002703 /* <LWORDTREE> */
2704 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
2705 if (res != 0)
2706 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002707
Bram Moolenaar4770d092006-01-12 23:22:24 +00002708 /* <KWORDTREE> */
2709 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
2710 if (res != 0)
2711 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002712
Bram Moolenaar4770d092006-01-12 23:22:24 +00002713 /* <PREFIXTREE> */
2714 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
2715 lp->sl_prefixcnt);
2716 if (res != 0)
2717 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002718
Bram Moolenaarb765d632005-06-07 21:00:02 +00002719 /* For a new file link it in the list of spell files. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002720 if (old_lp == NULL && lang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002721 {
2722 lp->sl_next = first_lang;
2723 first_lang = lp;
2724 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002725
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002726 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002727
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002728endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002729 if (lang != NULL)
2730 /* truncating the name signals the error to spell_load_lang() */
2731 *lang = NUL;
2732 if (lp != NULL && old_lp == NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002733 slang_free(lp);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002734 lp = NULL;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002735
2736endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002737 if (fd != NULL)
2738 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002739 sourcing_name = save_sourcing_name;
2740 sourcing_lnum = save_sourcing_lnum;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002741
2742 return lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002743}
2744
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002745/*
2746 * Read a length field from "fd" in "cnt_bytes" bytes.
Bram Moolenaar7887d882005-07-01 22:33:52 +00002747 * Allocate memory, read the string into it and add a NUL at the end.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002748 * Returns NULL when the count is zero.
Bram Moolenaar5195e452005-08-19 20:32:47 +00002749 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
2750 * otherwise.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002751 */
2752 static char_u *
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002753read_cnt_string(fd, cnt_bytes, cntp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002754 FILE *fd;
2755 int cnt_bytes;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002756 int *cntp;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002757{
2758 int cnt = 0;
2759 int i;
2760 char_u *str;
2761
2762 /* read the length bytes, MSB first */
2763 for (i = 0; i < cnt_bytes; ++i)
2764 cnt = (cnt << 8) + getc(fd);
2765 if (cnt < 0)
2766 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002767 *cntp = SP_TRUNCERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002768 return NULL;
2769 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002770 *cntp = cnt;
2771 if (cnt == 0)
2772 return NULL; /* nothing to read, return NULL */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002773
Bram Moolenaar5195e452005-08-19 20:32:47 +00002774 str = read_string(fd, cnt);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002775 if (str == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002776 *cntp = SP_OTHERERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002777 return str;
2778}
2779
Bram Moolenaar7887d882005-07-01 22:33:52 +00002780/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00002781 * Read a string of length "cnt" from "fd" into allocated memory.
2782 * Returns NULL when out of memory.
2783 */
2784 static char_u *
2785read_string(fd, cnt)
2786 FILE *fd;
2787 int cnt;
2788{
2789 char_u *str;
2790 int i;
2791
2792 /* allocate memory */
2793 str = alloc((unsigned)cnt + 1);
2794 if (str != NULL)
2795 {
2796 /* Read the string. Doesn't check for truncated file. */
2797 for (i = 0; i < cnt; ++i)
2798 str[i] = getc(fd);
2799 str[i] = NUL;
2800 }
2801 return str;
2802}
2803
2804/*
2805 * Read SN_REGION: <regionname> ...
2806 * Return SP_*ERROR flags.
2807 */
2808 static int
2809read_region_section(fd, lp, len)
2810 FILE *fd;
2811 slang_T *lp;
2812 int len;
2813{
2814 int i;
2815
2816 if (len > 16)
2817 return SP_FORMERROR;
2818 for (i = 0; i < len; ++i)
2819 lp->sl_regions[i] = getc(fd); /* <regionname> */
2820 lp->sl_regions[len] = NUL;
2821 return 0;
2822}
2823
2824/*
2825 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
2826 * <folcharslen> <folchars>
2827 * Return SP_*ERROR flags.
2828 */
2829 static int
2830read_charflags_section(fd)
2831 FILE *fd;
2832{
2833 char_u *flags;
2834 char_u *fol;
2835 int flagslen, follen;
2836
2837 /* <charflagslen> <charflags> */
2838 flags = read_cnt_string(fd, 1, &flagslen);
2839 if (flagslen < 0)
2840 return flagslen;
2841
2842 /* <folcharslen> <folchars> */
2843 fol = read_cnt_string(fd, 2, &follen);
2844 if (follen < 0)
2845 {
2846 vim_free(flags);
2847 return follen;
2848 }
2849
2850 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
2851 if (flags != NULL && fol != NULL)
2852 set_spell_charflags(flags, flagslen, fol);
2853
2854 vim_free(flags);
2855 vim_free(fol);
2856
2857 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
2858 if ((flags == NULL) != (fol == NULL))
2859 return SP_FORMERROR;
2860 return 0;
2861}
2862
2863/*
2864 * Read SN_PREFCOND section.
2865 * Return SP_*ERROR flags.
2866 */
2867 static int
2868read_prefcond_section(fd, lp)
2869 FILE *fd;
2870 slang_T *lp;
2871{
2872 int cnt;
2873 int i;
2874 int n;
2875 char_u *p;
2876 char_u buf[MAXWLEN + 1];
2877
2878 /* <prefcondcnt> <prefcond> ... */
2879 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
2880 if (cnt <= 0)
2881 return SP_FORMERROR;
2882
2883 lp->sl_prefprog = (regprog_T **)alloc_clear(
2884 (unsigned)sizeof(regprog_T *) * cnt);
2885 if (lp->sl_prefprog == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002886 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002887 lp->sl_prefixcnt = cnt;
2888
2889 for (i = 0; i < cnt; ++i)
2890 {
2891 /* <prefcond> : <condlen> <condstr> */
2892 n = getc(fd); /* <condlen> */
2893 if (n < 0 || n >= MAXWLEN)
2894 return SP_FORMERROR;
2895
2896 /* When <condlen> is zero we have an empty condition. Otherwise
2897 * compile the regexp program used to check for the condition. */
2898 if (n > 0)
2899 {
2900 buf[0] = '^'; /* always match at one position only */
2901 p = buf + 1;
2902 while (n-- > 0)
2903 *p++ = getc(fd); /* <condstr> */
2904 *p = NUL;
2905 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
2906 }
2907 }
2908 return 0;
2909}
2910
2911/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002912 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
Bram Moolenaar5195e452005-08-19 20:32:47 +00002913 * Return SP_*ERROR flags.
2914 */
2915 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +00002916read_rep_section(fd, gap, first)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002917 FILE *fd;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002918 garray_T *gap;
2919 short *first;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002920{
2921 int cnt;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002922 fromto_T *ftp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002923 int i;
2924
2925 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */
2926 if (cnt < 0)
2927 return SP_TRUNCERROR;
2928
Bram Moolenaar5195e452005-08-19 20:32:47 +00002929 if (ga_grow(gap, cnt) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002930 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002931
2932 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
2933 for (; gap->ga_len < cnt; ++gap->ga_len)
2934 {
2935 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
2936 ftp->ft_from = read_cnt_string(fd, 1, &i);
2937 if (i < 0)
2938 return i;
2939 if (i == 0)
2940 return SP_FORMERROR;
2941 ftp->ft_to = read_cnt_string(fd, 1, &i);
2942 if (i <= 0)
2943 {
2944 vim_free(ftp->ft_from);
2945 if (i < 0)
2946 return i;
2947 return SP_FORMERROR;
2948 }
2949 }
2950
2951 /* Fill the first-index table. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00002952 for (i = 0; i < 256; ++i)
2953 first[i] = -1;
2954 for (i = 0; i < gap->ga_len; ++i)
2955 {
2956 ftp = &((fromto_T *)gap->ga_data)[i];
2957 if (first[*ftp->ft_from] == -1)
2958 first[*ftp->ft_from] = i;
2959 }
2960 return 0;
2961}
2962
2963/*
2964 * Read SN_SAL section: <salflags> <salcount> <sal> ...
2965 * Return SP_*ERROR flags.
2966 */
2967 static int
2968read_sal_section(fd, slang)
2969 FILE *fd;
2970 slang_T *slang;
2971{
2972 int i;
2973 int cnt;
2974 garray_T *gap;
2975 salitem_T *smp;
2976 int ccnt;
2977 char_u *p;
Bram Moolenaard12a1322005-08-21 22:08:24 +00002978 int c = NUL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002979
2980 slang->sl_sofo = FALSE;
2981
2982 i = getc(fd); /* <salflags> */
2983 if (i & SAL_F0LLOWUP)
2984 slang->sl_followup = TRUE;
2985 if (i & SAL_COLLAPSE)
2986 slang->sl_collapse = TRUE;
2987 if (i & SAL_REM_ACCENTS)
2988 slang->sl_rem_accents = TRUE;
2989
2990 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */
2991 if (cnt < 0)
2992 return SP_TRUNCERROR;
2993
2994 gap = &slang->sl_sal;
2995 ga_init2(gap, sizeof(salitem_T), 10);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00002996 if (ga_grow(gap, cnt + 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002997 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002998
2999 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3000 for (; gap->ga_len < cnt; ++gap->ga_len)
3001 {
3002 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3003 ccnt = getc(fd); /* <salfromlen> */
3004 if (ccnt < 0)
3005 return SP_TRUNCERROR;
3006 if ((p = alloc(ccnt + 2)) == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003007 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003008 smp->sm_lead = p;
3009
3010 /* Read up to the first special char into sm_lead. */
3011 for (i = 0; i < ccnt; ++i)
3012 {
3013 c = getc(fd); /* <salfrom> */
3014 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
3015 break;
3016 *p++ = c;
3017 }
3018 smp->sm_leadlen = p - smp->sm_lead;
3019 *p++ = NUL;
3020
3021 /* Put (abc) chars in sm_oneof, if any. */
3022 if (c == '(')
3023 {
3024 smp->sm_oneof = p;
3025 for (++i; i < ccnt; ++i)
3026 {
3027 c = getc(fd); /* <salfrom> */
3028 if (c == ')')
3029 break;
3030 *p++ = c;
3031 }
3032 *p++ = NUL;
3033 if (++i < ccnt)
3034 c = getc(fd);
3035 }
3036 else
3037 smp->sm_oneof = NULL;
3038
3039 /* Any following chars go in sm_rules. */
3040 smp->sm_rules = p;
3041 if (i < ccnt)
3042 /* store the char we got while checking for end of sm_lead */
3043 *p++ = c;
3044 for (++i; i < ccnt; ++i)
3045 *p++ = getc(fd); /* <salfrom> */
3046 *p++ = NUL;
3047
3048 /* <saltolen> <salto> */
3049 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
3050 if (ccnt < 0)
3051 {
3052 vim_free(smp->sm_lead);
3053 return ccnt;
3054 }
3055
3056#ifdef FEAT_MBYTE
3057 if (has_mbyte)
3058 {
3059 /* convert the multi-byte strings to wide char strings */
3060 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3061 smp->sm_leadlen = mb_charlen(smp->sm_lead);
3062 if (smp->sm_oneof == NULL)
3063 smp->sm_oneof_w = NULL;
3064 else
3065 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
3066 if (smp->sm_to == NULL)
3067 smp->sm_to_w = NULL;
3068 else
3069 smp->sm_to_w = mb_str2wide(smp->sm_to);
3070 if (smp->sm_lead_w == NULL
3071 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
3072 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
3073 {
3074 vim_free(smp->sm_lead);
3075 vim_free(smp->sm_to);
3076 vim_free(smp->sm_lead_w);
3077 vim_free(smp->sm_oneof_w);
3078 vim_free(smp->sm_to_w);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003079 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003080 }
3081 }
3082#endif
3083 }
3084
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003085 if (gap->ga_len > 0)
3086 {
3087 /* Add one extra entry to mark the end with an empty sm_lead. Avoids
3088 * that we need to check the index every time. */
3089 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3090 if ((p = alloc(1)) == NULL)
3091 return SP_OTHERERROR;
3092 p[0] = NUL;
3093 smp->sm_lead = p;
3094 smp->sm_leadlen = 0;
3095 smp->sm_oneof = NULL;
3096 smp->sm_rules = p;
3097 smp->sm_to = NULL;
3098#ifdef FEAT_MBYTE
3099 if (has_mbyte)
3100 {
3101 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3102 smp->sm_leadlen = 0;
3103 smp->sm_oneof_w = NULL;
3104 smp->sm_to_w = NULL;
3105 }
3106#endif
3107 ++gap->ga_len;
3108 }
3109
Bram Moolenaar5195e452005-08-19 20:32:47 +00003110 /* Fill the first-index table. */
3111 set_sal_first(slang);
3112
3113 return 0;
3114}
3115
3116/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003117 * Read SN_WORDS: <word> ...
3118 * Return SP_*ERROR flags.
3119 */
3120 static int
3121read_words_section(fd, lp, len)
3122 FILE *fd;
3123 slang_T *lp;
3124 int len;
3125{
3126 int done = 0;
3127 int i;
3128 char_u word[MAXWLEN];
3129
3130 while (done < len)
3131 {
3132 /* Read one word at a time. */
3133 for (i = 0; ; ++i)
3134 {
3135 word[i] = getc(fd);
3136 if (word[i] == NUL)
3137 break;
3138 if (i == MAXWLEN - 1)
3139 return SP_FORMERROR;
3140 }
3141
3142 /* Init the count to 10. */
3143 count_common_word(lp, word, -1, 10);
3144 done += i + 1;
3145 }
3146 return 0;
3147}
3148
3149/*
3150 * Add a word to the hashtable of common words.
3151 * If it's already there then the counter is increased.
3152 */
3153 static void
3154count_common_word(lp, word, len, count)
3155 slang_T *lp;
3156 char_u *word;
3157 int len; /* word length, -1 for upto NUL */
3158 int count; /* 1 to count once, 10 to init */
3159{
3160 hash_T hash;
3161 hashitem_T *hi;
3162 wordcount_T *wc;
3163 char_u buf[MAXWLEN];
3164 char_u *p;
3165
3166 if (len == -1)
3167 p = word;
3168 else
3169 {
3170 vim_strncpy(buf, word, len);
3171 p = buf;
3172 }
3173
3174 hash = hash_hash(p);
3175 hi = hash_lookup(&lp->sl_wordcount, p, hash);
3176 if (HASHITEM_EMPTY(hi))
3177 {
3178 wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p));
3179 if (wc == NULL)
3180 return;
3181 STRCPY(wc->wc_word, p);
3182 wc->wc_count = count;
3183 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
3184 }
3185 else
3186 {
3187 wc = HI2WC(hi);
3188 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */
3189 wc->wc_count = MAXWORDCOUNT;
3190 }
3191}
3192
3193/*
3194 * Adjust the score of common words.
3195 */
3196 static int
3197score_wordcount_adj(slang, score, word, split)
3198 slang_T *slang;
3199 int score;
3200 char_u *word;
3201 int split; /* word was split, less bonus */
3202{
3203 hashitem_T *hi;
3204 wordcount_T *wc;
3205 int bonus;
3206 int newscore;
3207
3208 hi = hash_find(&slang->sl_wordcount, word);
3209 if (!HASHITEM_EMPTY(hi))
3210 {
3211 wc = HI2WC(hi);
3212 if (wc->wc_count < SCORE_THRES2)
3213 bonus = SCORE_COMMON1;
3214 else if (wc->wc_count < SCORE_THRES3)
3215 bonus = SCORE_COMMON2;
3216 else
3217 bonus = SCORE_COMMON3;
3218 if (split)
3219 newscore = score - bonus / 2;
3220 else
3221 newscore = score - bonus;
3222 if (newscore < 0)
3223 return 0;
3224 return newscore;
3225 }
3226 return score;
3227}
3228
3229/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00003230 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3231 * Return SP_*ERROR flags.
3232 */
3233 static int
3234read_sofo_section(fd, slang)
3235 FILE *fd;
3236 slang_T *slang;
3237{
3238 int cnt;
3239 char_u *from, *to;
3240 int res;
3241
3242 slang->sl_sofo = TRUE;
3243
3244 /* <sofofromlen> <sofofrom> */
3245 from = read_cnt_string(fd, 2, &cnt);
3246 if (cnt < 0)
3247 return cnt;
3248
3249 /* <sofotolen> <sofoto> */
3250 to = read_cnt_string(fd, 2, &cnt);
3251 if (cnt < 0)
3252 {
3253 vim_free(from);
3254 return cnt;
3255 }
3256
3257 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3258 if (from != NULL && to != NULL)
3259 res = set_sofo(slang, from, to);
3260 else if (from != NULL || to != NULL)
3261 res = SP_FORMERROR; /* only one of two strings is an error */
3262 else
3263 res = 0;
3264
3265 vim_free(from);
3266 vim_free(to);
3267 return res;
3268}
3269
3270/*
3271 * Read the compound section from the .spl file:
3272 * <compmax> <compminlen> <compsylmax> <compflags>
3273 * Returns SP_*ERROR flags.
3274 */
3275 static int
3276read_compound(fd, slang, len)
3277 FILE *fd;
3278 slang_T *slang;
3279 int len;
3280{
3281 int todo = len;
3282 int c;
3283 int atstart;
3284 char_u *pat;
3285 char_u *pp;
3286 char_u *cp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003287 char_u *ap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003288
3289 if (todo < 2)
3290 return SP_FORMERROR; /* need at least two bytes */
3291
3292 --todo;
3293 c = getc(fd); /* <compmax> */
3294 if (c < 2)
3295 c = MAXWLEN;
3296 slang->sl_compmax = c;
3297
3298 --todo;
3299 c = getc(fd); /* <compminlen> */
3300 if (c < 1)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00003301 c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003302 slang->sl_compminlen = c;
3303
3304 --todo;
3305 c = getc(fd); /* <compsylmax> */
3306 if (c < 1)
3307 c = MAXWLEN;
3308 slang->sl_compsylmax = c;
3309
3310 /* Turn the COMPOUNDFLAGS items into a regexp pattern:
3311 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003312 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3313 * Conversion to utf-8 may double the size. */
3314 c = todo * 2 + 7;
3315#ifdef FEAT_MBYTE
3316 if (enc_utf8)
3317 c += todo * 2;
3318#endif
3319 pat = alloc((unsigned)c);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003320 if (pat == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003321 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003322
Bram Moolenaard12a1322005-08-21 22:08:24 +00003323 /* We also need a list of all flags that can appear at the start and one
3324 * for all flags. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003325 cp = alloc(todo + 1);
3326 if (cp == NULL)
3327 {
3328 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003329 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003330 }
3331 slang->sl_compstartflags = cp;
3332 *cp = NUL;
3333
Bram Moolenaard12a1322005-08-21 22:08:24 +00003334 ap = alloc(todo + 1);
3335 if (ap == NULL)
3336 {
3337 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003338 return SP_OTHERERROR;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003339 }
3340 slang->sl_compallflags = ap;
3341 *ap = NUL;
3342
Bram Moolenaar5195e452005-08-19 20:32:47 +00003343 pp = pat;
3344 *pp++ = '^';
3345 *pp++ = '\\';
3346 *pp++ = '(';
3347
3348 atstart = 1;
3349 while (todo-- > 0)
3350 {
3351 c = getc(fd); /* <compflags> */
Bram Moolenaard12a1322005-08-21 22:08:24 +00003352
3353 /* Add all flags to "sl_compallflags". */
3354 if (vim_strchr((char_u *)"+*[]/", c) == NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00003355 && !byte_in_str(slang->sl_compallflags, c))
Bram Moolenaard12a1322005-08-21 22:08:24 +00003356 {
3357 *ap++ = c;
3358 *ap = NUL;
3359 }
3360
Bram Moolenaar5195e452005-08-19 20:32:47 +00003361 if (atstart != 0)
3362 {
3363 /* At start of item: copy flags to "sl_compstartflags". For a
3364 * [abc] item set "atstart" to 2 and copy up to the ']'. */
3365 if (c == '[')
3366 atstart = 2;
3367 else if (c == ']')
3368 atstart = 0;
3369 else
3370 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00003371 if (!byte_in_str(slang->sl_compstartflags, c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003372 {
3373 *cp++ = c;
3374 *cp = NUL;
3375 }
3376 if (atstart == 1)
3377 atstart = 0;
3378 }
3379 }
3380 if (c == '/') /* slash separates two items */
3381 {
3382 *pp++ = '\\';
3383 *pp++ = '|';
3384 atstart = 1;
3385 }
3386 else /* normal char, "[abc]" and '*' are copied as-is */
3387 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00003388 if (c == '+' || c == '~')
Bram Moolenaar5195e452005-08-19 20:32:47 +00003389 *pp++ = '\\'; /* "a+" becomes "a\+" */
Bram Moolenaar6de68532005-08-24 22:08:48 +00003390#ifdef FEAT_MBYTE
3391 if (enc_utf8)
3392 pp += mb_char2bytes(c, pp);
3393 else
3394#endif
3395 *pp++ = c;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003396 }
3397 }
3398
3399 *pp++ = '\\';
3400 *pp++ = ')';
3401 *pp++ = '$';
3402 *pp = NUL;
3403
3404 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3405 vim_free(pat);
3406 if (slang->sl_compprog == NULL)
3407 return SP_FORMERROR;
3408
3409 return 0;
3410}
3411
Bram Moolenaar6de68532005-08-24 22:08:48 +00003412/*
Bram Moolenaar95529562005-08-25 21:21:38 +00003413 * Return TRUE if byte "n" appears in "str".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003414 * Like strchr() but independent of locale.
3415 */
3416 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00003417byte_in_str(str, n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003418 char_u *str;
Bram Moolenaar95529562005-08-25 21:21:38 +00003419 int n;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003420{
3421 char_u *p;
3422
3423 for (p = str; *p != NUL; ++p)
Bram Moolenaar95529562005-08-25 21:21:38 +00003424 if (*p == n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003425 return TRUE;
3426 return FALSE;
3427}
3428
Bram Moolenaar5195e452005-08-19 20:32:47 +00003429#define SY_MAXLEN 30
3430typedef struct syl_item_S
3431{
3432 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */
3433 int sy_len;
3434} syl_item_T;
3435
3436/*
3437 * Truncate "slang->sl_syllable" at the first slash and put the following items
3438 * in "slang->sl_syl_items".
3439 */
3440 static int
3441init_syl_tab(slang)
3442 slang_T *slang;
3443{
3444 char_u *p;
3445 char_u *s;
3446 int l;
3447 syl_item_T *syl;
3448
3449 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3450 p = vim_strchr(slang->sl_syllable, '/');
3451 while (p != NULL)
3452 {
3453 *p++ = NUL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003454 if (*p == NUL) /* trailing slash */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003455 break;
3456 s = p;
3457 p = vim_strchr(p, '/');
3458 if (p == NULL)
3459 l = STRLEN(s);
3460 else
3461 l = p - s;
3462 if (l >= SY_MAXLEN)
3463 return SP_FORMERROR;
3464 if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003465 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003466 syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3467 + slang->sl_syl_items.ga_len++;
3468 vim_strncpy(syl->sy_chars, s, l);
3469 syl->sy_len = l;
3470 }
3471 return OK;
3472}
3473
3474/*
3475 * Count the number of syllables in "word".
3476 * When "word" contains spaces the syllables after the last space are counted.
3477 * Returns zero if syllables are not defines.
3478 */
3479 static int
3480count_syllables(slang, word)
3481 slang_T *slang;
3482 char_u *word;
3483{
3484 int cnt = 0;
3485 int skip = FALSE;
3486 char_u *p;
3487 int len;
3488 int i;
3489 syl_item_T *syl;
3490 int c;
3491
3492 if (slang->sl_syllable == NULL)
3493 return 0;
3494
3495 for (p = word; *p != NUL; p += len)
3496 {
3497 /* When running into a space reset counter. */
3498 if (*p == ' ')
3499 {
3500 len = 1;
3501 cnt = 0;
3502 continue;
3503 }
3504
3505 /* Find longest match of syllable items. */
3506 len = 0;
3507 for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3508 {
3509 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3510 if (syl->sy_len > len
3511 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3512 len = syl->sy_len;
3513 }
3514 if (len != 0) /* found a match, count syllable */
3515 {
3516 ++cnt;
3517 skip = FALSE;
3518 }
3519 else
3520 {
3521 /* No recognized syllable item, at least a syllable char then? */
3522#ifdef FEAT_MBYTE
3523 c = mb_ptr2char(p);
3524 len = (*mb_ptr2len)(p);
3525#else
3526 c = *p;
3527 len = 1;
3528#endif
3529 if (vim_strchr(slang->sl_syllable, c) == NULL)
3530 skip = FALSE; /* No, search for next syllable */
3531 else if (!skip)
3532 {
3533 ++cnt; /* Yes, count it */
3534 skip = TRUE; /* don't count following syllable chars */
3535 }
3536 }
3537 }
3538 return cnt;
3539}
3540
3541/*
Bram Moolenaar7887d882005-07-01 22:33:52 +00003542 * Set the SOFOFROM and SOFOTO items in language "lp".
Bram Moolenaar5195e452005-08-19 20:32:47 +00003543 * Returns SP_*ERROR flags when there is something wrong.
Bram Moolenaar7887d882005-07-01 22:33:52 +00003544 */
3545 static int
3546set_sofo(lp, from, to)
3547 slang_T *lp;
3548 char_u *from;
3549 char_u *to;
3550{
3551 int i;
3552
3553#ifdef FEAT_MBYTE
3554 garray_T *gap;
3555 char_u *s;
3556 char_u *p;
3557 int c;
3558 int *inp;
3559
3560 if (has_mbyte)
3561 {
3562 /* Use "sl_sal" as an array with 256 pointers to a list of wide
3563 * characters. The index is the low byte of the character.
3564 * The list contains from-to pairs with a terminating NUL.
3565 * sl_sal_first[] is used for latin1 "from" characters. */
3566 gap = &lp->sl_sal;
3567 ga_init2(gap, sizeof(int *), 1);
3568 if (ga_grow(gap, 256) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003569 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003570 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3571 gap->ga_len = 256;
3572
3573 /* First count the number of items for each list. Temporarily use
3574 * sl_sal_first[] for this. */
3575 for (p = from, s = to; *p != NUL && *s != NUL; )
3576 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003577 c = mb_cptr2char_adv(&p);
3578 mb_cptr_adv(s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003579 if (c >= 256)
3580 ++lp->sl_sal_first[c & 0xff];
3581 }
3582 if (*p != NUL || *s != NUL) /* lengths differ */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003583 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003584
3585 /* Allocate the lists. */
3586 for (i = 0; i < 256; ++i)
3587 if (lp->sl_sal_first[i] > 0)
3588 {
3589 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3590 if (p == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003591 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003592 ((int **)gap->ga_data)[i] = (int *)p;
3593 *(int *)p = 0;
3594 }
3595
3596 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3597 * list. */
3598 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3599 for (p = from, s = to; *p != NUL && *s != NUL; )
3600 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003601 c = mb_cptr2char_adv(&p);
3602 i = mb_cptr2char_adv(&s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003603 if (c >= 256)
3604 {
3605 /* Append the from-to chars at the end of the list with
3606 * the low byte. */
3607 inp = ((int **)gap->ga_data)[c & 0xff];
3608 while (*inp != 0)
3609 ++inp;
3610 *inp++ = c; /* from char */
3611 *inp++ = i; /* to char */
3612 *inp++ = NUL; /* NUL at the end */
3613 }
3614 else
3615 /* mapping byte to char is done in sl_sal_first[] */
3616 lp->sl_sal_first[c] = i;
3617 }
3618 }
3619 else
3620#endif
3621 {
3622 /* mapping bytes to bytes is done in sl_sal_first[] */
3623 if (STRLEN(from) != STRLEN(to))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003624 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003625
3626 for (i = 0; to[i] != NUL; ++i)
3627 lp->sl_sal_first[from[i]] = to[i];
3628 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
3629 }
3630
Bram Moolenaar5195e452005-08-19 20:32:47 +00003631 return 0;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003632}
3633
3634/*
3635 * Fill the first-index table for "lp".
3636 */
3637 static void
3638set_sal_first(lp)
3639 slang_T *lp;
3640{
3641 salfirst_T *sfirst;
3642 int i;
3643 salitem_T *smp;
3644 int c;
3645 garray_T *gap = &lp->sl_sal;
3646
3647 sfirst = lp->sl_sal_first;
3648 for (i = 0; i < 256; ++i)
3649 sfirst[i] = -1;
3650 smp = (salitem_T *)gap->ga_data;
3651 for (i = 0; i < gap->ga_len; ++i)
3652 {
3653#ifdef FEAT_MBYTE
3654 if (has_mbyte)
3655 /* Use the lowest byte of the first character. For latin1 it's
3656 * the character, for other encodings it should differ for most
3657 * characters. */
3658 c = *smp[i].sm_lead_w & 0xff;
3659 else
3660#endif
3661 c = *smp[i].sm_lead;
3662 if (sfirst[c] == -1)
3663 {
3664 sfirst[c] = i;
3665#ifdef FEAT_MBYTE
3666 if (has_mbyte)
3667 {
3668 int n;
3669
3670 /* Make sure all entries with this byte are following each
3671 * other. Move the ones that are in the wrong position. Do
3672 * keep the same ordering! */
3673 while (i + 1 < gap->ga_len
3674 && (*smp[i + 1].sm_lead_w & 0xff) == c)
3675 /* Skip over entry with same index byte. */
3676 ++i;
3677
3678 for (n = 1; i + n < gap->ga_len; ++n)
3679 if ((*smp[i + n].sm_lead_w & 0xff) == c)
3680 {
3681 salitem_T tsal;
3682
3683 /* Move entry with same index byte after the entries
3684 * we already found. */
3685 ++i;
3686 --n;
3687 tsal = smp[i + n];
3688 mch_memmove(smp + i + 1, smp + i,
3689 sizeof(salitem_T) * n);
3690 smp[i] = tsal;
3691 }
3692 }
3693#endif
3694 }
3695 }
3696}
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003697
#ifdef FEAT_MBYTE
/*
 * Convert the multi-byte string "s" into a NUL-terminated array with one int
 * for each character.
 * The result is in allocated memory, NULL is returned when out of memory.
 */
    static int *
mb_str2wide(s)
    char_u      *s;
{
    int         *wide;
    int         n;
    char_u      *src;

    /* One int for every character plus one for the terminating NUL. */
    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
        return NULL;

    n = 0;
    src = s;
    while (*src != NUL)
    {
        wide[n] = mb_ptr2char_adv(&src);
        ++n;
    }
    wide[n] = NUL;

    return wide;
}
#endif
3721
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003722/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003723 * Read a tree from the .spl or .sug file.
3724 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
3725 * This is skipped when the tree has zero length.
3726 * Returns zero when OK, SP_ value for an error.
3727 */
3728 static int
3729spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt)
3730 FILE *fd;
3731 char_u **bytsp;
3732 idx_T **idxsp;
3733 int prefixtree; /* TRUE for the prefix tree */
3734 int prefixcnt; /* when "prefixtree" is TRUE: prefix count */
3735{
3736 int len;
3737 int idx;
3738 char_u *bp;
3739 idx_T *ip;
3740
3741 /* The tree size was computed when writing the file, so that we can
3742 * allocate it as one long block. <nodecount> */
3743 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
3744 if (len < 0)
3745 return SP_TRUNCERROR;
3746 if (len > 0)
3747 {
3748 /* Allocate the byte array. */
3749 bp = lalloc((long_u)len, TRUE);
3750 if (bp == NULL)
3751 return SP_OTHERERROR;
3752 *bytsp = bp;
3753
3754 /* Allocate the index array. */
3755 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
3756 if (ip == NULL)
3757 return SP_OTHERERROR;
3758 *idxsp = ip;
3759
3760 /* Recursively read the tree and store it in the array. */
3761 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
3762 if (idx < 0)
3763 return idx;
3764 }
3765 return 0;
3766}
3767
3768/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003769 * Read one row of siblings from the spell file and store it in the byte array
3770 * "byts" and index array "idxs". Recursively read the children.
3771 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00003772 * NOTE: The code here must match put_node()!
Bram Moolenaar51485f02005-06-04 21:55:20 +00003773 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00003774 * Returns the index (>= 0) following the siblings.
3775 * Returns SP_TRUNCERROR if the file is shorter than expected.
3776 * Returns SP_FORMERROR if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003777 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003778 static idx_T
Bram Moolenaar4770d092006-01-12 23:22:24 +00003779read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003780 FILE *fd;
3781 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003782 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003783 int maxidx; /* size of arrays */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003784 idx_T startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003785 int prefixtree; /* TRUE for reading PREFIXTREE */
3786 int maxprefcondnr; /* maximum for <prefcondnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003787{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003788 int len;
3789 int i;
3790 int n;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003791 idx_T idx = startidx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003792 int c;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003793 int c2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003794#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003795
Bram Moolenaar51485f02005-06-04 21:55:20 +00003796 len = getc(fd); /* <siblingcount> */
3797 if (len <= 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003798 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003799
3800 if (startidx + len >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003801 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003802 byts[idx++] = len;
3803
3804 /* Read the byte values, flag/region bytes and shared indexes. */
3805 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003806 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003807 c = getc(fd); /* <byte> */
3808 if (c < 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003809 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003810 if (c <= BY_SPECIAL)
3811 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003812 if (c == BY_NOFLAGS && !prefixtree)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003813 {
3814 /* No flags, all regions. */
3815 idxs[idx] = 0;
3816 c = 0;
3817 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003818 else if (c != BY_INDEX)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003819 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003820 if (prefixtree)
3821 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00003822 /* Read the optional pflags byte, the prefix ID and the
3823 * condition nr. In idxs[] store the prefix ID in the low
3824 * byte, the condition index shifted up 8 bits, the flags
3825 * shifted up 24 bits. */
3826 if (c == BY_FLAGS)
3827 c = getc(fd) << 24; /* <pflags> */
3828 else
3829 c = 0;
3830
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003831 c |= getc(fd); /* <affixID> */
Bram Moolenaar53805d12005-08-01 07:08:33 +00003832
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003833 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */
3834 if (n >= maxprefcondnr)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003835 return SP_FORMERROR;
Bram Moolenaar53805d12005-08-01 07:08:33 +00003836 c |= (n << 8);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003837 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003838 else /* c must be BY_FLAGS or BY_FLAGS2 */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003839 {
3840 /* Read flags and optional region and prefix ID. In
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003841 * idxs[] the flags go in the low two bytes, region above
3842 * that and prefix ID above the region. */
3843 c2 = c;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003844 c = getc(fd); /* <flags> */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003845 if (c2 == BY_FLAGS2)
3846 c = (getc(fd) << 8) + c; /* <flags2> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003847 if (c & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003848 c = (getc(fd) << 16) + c; /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003849 if (c & WF_AFX)
3850 c = (getc(fd) << 24) + c; /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003851 }
3852
Bram Moolenaar51485f02005-06-04 21:55:20 +00003853 idxs[idx] = c;
3854 c = 0;
3855 }
3856 else /* c == BY_INDEX */
3857 {
3858 /* <nodeidx> */
3859 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
3860 if (n < 0 || n >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003861 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003862 idxs[idx] = n + SHARED_MASK;
3863 c = getc(fd); /* <xbyte> */
3864 }
3865 }
3866 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003867 }
3868
Bram Moolenaar51485f02005-06-04 21:55:20 +00003869 /* Recursively read the children for non-shared siblings.
3870 * Skip the end-of-word ones (zero byte value) and the shared ones (and
3871 * remove SHARED_MASK) */
3872 for (i = 1; i <= len; ++i)
3873 if (byts[startidx + i] != 0)
3874 {
3875 if (idxs[startidx + i] & SHARED_MASK)
3876 idxs[startidx + i] &= ~SHARED_MASK;
3877 else
3878 {
3879 idxs[startidx + i] = idx;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003880 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003881 prefixtree, maxprefcondnr);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003882 if (idx < 0)
3883 break;
3884 }
3885 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003886
Bram Moolenaar51485f02005-06-04 21:55:20 +00003887 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003888}
3889
3890/*
3891 * Parse 'spelllang' and set buf->b_langp accordingly.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003892 * Returns NULL if it's OK, an error message otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003893 */
3894 char_u *
3895did_set_spelllang(buf)
3896 buf_T *buf;
3897{
3898 garray_T ga;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003899 char_u *splp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003900 char_u *region;
Bram Moolenaarb6356332005-07-18 21:40:44 +00003901 char_u region_cp[3];
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003902 int filename;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003903 int region_mask;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003904 slang_T *slang;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003905 int c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003906 char_u lang[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003907 char_u spf_name[MAXPATHL];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003908 int len;
3909 char_u *p;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003910 int round;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00003911 char_u *spf;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00003912 char_u *use_region = NULL;
3913 int dont_use_region = FALSE;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00003914 int nobreak = FALSE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003915 int i, j;
3916 langp_T *lp, *lp2;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003917
3918 ga_init2(&ga, sizeof(langp_T), 2);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003919 clear_midword(buf);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003920
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003921 /* loop over comma separated language names. */
3922 for (splp = buf->b_p_spl; *splp != NUL; )
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003923 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003924 /* Get one language name. */
3925 copy_option_part(&splp, lang, MAXWLEN, ",");
3926
Bram Moolenaar5482f332005-04-17 20:18:43 +00003927 region = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003928 len = STRLEN(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003929
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003930 /* If the name ends in ".spl" use it as the name of the spell file.
3931 * If there is a region name let "region" point to it and remove it
3932 * from the name. */
3933 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
3934 {
3935 filename = TRUE;
3936
Bram Moolenaarb6356332005-07-18 21:40:44 +00003937 /* Locate a region and remove it from the file name. */
3938 p = vim_strchr(gettail(lang), '_');
3939 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
3940 && !ASCII_ISALPHA(p[3]))
3941 {
3942 vim_strncpy(region_cp, p + 1, 2);
3943 mch_memmove(p, p + 3, len - (p - lang) - 2);
3944 len -= 3;
3945 region = region_cp;
3946 }
3947 else
3948 dont_use_region = TRUE;
3949
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003950 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003951 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3952 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003953 break;
3954 }
3955 else
3956 {
3957 filename = FALSE;
3958 if (len > 3 && lang[len - 3] == '_')
3959 {
3960 region = lang + len - 2;
3961 len -= 3;
3962 lang[len] = NUL;
3963 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00003964 else
3965 dont_use_region = TRUE;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003966
3967 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003968 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3969 if (STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003970 break;
3971 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003972
Bram Moolenaarb6356332005-07-18 21:40:44 +00003973 if (region != NULL)
3974 {
3975 /* If the region differs from what was used before then don't
3976 * use it for 'spellfile'. */
3977 if (use_region != NULL && STRCMP(region, use_region) != 0)
3978 dont_use_region = TRUE;
3979 use_region = region;
3980 }
3981
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003982 /* If not found try loading the language now. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003983 if (slang == NULL)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003984 {
3985 if (filename)
3986 (void)spell_load_file(lang, lang, NULL, FALSE);
3987 else
3988 spell_load_lang(lang);
3989 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003990
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003991 /*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003992 * Loop over the languages, there can be several files for "lang".
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003993 */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003994 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
3995 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
3996 : STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003997 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00003998 region_mask = REGION_ALL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003999 if (!filename && region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004000 {
4001 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004002 c = find_region(slang->sl_regions, region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004003 if (c == REGION_ALL)
4004 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004005 if (slang->sl_add)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004006 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004007 if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004008 /* This addition file is for other regions. */
4009 region_mask = 0;
4010 }
4011 else
4012 /* This is probably an error. Give a warning and
4013 * accept the words anyway. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004014 smsg((char_u *)
4015 _("Warning: region %s not supported"),
4016 region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004017 }
4018 else
4019 region_mask = 1 << c;
4020 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004021
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004022 if (region_mask != 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004023 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004024 if (ga_grow(&ga, 1) == FAIL)
4025 {
4026 ga_clear(&ga);
4027 return e_outofmem;
4028 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004029 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004030 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4031 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004032 use_midword(slang, buf);
4033 if (slang->sl_nobreak)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004034 nobreak = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004035 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004036 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004037 }
4038
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004039 /* round 0: load int_wordlist, if possible.
4040 * round 1: load first name in 'spellfile'.
4041 * round 2: load second name in 'spellfile.
4042 * etc. */
4043 spf = curbuf->b_p_spf;
4044 for (round = 0; round == 0 || *spf != NUL; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004045 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004046 if (round == 0)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004047 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004048 /* Internal wordlist, if there is one. */
4049 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004050 continue;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004051 int_wordlist_spl(spf_name);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004052 }
4053 else
4054 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004055 /* One entry in 'spellfile'. */
4056 copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
4057 STRCAT(spf_name, ".spl");
4058
4059 /* If it was already found above then skip it. */
4060 for (c = 0; c < ga.ga_len; ++c)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004061 {
4062 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
4063 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004064 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004065 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004066 if (c < ga.ga_len)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004067 continue;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004068 }
4069
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004070 /* Check if it was loaded already. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004071 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4072 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004073 break;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004074 if (slang == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004075 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004076 /* Not loaded, try loading it now. The language name includes the
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004077 * region name, the region is ignored otherwise. for int_wordlist
4078 * use an arbitrary name. */
4079 if (round == 0)
4080 STRCPY(lang, "internal wordlist");
4081 else
Bram Moolenaar7887d882005-07-01 22:33:52 +00004082 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004083 vim_strncpy(lang, gettail(spf_name), MAXWLEN);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004084 p = vim_strchr(lang, '.');
4085 if (p != NULL)
4086 *p = NUL; /* truncate at ".encoding.add" */
4087 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004088 slang = spell_load_file(spf_name, lang, NULL, TRUE);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004089
4090 /* If one of the languages has NOBREAK we assume the addition
4091 * files also have this. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004092 if (slang != NULL && nobreak)
4093 slang->sl_nobreak = TRUE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004094 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004095 if (slang != NULL && ga_grow(&ga, 1) == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004096 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004097 region_mask = REGION_ALL;
4098 if (use_region != NULL && !dont_use_region)
4099 {
4100 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004101 c = find_region(slang->sl_regions, use_region);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004102 if (c != REGION_ALL)
4103 region_mask = 1 << c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004104 else if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004105 /* This spell file is for other regions. */
4106 region_mask = 0;
4107 }
4108
4109 if (region_mask != 0)
4110 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004111 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4112 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
4113 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004114 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4115 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004116 use_midword(slang, buf);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004117 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004118 }
4119 }
4120
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004121 /* Everything is fine, store the new b_langp value. */
4122 ga_clear(&buf->b_langp);
4123 buf->b_langp = ga;
4124
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004125 /* For each language figure out what language to use for sound folding and
4126 * REP items. If the language doesn't support it itself use another one
4127 * with the same name. E.g. for "en-math" use "en". */
4128 for (i = 0; i < ga.ga_len; ++i)
4129 {
4130 lp = LANGP_ENTRY(ga, i);
4131
4132 /* sound folding */
4133 if (lp->lp_slang->sl_sal.ga_len > 0)
4134 /* language does sound folding itself */
4135 lp->lp_sallang = lp->lp_slang;
4136 else
4137 /* find first similar language that does sound folding */
4138 for (j = 0; j < ga.ga_len; ++j)
4139 {
4140 lp2 = LANGP_ENTRY(ga, j);
4141 if (lp2->lp_slang->sl_sal.ga_len > 0
4142 && STRNCMP(lp->lp_slang->sl_name,
4143 lp2->lp_slang->sl_name, 2) == 0)
4144 {
4145 lp->lp_sallang = lp2->lp_slang;
4146 break;
4147 }
4148 }
4149
4150 /* REP items */
4151 if (lp->lp_slang->sl_rep.ga_len > 0)
4152 /* language has REP items itself */
4153 lp->lp_replang = lp->lp_slang;
4154 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00004155 /* find first similar language that has REP items */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004156 for (j = 0; j < ga.ga_len; ++j)
4157 {
4158 lp2 = LANGP_ENTRY(ga, j);
4159 if (lp2->lp_slang->sl_rep.ga_len > 0
4160 && STRNCMP(lp->lp_slang->sl_name,
4161 lp2->lp_slang->sl_name, 2) == 0)
4162 {
4163 lp->lp_replang = lp2->lp_slang;
4164 break;
4165 }
4166 }
4167 }
4168
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004169 return NULL;
4170}
4171
4172/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004173 * Clear the midword characters for buffer "buf".
4174 */
4175 static void
4176clear_midword(buf)
4177 buf_T *buf;
4178{
4179 vim_memset(buf->b_spell_ismw, 0, 256);
4180#ifdef FEAT_MBYTE
4181 vim_free(buf->b_spell_ismw_mb);
4182 buf->b_spell_ismw_mb = NULL;
4183#endif
4184}
4185
4186/*
4187 * Use the "sl_midword" field of language "lp" for buffer "buf".
4188 * They add up to any currently used midword characters.
4189 */
4190 static void
4191use_midword(lp, buf)
4192 slang_T *lp;
4193 buf_T *buf;
4194{
4195 char_u *p;
4196
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004197 if (lp->sl_midword == NULL) /* there aren't any */
4198 return;
4199
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004200 for (p = lp->sl_midword; *p != NUL; )
4201#ifdef FEAT_MBYTE
4202 if (has_mbyte)
4203 {
4204 int c, l, n;
4205 char_u *bp;
4206
4207 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004208 l = (*mb_ptr2len)(p);
4209 if (c < 256 && l <= 2)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004210 buf->b_spell_ismw[c] = TRUE;
4211 else if (buf->b_spell_ismw_mb == NULL)
4212 /* First multi-byte char in "b_spell_ismw_mb". */
4213 buf->b_spell_ismw_mb = vim_strnsave(p, l);
4214 else
4215 {
4216 /* Append multi-byte chars to "b_spell_ismw_mb". */
4217 n = STRLEN(buf->b_spell_ismw_mb);
4218 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
4219 if (bp != NULL)
4220 {
4221 vim_free(buf->b_spell_ismw_mb);
4222 buf->b_spell_ismw_mb = bp;
4223 vim_strncpy(bp + n, p, l);
4224 }
4225 }
4226 p += l;
4227 }
4228 else
4229#endif
4230 buf->b_spell_ismw[*p++] = TRUE;
4231}
4232
4233/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004234 * Find the region "region[2]" in "rp" (points to "sl_regions").
4235 * Each region is simply stored as the two characters of it's name.
Bram Moolenaar7887d882005-07-01 22:33:52 +00004236 * Returns the index if found (first is 0), REGION_ALL if not found.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004237 */
4238 static int
4239find_region(rp, region)
4240 char_u *rp;
4241 char_u *region;
4242{
4243 int i;
4244
4245 for (i = 0; ; i += 2)
4246 {
4247 if (rp[i] == NUL)
4248 return REGION_ALL;
4249 if (rp[i] == region[0] && rp[i + 1] == region[1])
4250 break;
4251 }
4252 return i / 2;
4253}
4254
4255/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004256 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004257 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00004258 * Word WF_ONECAP
4259 * W WORD WF_ALLCAP
4260 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004261 */
4262 static int
4263captype(word, end)
4264 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004265 char_u *end; /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004266{
4267 char_u *p;
4268 int c;
4269 int firstcap;
4270 int allcap;
4271 int past_second = FALSE; /* past second word char */
4272
4273 /* find first letter */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004274 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004275 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004276 return 0; /* only non-word characters, illegal word */
4277#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00004278 if (has_mbyte)
4279 c = mb_ptr2char_adv(&p);
4280 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004281#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00004282 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004283 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004284
4285 /*
4286 * Need to check all letters to find a word with mixed upper/lower.
4287 * But a word with an upper char only at start is a ONECAP.
4288 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004289 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004290 if (spell_iswordp_nmw(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004291 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00004292 c = PTR2CHAR(p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004293 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004294 {
4295 /* UUl -> KEEPCAP */
4296 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004297 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004298 allcap = FALSE;
4299 }
4300 else if (!allcap)
4301 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004302 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004303 past_second = TRUE;
4304 }
4305
4306 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004307 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004308 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004309 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004310 return 0;
4311}
4312
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004313/*
4314 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
4315 * capital. So that make_case_word() can turn WOrd into Word.
4316 * Add ALLCAP for "WOrD".
4317 */
4318 static int
4319badword_captype(word, end)
4320 char_u *word;
4321 char_u *end;
4322{
4323 int flags = captype(word, end);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004324 int c;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004325 int l, u;
4326 int first;
4327 char_u *p;
4328
4329 if (flags & WF_KEEPCAP)
4330 {
4331 /* Count the number of UPPER and lower case letters. */
4332 l = u = 0;
4333 first = FALSE;
4334 for (p = word; p < end; mb_ptr_adv(p))
4335 {
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004336 c = PTR2CHAR(p);
4337 if (SPELL_ISUPPER(c))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004338 {
4339 ++u;
4340 if (p == word)
4341 first = TRUE;
4342 }
4343 else
4344 ++l;
4345 }
4346
4347 /* If there are more UPPER than lower case letters suggest an
4348 * ALLCAP word. Otherwise, if the first letter is UPPER then
4349 * suggest ONECAP. Exception: "ALl" most likely should be "All",
4350 * require three upper case letters. */
4351 if (u > l && u > 2)
4352 flags |= WF_ALLCAP;
4353 else if (first)
4354 flags |= WF_ONECAP;
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00004355
4356 if (u >= 2 && l >= 2) /* maCARONI maCAroni */
4357 flags |= WF_MIXCAP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004358 }
4359 return flags;
4360}
4361
# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all languages.
 * Also clears the language list of every buffer, deletes the internal
 * wordlist with its .spl file and frees "repl_from" and "repl_to".
 */
    void
spell_free_all()
{
    slang_T     *slang;
    buf_T       *buf;
    char_u      fname[MAXPATHL];

    /* The buffers must no longer refer to the languages freed below. */
    buf = firstbuf;
    while (buf != NULL)
    {
        ga_clear(&buf->b_langp);
        buf = buf->b_next;
    }

    /* Take each language out of the list before freeing it. */
    for (;;)
    {
        slang = first_lang;
        if (slang == NULL)
            break;
        first_lang = slang->sl_next;
        slang_free(slang);
    }

    if (int_wordlist != NULL)
    {
        /* Delete the internal wordlist and its .spl file */
        mch_remove(int_wordlist);
        int_wordlist_spl(fname);
        mch_remove(fname);
        vim_free(int_wordlist);
        int_wordlist = NULL;
    }

    init_spell_chartab();

    vim_free(repl_to);
    repl_to = NULL;
    vim_free(repl_from);
    repl_from = NULL;
}
# endif
4402
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    buf_T	*buf;
    win_T	*wp;

    /* Initialize the table for spell_iswordp(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    spell_free_all();

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
	/* Nothing to load when 'spelllang' is empty. */
	if (*buf->b_p_spl == NUL)
	    continue;

	/* Only load the wordlists when there is a window for this buffer in
	 * which 'spell' is set.  Loading once per buffer is enough, thus
	 * stop at the first such window.  The "break" is only compiled with
	 * FEAT_WINDOWS, as in the original code. */
	FOR_ALL_WINDOWS(wp)
	    if (wp->w_buffer == buf && wp->w_p_spell)
	    {
		(void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
		break;
# endif
	    }
    }
}
# endif
4439
Bram Moolenaarb765d632005-06-07 21:00:02 +00004440/*
4441 * Reload the spell file "fname" if it's loaded.
4442 */
4443 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004444spell_reload_one(fname, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004445 char_u *fname;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004446 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004447{
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004448 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004449 int didit = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004450
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004451 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004452 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004453 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004454 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004455 slang_clear(slang);
4456 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004457 /* reloading failed, clear the language */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004458 slang_clear(slang);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004459 redraw_all_later(NOT_VALID);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004460 didit = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004461 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004462 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004463
4464 /* When "zg" was used and the file wasn't loaded yet, should redo
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00004465 * 'spelllang' to load it now. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004466 if (added_word && !didit)
4467 did_set_spelllang(curbuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004468}
4469
4470
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004471/*
4472 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004473 */
4474
Bram Moolenaar51485f02005-06-04 21:55:20 +00004475#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004476 and .dic file. */
4477/*
4478 * Main structure to store the contents of a ".aff" file.
4479 */
4480typedef struct afffile_S
4481{
4482 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
Bram Moolenaar95529562005-08-25 21:21:38 +00004483 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
Bram Moolenaar371baa92005-12-29 22:43:53 +00004484 unsigned af_rare; /* RARE ID for rare word */
4485 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004486 unsigned af_bad; /* BAD ID for banned word */
4487 unsigned af_needaffix; /* NEEDAFFIX ID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004488 unsigned af_needcomp; /* NEEDCOMPOUND ID */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004489 int af_pfxpostpone; /* postpone prefixes without chop string */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004490 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
4491 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004492 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004493} afffile_T;
4494
Bram Moolenaar6de68532005-08-24 22:08:48 +00004495#define AFT_CHAR 0 /* flags are one character */
Bram Moolenaar95529562005-08-25 21:21:38 +00004496#define AFT_LONG 1 /* flags are two characters */
4497#define AFT_CAPLONG 2 /* flags are one or two characters */
4498#define AFT_NUM 3 /* flags are numbers, comma separated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004499
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004500typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004501/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
4502struct affentry_S
4503{
4504 affentry_T *ae_next; /* next affix with same name/number */
4505 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
4506 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004507 char_u *ae_cond; /* condition (NULL for ".") */
4508 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004509 char_u ae_rare; /* rare affix */
4510 char_u ae_nocomp; /* word with affix not compoundable */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004511};
4512
Bram Moolenaar6de68532005-08-24 22:08:48 +00004513#ifdef FEAT_MBYTE
4514# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
4515#else
Bram Moolenaar95529562005-08-25 21:21:38 +00004516# define AH_KEY_LEN 7 /* 6 digits + NUL */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004517#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +00004518
Bram Moolenaar51485f02005-06-04 21:55:20 +00004519/* Affix header from ".aff" file. Used for af_pref and af_suff. */
4520typedef struct affheader_S
4521{
Bram Moolenaar6de68532005-08-24 22:08:48 +00004522 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4523 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
4524 int ah_newID; /* prefix ID after renumbering; 0 if not used */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004525 int ah_combine; /* suffix may combine with prefix */
Bram Moolenaar95529562005-08-25 21:21:38 +00004526 int ah_follows; /* another affix block should be following */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004527 affentry_T *ah_first; /* first affix entry */
4528} affheader_T;
4529
4530#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
4531
Bram Moolenaar6de68532005-08-24 22:08:48 +00004532/* Flag used in compound items. */
4533typedef struct compitem_S
4534{
4535 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4536 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
4537 int ci_newID; /* affix ID after renumbering. */
4538} compitem_T;
4539
4540#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
4541
Bram Moolenaar51485f02005-06-04 21:55:20 +00004542/*
4543 * Structure that is used to store the items in the word tree. This avoids
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004544 * the need to keep track of each allocated thing, everything is freed all at
4545 * once after ":mkspell" is done.
Bram Moolenaar51485f02005-06-04 21:55:20 +00004546 */
4547#define SBLOCKSIZE 16000 /* size of sb_data */
4548typedef struct sblock_S sblock_T;
4549struct sblock_S
4550{
4551 sblock_T *sb_next; /* next block in list */
4552 int sb_used; /* nr of bytes already in use */
4553 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004554};
4555
4556/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004557 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004558 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004559typedef struct wordnode_S wordnode_T;
4560struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004561{
Bram Moolenaar0c405862005-06-22 22:26:26 +00004562 union /* shared to save space */
4563 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004564 char_u hashkey[6]; /* the hash key, only used while compressing */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004565 int index; /* index in written nodes (valid after first
4566 round) */
4567 } wn_u1;
4568 union /* shared to save space */
4569 {
4570 wordnode_T *next; /* next node with same hash key */
4571 wordnode_T *wnode; /* parent node that will write this node */
4572 } wn_u2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004573 wordnode_T *wn_child; /* child (next byte in word) */
4574 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
4575 always sorted) */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004576 int wn_refs; /* Nr. of references to this node. Only
4577 relevant for first node in a list of
4578 siblings, in following siblings it is
4579 always one. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004580 char_u wn_byte; /* Byte for this node. NUL for word end */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004581
4582 /* Info for when "wn_byte" is NUL.
4583 * In PREFIXTREE "wn_region" is used for the prefcondnr.
4584 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4585 * "wn_region" the LSW of the wordnr. */
4586 char_u wn_affixID; /* supported/required prefix ID or 0 */
4587 short_u wn_flags; /* WF_ flags */
4588 short wn_region; /* region mask */
4589
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004590#ifdef SPELL_PRINTTREE
4591 int wn_nr; /* sequence nr for printing */
4592#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004593};
4594
#define WN_MASK	 0xffff		/* mask relevant bits of "wn_flags" */

/*
 * Get the wordnode_T from hashitem "hi".
 * The whole expansion is parenthesized, like HI2AH() and HI2CI(), so that
 * "HI2WN(hi)->wn_child" applies "->" to the result of the cast instead of to
 * "hi_key".
 */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004598
Bram Moolenaar51485f02005-06-04 21:55:20 +00004599/*
4600 * Info used while reading the spell files.
4601 */
4602typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004603{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004604 wordnode_T *si_foldroot; /* tree with case-folded words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00004605 long si_foldwcount; /* nr of words in si_foldroot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004606
Bram Moolenaar51485f02005-06-04 21:55:20 +00004607 wordnode_T *si_keeproot; /* tree with keep-case words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00004608 long si_keepwcount; /* nr of words in si_keeproot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004609
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004610 wordnode_T *si_prefroot; /* tree with postponed prefixes */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004611
Bram Moolenaar4770d092006-01-12 23:22:24 +00004612 long si_sugtree; /* creating the soundfolding trie */
4613
Bram Moolenaar51485f02005-06-04 21:55:20 +00004614 sblock_T *si_blocks; /* memory blocks used */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004615 long si_blocks_cnt; /* memory blocks allocated */
4616 long si_compress_cnt; /* words to add before lowering
4617 compression limit */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004618 wordnode_T *si_first_free; /* List of nodes that have been freed during
4619 compression, linked by "wn_child" field. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004620 long si_free_count; /* number of nodes in si_first_free */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004621#ifdef SPELL_PRINTTREE
4622 int si_wordnode_nr; /* sequence nr for nodes */
4623#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004624 buf_T *si_spellbuf; /* buffer used to store soundfold word table */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004625
Bram Moolenaar51485f02005-06-04 21:55:20 +00004626 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004627 int si_add; /* addition file */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004628 int si_clear_chartab; /* when TRUE clear char tables */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004629 int si_region; /* region mask */
4630 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00004631 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004632 int si_verbose; /* verbose messages */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004633 int si_msg_count; /* number of words added since last message */
Bram Moolenaar3982c542005-06-08 21:56:31 +00004634 int si_region_count; /* number of regions supported (1 when there
4635 are no regions) */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004636 char_u si_region_name[16]; /* region names; used only if
4637 * si_region_count > 1) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004638
4639 garray_T si_rep; /* list of fromto_T entries from REP lines */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004640 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004641 garray_T si_sal; /* list of fromto_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004642 char_u *si_sofofr; /* SOFOFROM text */
4643 char_u *si_sofoto; /* SOFOTO text */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004644 int si_nosugfile; /* NOSUGFILE item found */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004645 int si_followup; /* soundsalike: ? */
4646 int si_collapse; /* soundsalike: ? */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004647 hashtab_T si_commonwords; /* hashtable for common words */
4648 time_t si_sugtime; /* timestamp for .sug file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004649 int si_rem_accents; /* soundsalike: remove accents */
4650 garray_T si_map; /* MAP info concatenated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004651 char_u *si_midword; /* MIDWORD chars or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004652 int si_compmax; /* max nr of words for compounding */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004653 int si_compminlen; /* minimal length for compounding */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004654 int si_compsylmax; /* max nr of syllables for compounding */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004655 char_u *si_compflags; /* flags used for compounding */
Bram Moolenaar78622822005-08-23 21:00:13 +00004656 char_u si_nobreak; /* NOBREAK */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004657 char_u *si_syllable; /* syllable string */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004658 garray_T si_prefcond; /* table with conditions for postponed
4659 * prefixes, each stored as a string */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004660 int si_newprefID; /* current value for ah_newID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004661 int si_newcompID; /* current value for compound ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004662} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004663
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004664static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004665static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum));
4666static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
4667static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004668static void check_renumber __ARGS((spellinfo_T *spin));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004669static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
4670static void aff_check_number __ARGS((int spinval, int affval, char *name));
4671static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004672static int str_equal __ARGS((char_u *s1, char_u *s2));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004673static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
4674static int sal_to_bool __ARGS((char_u *s));
Bram Moolenaar5482f332005-04-17 20:18:43 +00004675static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004676static void spell_free_aff __ARGS((afffile_T *aff));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004677static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004678static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004679static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004680static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004681static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
4682static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
4683static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004684static void free_blocks __ARGS((sblock_T *bl));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004685static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004686static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004687static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004688static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004689static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004690static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
4691static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
4692static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004693static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004694static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004695static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar0c405862005-06-22 22:26:26 +00004696static void clear_node __ARGS((wordnode_T *node));
4697static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004698static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname));
4699static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang));
4700static int sug_maketable __ARGS((spellinfo_T *spin));
4701static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap));
4702static int offset2bytes __ARGS((int nr, char_u *buf));
4703static int bytes2offset __ARGS((char_u **pp));
4704static void sug_write __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004705static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004706static void spell_message __ARGS((spellinfo_T *spin, char_u *str));
Bram Moolenaarb765d632005-06-07 21:00:02 +00004707static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004708
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that the minus sign cannot combine with an
 * operator next to the place where the macro is used. */
#define PFX_FLAGS	(-256)

/*
 * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
 */
static long compress_start = 30000;	/* memory / SBLOCKSIZE */
static long compress_inc = 100;		/* memory / SBLOCKSIZE */
static long compress_added = 500000;	/* word count */
4720
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004721#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE	1000	/* size of each of the three line buffers */
#define PRINTWIDTH	6	/* nr of columns used for one tree level */

/*
 * Format "fmt" with arguments "a1" and "a2" into line buffer "l", starting at
 * the column for tree level "depth".
 * Nothing is written when that column is past the end of the buffer.  Without
 * this check the start pointer would be outside of the buffer and the
 * remaining size negative, which turns into a huge size_t.
 * "l" and "depth" are evaluated more than once, don't pass expressions with
 * side effects.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    ((void)((depth) * PRINTWIDTH < PRINTLINESIZE \
	? vim_snprintf((l) + (depth) * PRINTWIDTH, \
		(size_t)(PRINTLINESIZE - PRINTWIDTH * (depth)), \
		fmt, a1, a2) \
	: 0))

/* The three text lines that are built up to display one row of nodes. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
4737
4738 static void
4739spell_clear_flags(wordnode_T *node)
4740{
4741 wordnode_T *np;
4742
4743 for (np = node; np != NULL; np = np->wn_sibling)
4744 {
4745 np->wn_u1.index = FALSE;
4746 spell_clear_flags(np->wn_child);
4747 }
4748}
4749
4750 static void
4751spell_print_node(wordnode_T *node, int depth)
4752{
4753 if (node->wn_u1.index)
4754 {
4755 /* Done this node before, print the reference. */
4756 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
4757 PRINTSOME(line2, depth, " ", 0, 0);
4758 PRINTSOME(line3, depth, " ", 0, 0);
4759 msg(line1);
4760 msg(line2);
4761 msg(line3);
4762 }
4763 else
4764 {
4765 node->wn_u1.index = TRUE;
4766
4767 if (node->wn_byte != NUL)
4768 {
4769 if (node->wn_child != NULL)
4770 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
4771 else
4772 /* Cannot happen? */
4773 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
4774 }
4775 else
4776 PRINTSOME(line1, depth, " $ ", 0, 0);
4777
4778 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
4779
4780 if (node->wn_sibling != NULL)
4781 PRINTSOME(line3, depth, " | ", 0, 0);
4782 else
4783 PRINTSOME(line3, depth, " ", 0, 0);
4784
4785 if (node->wn_byte == NUL)
4786 {
4787 msg(line1);
4788 msg(line2);
4789 msg(line3);
4790 }
4791
4792 /* do the children */
4793 if (node->wn_byte != NUL && node->wn_child != NULL)
4794 spell_print_node(node->wn_child, depth + 1);
4795
4796 /* do the siblings */
4797 if (node->wn_sibling != NULL)
4798 {
4799 /* get rid of all parent details except | */
4800 STRCPY(line1, line3);
4801 STRCPY(line2, line3);
4802 spell_print_node(node->wn_sibling, depth);
4803 }
4804 }
4805}
4806
4807 static void
4808spell_print_tree(wordnode_T *root)
4809{
4810 if (root != NULL)
4811 {
4812 /* Clear the "wn_u1.index" fields, used to remember what has been
4813 * done. */
4814 spell_clear_flags(root);
4815
4816 /* Recursively print the tree. */
4817 spell_print_node(root, 0);
4818 }
4819}
4820#endif /* SPELL_PRINTTREE */
4821
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004822/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004823 * Read the affix file "fname".
Bram Moolenaar3982c542005-06-08 21:56:31 +00004824 * Returns an afffile_T, NULL for complete failure.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004825 */
4826 static afffile_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004827spell_read_aff(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004828 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004829 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004830{
4831 FILE *fd;
4832 afffile_T *aff;
4833 char_u rline[MAXLINELEN];
4834 char_u *line;
4835 char_u *pc = NULL;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004836#define MAXITEMCNT 30
Bram Moolenaar8db73182005-06-17 21:51:16 +00004837 char_u *(items[MAXITEMCNT]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004838 int itemcnt;
4839 char_u *p;
4840 int lnum = 0;
4841 affheader_T *cur_aff = NULL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004842 int did_postpone_prefix = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004843 int aff_todo = 0;
4844 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004845 char_u *low = NULL;
4846 char_u *fol = NULL;
4847 char_u *upp = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004848 int do_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004849 int do_repsal;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004850 int do_sal;
4851 int do_map;
4852 int found_map = FALSE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004853 hashitem_T *hi;
Bram Moolenaar53805d12005-08-01 07:08:33 +00004854 int l;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004855 int compminlen = 0; /* COMPOUNDMIN value */
4856 int compsylmax = 0; /* COMPOUNDSYLMAX value */
4857 int compmax = 0; /* COMPOUNDMAX value */
4858 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDFLAGS
4859 concatenated */
4860 char_u *midword = NULL; /* MIDWORD value */
4861 char_u *syllable = NULL; /* SYLLABLE value */
4862 char_u *sofofrom = NULL; /* SOFOFROM value */
4863 char_u *sofoto = NULL; /* SOFOTO value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004864
Bram Moolenaar51485f02005-06-04 21:55:20 +00004865 /*
4866 * Open the file.
4867 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004868 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004869 if (fd == NULL)
4870 {
4871 EMSG2(_(e_notopen), fname);
4872 return NULL;
4873 }
4874
Bram Moolenaar4770d092006-01-12 23:22:24 +00004875 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
4876 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004877
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004878 /* Only do REP lines when not done in another .aff file already. */
4879 do_rep = spin->si_rep.ga_len == 0;
4880
Bram Moolenaar4770d092006-01-12 23:22:24 +00004881 /* Only do REPSAL lines when not done in another .aff file already. */
4882 do_repsal = spin->si_repsal.ga_len == 0;
4883
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004884 /* Only do SAL lines when not done in another .aff file already. */
4885 do_sal = spin->si_sal.ga_len == 0;
4886
4887 /* Only do MAP lines when not done in another .aff file already. */
4888 do_map = spin->si_map.ga_len == 0;
4889
Bram Moolenaar51485f02005-06-04 21:55:20 +00004890 /*
4891 * Allocate and init the afffile_T structure.
4892 */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004893 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004894 if (aff == NULL)
4895 return NULL;
4896 hash_init(&aff->af_pref);
4897 hash_init(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00004898 hash_init(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004899
4900 /*
4901 * Read all the lines in the file one by one.
4902 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004903 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004904 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004905 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004906 ++lnum;
4907
4908 /* Skip comment lines. */
4909 if (*rline == '#')
4910 continue;
4911
4912 /* Convert from "SET" to 'encoding' when needed. */
4913 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004914#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00004915 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004916 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004917 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004918 if (pc == NULL)
4919 {
4920 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
4921 fname, lnum, rline);
4922 continue;
4923 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004924 line = pc;
4925 }
4926 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00004927#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004928 {
4929 pc = NULL;
4930 line = rline;
4931 }
4932
4933 /* Split the line up in white separated items. Put a NUL after each
4934 * item. */
4935 itemcnt = 0;
4936 for (p = line; ; )
4937 {
4938 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
4939 ++p;
4940 if (*p == NUL)
4941 break;
Bram Moolenaar8db73182005-06-17 21:51:16 +00004942 if (itemcnt == MAXITEMCNT) /* too many items */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004943 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004944 items[itemcnt++] = p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004945 while (*p > ' ') /* skip until white space or CR/NL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004946 ++p;
4947 if (*p == NUL)
4948 break;
4949 *p++ = NUL;
4950 }
4951
4952 /* Handle non-empty lines. */
4953 if (itemcnt > 0)
4954 {
4955 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
4956 && aff->af_enc == NULL)
4957 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004958#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00004959 /* Setup for conversion from "ENC" to 'encoding'. */
4960 aff->af_enc = enc_canonize(items[1]);
4961 if (aff->af_enc != NULL && !spin->si_ascii
4962 && convert_setup(&spin->si_conv, aff->af_enc,
4963 p_enc) == FAIL)
4964 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
4965 fname, aff->af_enc, p_enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004966 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004967#else
4968 smsg((char_u *)_("Conversion in %s not supported"), fname);
4969#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004970 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00004971 else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2
4972 && aff->af_flagtype == AFT_CHAR)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004973 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00004974 if (STRCMP(items[1], "long") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00004975 aff->af_flagtype = AFT_LONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004976 else if (STRCMP(items[1], "num") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00004977 aff->af_flagtype = AFT_NUM;
4978 else if (STRCMP(items[1], "caplong") == 0)
4979 aff->af_flagtype = AFT_CAPLONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004980 else
4981 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
4982 fname, lnum, items[1]);
Bram Moolenaar371baa92005-12-29 22:43:53 +00004983 if (aff->af_rare != 0
4984 || aff->af_keepcase != 0
4985 || aff->af_bad != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004986 || aff->af_needaffix != 0
4987 || aff->af_needcomp != 0
4988 || compflags != NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00004989 || aff->af_suff.ht_used > 0
4990 || aff->af_pref.ht_used > 0)
4991 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
4992 fname, lnum, items[1]);
4993 }
4994 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2
4995 && midword == NULL)
4996 {
4997 midword = getroom_save(spin, items[1]);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004998 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00004999 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
5000 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005001 /* ignored, we always split */
Bram Moolenaar50cde822005-06-05 21:54:54 +00005002 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005003 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
Bram Moolenaar51485f02005-06-04 21:55:20 +00005004 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005005 /* ignored, we look in the tree for what chars may appear */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005006 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005007 /* TODO: remove "RAR" later */
5008 else if ((STRCMP(items[0], "RAR") == 0
5009 || STRCMP(items[0], "RARE") == 0) && itemcnt == 2
5010 && aff->af_rare == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005011 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005012 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005013 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005014 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005015 /* TODO: remove "KEP" later */
5016 else if ((STRCMP(items[0], "KEP") == 0
5017 || STRCMP(items[0], "KEEPCASE") == 0) && itemcnt == 2
5018 && aff->af_keepcase == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005019 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005020 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005021 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005022 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00005023 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
5024 && aff->af_bad == 0)
5025 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005026 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5027 fname, lnum);
Bram Moolenaar0c405862005-06-22 22:26:26 +00005028 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005029 else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2
5030 && aff->af_needaffix == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005031 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005032 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5033 fname, lnum);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005034 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005035 else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2
5036 && aff->af_needcomp == 0)
5037 {
5038 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5039 fname, lnum);
5040 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005041 else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005042 && compflags == NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005043 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005044 /* Turn flag "c" into COMPOUNDFLAGS compatible string "c+",
5045 * "Na" into "Na+", "1234" into "1234+". */
5046 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005047 if (p != NULL)
5048 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005049 STRCPY(p, items[1]);
5050 STRCAT(p, "+");
5051 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005052 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005053 }
5054 else if (STRCMP(items[0], "COMPOUNDFLAGS") == 0 && itemcnt == 2)
5055 {
5056 /* Concatenate this string to previously defined ones, using a
5057 * slash to separate them. */
5058 l = STRLEN(items[1]) + 1;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005059 if (compflags != NULL)
5060 l += STRLEN(compflags) + 1;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005061 p = getroom(spin, l, FALSE);
5062 if (p != NULL)
5063 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005064 if (compflags != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005065 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005066 STRCPY(p, compflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005067 STRCAT(p, "/");
5068 }
5069 STRCAT(p, items[1]);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005070 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005071 }
5072 }
5073 else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005074 && compmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005075 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005076 compmax = atoi((char *)items[1]);
5077 if (compmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005078 smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"),
5079 fname, lnum, items[1]);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005080 }
5081 else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005082 && compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005083 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005084 compminlen = atoi((char *)items[1]);
5085 if (compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005086 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5087 fname, lnum, items[1]);
5088 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005089 else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005090 && compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005091 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005092 compsylmax = atoi((char *)items[1]);
5093 if (compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005094 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5095 fname, lnum, items[1]);
5096 }
5097 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005098 && syllable == NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005099 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005100 syllable = getroom_save(spin, items[1]);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005101 }
Bram Moolenaar78622822005-08-23 21:00:13 +00005102 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
5103 {
5104 spin->si_nobreak = TRUE;
5105 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005106 else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1)
5107 {
5108 spin->si_nosugfile = TRUE;
5109 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005110 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
5111 {
5112 aff->af_pfxpostpone = TRUE;
5113 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005114 else if ((STRCMP(items[0], "PFX") == 0
5115 || STRCMP(items[0], "SFX") == 0)
5116 && aff_todo == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005117 && itemcnt >= 4)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005118 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005119 int lasti = 4;
5120 char_u key[AH_KEY_LEN];
5121
5122 if (*items[0] == 'P')
5123 tp = &aff->af_pref;
5124 else
5125 tp = &aff->af_suff;
5126
5127 /* Myspell allows the same affix name to be used multiple
5128 * times. The affix files that do this have an undocumented
5129 * "S" flag on all but the last block, thus we check for that
5130 * and store it in ah_follows. */
5131 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5132 hi = hash_find(tp, key);
5133 if (!HASHITEM_EMPTY(hi))
5134 {
5135 cur_aff = HI2AH(hi);
5136 if (cur_aff->ah_combine != (*items[2] == 'Y'))
5137 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5138 fname, lnum, items[1]);
5139 if (!cur_aff->ah_follows)
5140 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5141 fname, lnum, items[1]);
5142 }
5143 else
5144 {
5145 /* New affix letter. */
5146 cur_aff = (affheader_T *)getroom(spin,
5147 sizeof(affheader_T), TRUE);
5148 if (cur_aff == NULL)
5149 break;
5150 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5151 fname, lnum);
5152 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5153 break;
5154 if (cur_aff->ah_flag == aff->af_bad
Bram Moolenaar371baa92005-12-29 22:43:53 +00005155 || cur_aff->ah_flag == aff->af_rare
5156 || cur_aff->ah_flag == aff->af_keepcase
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005157 || cur_aff->ah_flag == aff->af_needaffix
5158 || cur_aff->ah_flag == aff->af_needcomp)
Bram Moolenaar371baa92005-12-29 22:43:53 +00005159 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND in %s line %d: %s"),
Bram Moolenaar95529562005-08-25 21:21:38 +00005160 fname, lnum, items[1]);
5161 STRCPY(cur_aff->ah_key, items[1]);
5162 hash_add(tp, cur_aff->ah_key);
5163
5164 cur_aff->ah_combine = (*items[2] == 'Y');
5165 }
5166
5167 /* Check for the "S" flag, which apparently means that another
5168 * block with the same affix name is following. */
5169 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5170 {
5171 ++lasti;
5172 cur_aff->ah_follows = TRUE;
5173 }
5174 else
5175 cur_aff->ah_follows = FALSE;
5176
Bram Moolenaar8db73182005-06-17 21:51:16 +00005177 /* Myspell allows extra text after the item, but that might
5178 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaar95529562005-08-25 21:21:38 +00005179 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar8db73182005-06-17 21:51:16 +00005180 smsg((char_u *)_("Trailing text in %s line %d: %s"),
5181 fname, lnum, items[4]);
5182
Bram Moolenaar95529562005-08-25 21:21:38 +00005183 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005184 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5185 fname, lnum, items[2]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005186
Bram Moolenaar95529562005-08-25 21:21:38 +00005187 if (*items[0] == 'P' && aff->af_pfxpostpone)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005188 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005189 if (cur_aff->ah_newID == 0)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005190 {
5191 /* Use a new number in the .spl file later, to be able
5192 * to handle multiple .aff files. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005193 check_renumber(spin);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005194 cur_aff->ah_newID = ++spin->si_newprefID;
5195
5196 /* We only really use ah_newID if the prefix is
5197 * postponed. We know that only after handling all
5198 * the items. */
5199 did_postpone_prefix = FALSE;
5200 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005201 else
5202 /* Did use the ID in a previous block. */
5203 did_postpone_prefix = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005204 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005205
Bram Moolenaar51485f02005-06-04 21:55:20 +00005206 aff_todo = atoi((char *)items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005207 }
5208 else if ((STRCMP(items[0], "PFX") == 0
5209 || STRCMP(items[0], "SFX") == 0)
5210 && aff_todo > 0
5211 && STRCMP(cur_aff->ah_key, items[1]) == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005212 && itemcnt >= 5)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005213 {
5214 affentry_T *aff_entry;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005215 int rare = FALSE;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005216 int nocomp = FALSE;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005217 int upper = FALSE;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005218 int lasti = 5;
5219
Bram Moolenaar5195e452005-08-19 20:32:47 +00005220 /* Check for "rare" and "nocomp" after the other info. */
5221 while (itemcnt > lasti)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005222 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00005223 if (!rare && STRICMP(items[lasti], "rare") == 0)
5224 {
5225 rare = TRUE;
5226 ++lasti;
5227 }
5228 else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0)
5229 {
5230 nocomp = TRUE;
5231 ++lasti;
5232 }
5233 else
5234 break;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005235 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005236
Bram Moolenaar8db73182005-06-17 21:51:16 +00005237 /* Myspell allows extra text after the item, but that might
5238 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005239 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005240 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar8db73182005-06-17 21:51:16 +00005241
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005242 /* New item for an affix letter. */
5243 --aff_todo;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005244 aff_entry = (affentry_T *)getroom(spin,
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00005245 sizeof(affentry_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005246 if (aff_entry == NULL)
5247 break;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005248 aff_entry->ae_rare = rare;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005249 aff_entry->ae_nocomp = nocomp;
Bram Moolenaar5482f332005-04-17 20:18:43 +00005250
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005251 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005252 aff_entry->ae_chop = getroom_save(spin, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005253 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005254 aff_entry->ae_add = getroom_save(spin, items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005255
Bram Moolenaar51485f02005-06-04 21:55:20 +00005256 /* Don't use an affix entry with non-ASCII characters when
5257 * "spin->si_ascii" is TRUE. */
5258 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00005259 || has_non_ascii(aff_entry->ae_add)))
5260 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00005261 aff_entry->ae_next = cur_aff->ah_first;
5262 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00005263
5264 if (STRCMP(items[4], ".") != 0)
5265 {
5266 char_u buf[MAXLINELEN];
5267
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005268 aff_entry->ae_cond = getroom_save(spin, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005269 if (*items[0] == 'P')
5270 sprintf((char *)buf, "^%s", items[4]);
5271 else
5272 sprintf((char *)buf, "%s$", items[4]);
5273 aff_entry->ae_prog = vim_regcomp(buf,
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005274 RE_MAGIC + RE_STRING + RE_STRICT);
5275 if (aff_entry->ae_prog == NULL)
5276 smsg((char_u *)_("Broken condition in %s line %d: %s"),
5277 fname, lnum, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005278 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005279
5280 /* For postponed prefixes we need an entry in si_prefcond
5281 * for the condition. Use an existing one if possible. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00005282 if (*items[0] == 'P' && aff->af_pfxpostpone)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005283 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005284 /* When the chop string is one lower-case letter and
5285 * the add string ends in the upper-case letter we set
5286 * the "upper" flag, clear "ae_chop" and remove the
5287 * letters from "ae_add". The condition must either
5288 * be empty or start with the same letter. */
5289 if (aff_entry->ae_chop != NULL
5290 && aff_entry->ae_add != NULL
5291#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005292 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar53805d12005-08-01 07:08:33 +00005293 aff_entry->ae_chop)] == NUL
5294#else
5295 && aff_entry->ae_chop[1] == NUL
5296#endif
5297 )
5298 {
5299 int c, c_up;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005300
Bram Moolenaar53805d12005-08-01 07:08:33 +00005301 c = PTR2CHAR(aff_entry->ae_chop);
5302 c_up = SPELL_TOUPPER(c);
5303 if (c_up != c
5304 && (aff_entry->ae_cond == NULL
5305 || PTR2CHAR(aff_entry->ae_cond) == c))
5306 {
5307 p = aff_entry->ae_add
5308 + STRLEN(aff_entry->ae_add);
5309 mb_ptr_back(aff_entry->ae_add, p);
5310 if (PTR2CHAR(p) == c_up)
5311 {
5312 upper = TRUE;
5313 aff_entry->ae_chop = NULL;
5314 *p = NUL;
5315
5316 /* The condition is matched with the
5317 * actual word, thus must check for the
5318 * upper-case letter. */
5319 if (aff_entry->ae_cond != NULL)
5320 {
5321 char_u buf[MAXLINELEN];
5322#ifdef FEAT_MBYTE
5323 if (has_mbyte)
5324 {
5325 onecap_copy(items[4], buf, TRUE);
5326 aff_entry->ae_cond = getroom_save(
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005327 spin, buf);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005328 }
5329 else
5330#endif
5331 *aff_entry->ae_cond = c_up;
5332 if (aff_entry->ae_cond != NULL)
5333 {
5334 sprintf((char *)buf, "^%s",
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005335 aff_entry->ae_cond);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005336 vim_free(aff_entry->ae_prog);
5337 aff_entry->ae_prog = vim_regcomp(
5338 buf, RE_MAGIC + RE_STRING);
5339 }
5340 }
5341 }
5342 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005343 }
5344
Bram Moolenaar53805d12005-08-01 07:08:33 +00005345 if (aff_entry->ae_chop == NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005346 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005347 int idx;
5348 char_u **pp;
5349 int n;
5350
Bram Moolenaar6de68532005-08-24 22:08:48 +00005351 /* Find a previously used condition. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00005352 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5353 --idx)
5354 {
5355 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5356 if (str_equal(p, aff_entry->ae_cond))
5357 break;
5358 }
5359 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5360 {
5361 /* Not found, add a new condition. */
5362 idx = spin->si_prefcond.ga_len++;
5363 pp = ((char_u **)spin->si_prefcond.ga_data)
5364 + idx;
5365 if (aff_entry->ae_cond == NULL)
5366 *pp = NULL;
5367 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005368 *pp = getroom_save(spin,
Bram Moolenaar53805d12005-08-01 07:08:33 +00005369 aff_entry->ae_cond);
5370 }
5371
5372 /* Add the prefix to the prefix tree. */
5373 if (aff_entry->ae_add == NULL)
5374 p = (char_u *)"";
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005375 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00005376 p = aff_entry->ae_add;
5377 /* PFX_FLAGS is a negative number, so that
5378 * tree_add_word() knows this is the prefix tree. */
5379 n = PFX_FLAGS;
5380 if (rare)
5381 n |= WFP_RARE;
5382 if (!cur_aff->ah_combine)
5383 n |= WFP_NC;
5384 if (upper)
5385 n |= WFP_UP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005386 tree_add_word(spin, p, spin->si_prefroot, n,
5387 idx, cur_aff->ah_newID);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005388 did_postpone_prefix = TRUE;
5389 }
5390
5391 /* Didn't actually use ah_newID, backup si_newprefID. */
5392 if (aff_todo == 0 && !did_postpone_prefix)
5393 {
5394 --spin->si_newprefID;
5395 cur_aff->ah_newID = 0;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005396 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005397 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00005398 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005399 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005400 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2
5401 && fol == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005402 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005403 fol = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005404 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005405 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2
5406 && low == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005407 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005408 low = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005409 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005410 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2
5411 && upp == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005412 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005413 upp = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005414 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005415 else if ((STRCMP(items[0], "REP") == 0
5416 || STRCMP(items[0], "REPSAL") == 0)
5417 && itemcnt == 2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005418 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005419 /* Ignore REP/REPSAL count */;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005420 if (!isdigit(*items[1]))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005421 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005422 fname, lnum);
5423 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005424 else if ((STRCMP(items[0], "REP") == 0
5425 || STRCMP(items[0], "REPSAL") == 0)
5426 && itemcnt >= 3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005427 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005428 /* REP/REPSAL item */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005429 /* Myspell ignores extra arguments, we require it starts with
5430 * # to detect mistakes. */
5431 if (itemcnt > 3 && items[3][0] != '#')
5432 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005433 if (items[0][3] == 'S' ? do_repsal : do_rep)
Bram Moolenaar1e015462005-09-25 22:16:38 +00005434 {
5435 /* Replace underscore with space (can't include a space
5436 * directly). */
5437 for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5438 if (*p == '_')
5439 *p = ' ';
5440 for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5441 if (*p == '_')
5442 *p = ' ';
Bram Moolenaar4770d092006-01-12 23:22:24 +00005443 add_fromto(spin, items[0][3] == 'S'
5444 ? &spin->si_repsal
5445 : &spin->si_rep, items[1], items[2]);
Bram Moolenaar1e015462005-09-25 22:16:38 +00005446 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005447 }
5448 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
5449 {
5450 /* MAP item or count */
5451 if (!found_map)
5452 {
5453 /* First line contains the count. */
5454 found_map = TRUE;
5455 if (!isdigit(*items[1]))
5456 smsg((char_u *)_("Expected MAP count in %s line %d"),
5457 fname, lnum);
5458 }
5459 else if (do_map)
5460 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00005461 int c;
5462
5463 /* Check that every character appears only once. */
5464 for (p = items[1]; *p != NUL; )
5465 {
5466#ifdef FEAT_MBYTE
5467 c = mb_ptr2char_adv(&p);
5468#else
5469 c = *p++;
5470#endif
5471 if ((spin->si_map.ga_len > 0
5472 && vim_strchr(spin->si_map.ga_data, c)
5473 != NULL)
5474 || vim_strchr(p, c) != NULL)
5475 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5476 fname, lnum);
5477 }
5478
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005479 /* We simply concatenate all the MAP strings, separated by
5480 * slashes. */
5481 ga_concat(&spin->si_map, items[1]);
5482 ga_append(&spin->si_map, '/');
5483 }
5484 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00005485 /* Accept "SAL from to" and "SAL from to # comment". */
5486 else if (STRCMP(items[0], "SAL") == 0
5487 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005488 {
5489 if (do_sal)
5490 {
5491 /* SAL item (sounds-a-like)
5492 * Either one of the known keys or a from-to pair. */
5493 if (STRCMP(items[1], "followup") == 0)
5494 spin->si_followup = sal_to_bool(items[2]);
5495 else if (STRCMP(items[1], "collapse_result") == 0)
5496 spin->si_collapse = sal_to_bool(items[2]);
5497 else if (STRCMP(items[1], "remove_accents") == 0)
5498 spin->si_rem_accents = sal_to_bool(items[2]);
5499 else
5500 /* when "to" is "_" it means empty */
5501 add_fromto(spin, &spin->si_sal, items[1],
5502 STRCMP(items[2], "_") == 0 ? (char_u *)""
5503 : items[2]);
5504 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005505 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005506 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005507 && sofofrom == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005508 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005509 sofofrom = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005510 }
5511 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005512 && sofoto == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005513 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005514 sofoto = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005515 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005516 else if (STRCMP(items[0], "COMMON") == 0)
5517 {
5518 int i;
5519
5520 for (i = 1; i < itemcnt; ++i)
5521 {
5522 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
5523 items[i])))
5524 {
5525 p = vim_strsave(items[i]);
5526 if (p == NULL)
5527 break;
5528 hash_add(&spin->si_commonwords, p);
5529 }
5530 }
5531 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00005532 else
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005533 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005534 fname, lnum, items[0]);
5535 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005536 }
5537
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005538 if (fol != NULL || low != NULL || upp != NULL)
5539 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005540 if (spin->si_clear_chartab)
5541 {
5542 /* Clear the char type tables, don't want to use any of the
5543 * currently used spell properties. */
5544 init_spell_chartab();
5545 spin->si_clear_chartab = FALSE;
5546 }
5547
Bram Moolenaar3982c542005-06-08 21:56:31 +00005548 /*
5549 * Don't write a word table for an ASCII file, so that we don't check
5550 * for conflicts with a word table that matches 'encoding'.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005551 * Don't write one for utf-8 either, we use utf_*() and
Bram Moolenaar3982c542005-06-08 21:56:31 +00005552 * mb_get_class(), the list of chars in the file will be incomplete.
5553 */
5554 if (!spin->si_ascii
5555#ifdef FEAT_MBYTE
5556 && !enc_utf8
5557#endif
5558 )
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00005559 {
5560 if (fol == NULL || low == NULL || upp == NULL)
5561 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
5562 else
Bram Moolenaar3982c542005-06-08 21:56:31 +00005563 (void)set_spell_chartab(fol, low, upp);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00005564 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005565
5566 vim_free(fol);
5567 vim_free(low);
5568 vim_free(upp);
5569 }
5570
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005571 /* Use compound specifications of the .aff file for the spell info. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005572 if (compmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005573 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005574 aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX");
5575 spin->si_compmax = compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005576 }
5577
Bram Moolenaar6de68532005-08-24 22:08:48 +00005578 if (compminlen != 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005579 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005580 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
5581 spin->si_compminlen = compminlen;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005582 }
5583
Bram Moolenaar6de68532005-08-24 22:08:48 +00005584 if (compsylmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005585 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005586 if (syllable == NULL)
5587 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
5588 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
5589 spin->si_compsylmax = compsylmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005590 }
5591
Bram Moolenaar6de68532005-08-24 22:08:48 +00005592 if (compflags != NULL)
5593 process_compflags(spin, aff, compflags);
5594
5595 /* Check that we didn't use too many renumbered flags. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005596 if (spin->si_newcompID < spin->si_newprefID)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005597 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005598 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005599 MSG(_("Too many postponed prefixes"));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005600 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005601 MSG(_("Too many compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005602 else
Bram Moolenaar6de68532005-08-24 22:08:48 +00005603 MSG(_("Too many posponed prefixes and/or compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005604 }
5605
Bram Moolenaar6de68532005-08-24 22:08:48 +00005606 if (syllable != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005607 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005608 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
5609 spin->si_syllable = syllable;
5610 }
5611
5612 if (sofofrom != NULL || sofoto != NULL)
5613 {
5614 if (sofofrom == NULL || sofoto == NULL)
5615 smsg((char_u *)_("Missing SOFO%s line in %s"),
5616 sofofrom == NULL ? "FROM" : "TO", fname);
5617 else if (spin->si_sal.ga_len > 0)
5618 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005619 else
Bram Moolenaar6de68532005-08-24 22:08:48 +00005620 {
5621 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
5622 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
5623 spin->si_sofofr = sofofrom;
5624 spin->si_sofoto = sofoto;
5625 }
5626 }
5627
5628 if (midword != NULL)
5629 {
5630 aff_check_string(spin->si_midword, midword, "MIDWORD");
5631 spin->si_midword = midword;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005632 }
5633
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005634 vim_free(pc);
5635 fclose(fd);
5636 return aff;
5637}
5638
5639/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00005640 * Turn an affix flag name into a number, according to the FLAG type.
5641 * returns zero for failure.
5642 */
5643 static unsigned
5644affitem2flag(flagtype, item, fname, lnum)
5645 int flagtype;
5646 char_u *item;
5647 char_u *fname;
5648 int lnum;
5649{
5650 unsigned res;
5651 char_u *p = item;
5652
5653 res = get_affitem(flagtype, &p);
5654 if (res == 0)
5655 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005656 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005657 smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
5658 fname, lnum, item);
5659 else
5660 smsg((char_u *)_("Illegal flag in %s line %d: %s"),
5661 fname, lnum, item);
5662 }
5663 if (*p != NUL)
5664 {
5665 smsg((char_u *)_(e_affname), fname, lnum, item);
5666 return 0;
5667 }
5668
5669 return res;
5670}
5671
5672/*
5673 * Get one affix name from "*pp" and advance the pointer.
5674 * Returns zero for an error, still advances the pointer then.
5675 */
5676 static unsigned
5677get_affitem(flagtype, pp)
5678 int flagtype;
5679 char_u **pp;
5680{
5681 int res;
5682
Bram Moolenaar95529562005-08-25 21:21:38 +00005683 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005684 {
5685 if (!VIM_ISDIGIT(**pp))
5686 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005687 ++*pp; /* always advance, avoid getting stuck */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005688 return 0;
5689 }
5690 res = getdigits(pp);
5691 }
5692 else
5693 {
5694#ifdef FEAT_MBYTE
5695 res = mb_ptr2char_adv(pp);
5696#else
5697 res = *(*pp)++;
5698#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00005699 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
Bram Moolenaar6de68532005-08-24 22:08:48 +00005700 && res >= 'A' && res <= 'Z'))
5701 {
5702 if (**pp == NUL)
5703 return 0;
5704#ifdef FEAT_MBYTE
5705 res = mb_ptr2char_adv(pp) + (res << 16);
5706#else
5707 res = *(*pp)++ + (res << 16);
5708#endif
5709 }
5710 }
5711 return res;
5712}
5713
5714/*
5715 * Process the "compflags" string used in an affix file and append it to
5716 * spin->si_compflags.
5717 * The processing involves changing the affix names to ID numbers, so that
5718 * they fit in one byte.
5719 */
5720 static void
5721process_compflags(spin, aff, compflags)
5722 spellinfo_T *spin;
5723 afffile_T *aff;
5724 char_u *compflags;
5725{
5726 char_u *p;
5727 char_u *prevp;
5728 unsigned flag;
5729 compitem_T *ci;
5730 int id;
5731 int len;
5732 char_u *tp;
5733 char_u key[AH_KEY_LEN];
5734 hashitem_T *hi;
5735
5736 /* Make room for the old and the new compflags, concatenated with a / in
5737 * between. Processing it makes it shorter, but we don't know by how
5738 * much, thus allocate the maximum. */
5739 len = STRLEN(compflags) + 1;
5740 if (spin->si_compflags != NULL)
5741 len += STRLEN(spin->si_compflags) + 1;
5742 p = getroom(spin, len, FALSE);
5743 if (p == NULL)
5744 return;
5745 if (spin->si_compflags != NULL)
5746 {
5747 STRCPY(p, spin->si_compflags);
5748 STRCAT(p, "/");
5749 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005750 spin->si_compflags = p;
5751 tp = p + STRLEN(p);
5752
5753 for (p = compflags; *p != NUL; )
5754 {
5755 if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
5756 /* Copy non-flag characters directly. */
5757 *tp++ = *p++;
5758 else
5759 {
5760 /* First get the flag number, also checks validity. */
5761 prevp = p;
5762 flag = get_affitem(aff->af_flagtype, &p);
5763 if (flag != 0)
5764 {
5765 /* Find the flag in the hashtable. If it was used before, use
5766 * the existing ID. Otherwise add a new entry. */
5767 vim_strncpy(key, prevp, p - prevp);
5768 hi = hash_find(&aff->af_comp, key);
5769 if (!HASHITEM_EMPTY(hi))
5770 id = HI2CI(hi)->ci_newID;
5771 else
5772 {
5773 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
5774 if (ci == NULL)
5775 break;
5776 STRCPY(ci->ci_key, key);
5777 ci->ci_flag = flag;
5778 /* Avoid using a flag ID that has a special meaning in a
5779 * regexp (also inside []). */
5780 do
5781 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005782 check_renumber(spin);
5783 id = spin->si_newcompID--;
5784 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005785 ci->ci_newID = id;
5786 hash_add(&aff->af_comp, ci->ci_key);
5787 }
5788 *tp++ = id;
5789 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005790 if (aff->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00005791 ++p;
5792 }
5793 }
5794
5795 *tp = NUL;
5796}
5797
5798/*
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005799 * Check that the new IDs for postponed affixes and compounding don't overrun
5800 * each other. We have almost 255 available, but start at 0-127 to avoid
5801 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
5802 * When that is used up an error message is given.
5803 */
5804 static void
5805check_renumber(spin)
5806 spellinfo_T *spin;
5807{
5808 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
5809 {
5810 spin->si_newprefID = 127;
5811 spin->si_newcompID = 255;
5812 }
5813}
5814
5815/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00005816 * Return TRUE if flag "flag" appears in affix list "afflist".
5817 */
5818 static int
5819flag_in_afflist(flagtype, afflist, flag)
5820 int flagtype;
5821 char_u *afflist;
5822 unsigned flag;
5823{
5824 char_u *p;
5825 unsigned n;
5826
5827 switch (flagtype)
5828 {
5829 case AFT_CHAR:
5830 return vim_strchr(afflist, flag) != NULL;
5831
Bram Moolenaar95529562005-08-25 21:21:38 +00005832 case AFT_CAPLONG:
5833 case AFT_LONG:
Bram Moolenaar6de68532005-08-24 22:08:48 +00005834 for (p = afflist; *p != NUL; )
5835 {
5836#ifdef FEAT_MBYTE
5837 n = mb_ptr2char_adv(&p);
5838#else
5839 n = *p++;
5840#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00005841 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
Bram Moolenaar6de68532005-08-24 22:08:48 +00005842 && *p != NUL)
5843#ifdef FEAT_MBYTE
5844 n = mb_ptr2char_adv(&p) + (n << 16);
5845#else
5846 n = *p++ + (n << 16);
5847#endif
5848 if (n == flag)
5849 return TRUE;
5850 }
5851 break;
5852
Bram Moolenaar95529562005-08-25 21:21:38 +00005853 case AFT_NUM:
Bram Moolenaar6de68532005-08-24 22:08:48 +00005854 for (p = afflist; *p != NUL; )
5855 {
5856 n = getdigits(&p);
5857 if (n == flag)
5858 return TRUE;
5859 if (*p != NUL) /* skip over comma */
5860 ++p;
5861 }
5862 break;
5863 }
5864 return FALSE;
5865}
5866
5867/*
5868 * Give a warning when "spinval" and "affval" numbers are set and not the same.
5869 */
5870 static void
5871aff_check_number(spinval, affval, name)
5872 int spinval;
5873 int affval;
5874 char *name;
5875{
5876 if (spinval != 0 && spinval != affval)
5877 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
5878}
5879
5880/*
5881 * Give a warning when "spinval" and "affval" strings are set and not the same.
5882 */
5883 static void
5884aff_check_string(spinval, affval, name)
5885 char_u *spinval;
5886 char_u *affval;
5887 char *name;
5888{
5889 if (spinval != NULL && STRCMP(spinval, affval) != 0)
5890 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
5891}
5892
5893/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005894 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
5895 * NULL as equal.
5896 */
5897 static int
5898str_equal(s1, s2)
5899 char_u *s1;
5900 char_u *s2;
5901{
5902 if (s1 == NULL || s2 == NULL)
5903 return s1 == s2;
5904 return STRCMP(s1, s2) == 0;
5905}
5906
5907/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005908 * Add a from-to item to "gap". Used for REP and SAL items.
5909 * They are stored case-folded.
5910 */
5911 static void
5912add_fromto(spin, gap, from, to)
5913 spellinfo_T *spin;
5914 garray_T *gap;
5915 char_u *from;
5916 char_u *to;
5917{
5918 fromto_T *ftp;
5919 char_u word[MAXWLEN];
5920
5921 if (ga_grow(gap, 1) == OK)
5922 {
5923 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
5924 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005925 ftp->ft_from = getroom_save(spin, word);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005926 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005927 ftp->ft_to = getroom_save(spin, word);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005928 ++gap->ga_len;
5929 }
5930}
5931
5932/*
5933 * Convert a boolean argument in a SAL line to TRUE or FALSE;
5934 */
5935 static int
5936sal_to_bool(s)
5937 char_u *s;
5938{
5939 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
5940}
5941
5942/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00005943 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
5944 * When "s" is NULL FALSE is returned.
5945 */
5946 static int
5947has_non_ascii(s)
5948 char_u *s;
5949{
5950 char_u *p;
5951
5952 if (s != NULL)
5953 for (p = s; *p != NUL; ++p)
5954 if (*p >= 128)
5955 return TRUE;
5956 return FALSE;
5957}
5958
5959/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005960 * Free the structure filled by spell_read_aff().
5961 */
5962 static void
5963spell_free_aff(aff)
5964 afffile_T *aff;
5965{
5966 hashtab_T *ht;
5967 hashitem_T *hi;
5968 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005969 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00005970 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005971
5972 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005973
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005974 /* All this trouble to free the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005975 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
5976 {
5977 todo = ht->ht_used;
5978 for (hi = ht->ht_array; todo > 0; ++hi)
5979 {
5980 if (!HASHITEM_EMPTY(hi))
5981 {
5982 --todo;
5983 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005984 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
5985 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005986 }
5987 }
5988 if (ht == &aff->af_suff)
5989 break;
5990 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00005991
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005992 hash_clear(&aff->af_pref);
5993 hash_clear(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005994 hash_clear(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005995}
5996
5997/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00005998 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005999 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006000 */
6001 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006002spell_read_dic(spin, fname, affile)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006003 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006004 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006005 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006006{
Bram Moolenaar51485f02005-06-04 21:55:20 +00006007 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006008 char_u line[MAXLINELEN];
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006009 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006010 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006011 char_u store_afflist[MAXWLEN];
6012 int pfxlen;
6013 int need_affix;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006014 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006015 char_u *pc;
6016 char_u *w;
6017 int l;
6018 hash_T hash;
6019 hashitem_T *hi;
6020 FILE *fd;
6021 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006022 int non_ascii = 0;
6023 int retval = OK;
6024 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006025 int flags;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006026 int duplicate = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006027
Bram Moolenaar51485f02005-06-04 21:55:20 +00006028 /*
6029 * Open the file.
6030 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006031 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006032 if (fd == NULL)
6033 {
6034 EMSG2(_(e_notopen), fname);
6035 return FAIL;
6036 }
6037
Bram Moolenaar51485f02005-06-04 21:55:20 +00006038 /* The hashtable is only used to detect duplicated words. */
6039 hash_init(&ht);
6040
Bram Moolenaar4770d092006-01-12 23:22:24 +00006041 vim_snprintf((char *)IObuff, IOSIZE,
6042 _("Reading dictionary file %s ..."), fname);
6043 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006044
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006045 /* start with a message for the first line */
6046 spin->si_msg_count = 999999;
6047
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006048 /* Read and ignore the first line: word count. */
6049 (void)vim_fgets(line, MAXLINELEN, fd);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006050 if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006051 EMSG2(_("E760: No word count in %s"), fname);
6052
6053 /*
6054 * Read all the lines in the file one by one.
6055 * The words are converted to 'encoding' here, before being added to
6056 * the hashtable.
6057 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006058 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006059 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006060 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006061 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +00006062 if (line[0] == '#' || line[0] == '/')
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006063 continue; /* comment line */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006064
Bram Moolenaar51485f02005-06-04 21:55:20 +00006065 /* Remove CR, LF and white space from the end. White space halfway
6066 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006067 l = STRLEN(line);
6068 while (l > 0 && line[l - 1] <= ' ')
6069 --l;
6070 if (l == 0)
6071 continue; /* empty line */
6072 line[l] = NUL;
6073
Bram Moolenaar66fa2712006-01-22 23:22:22 +00006074 /* Truncate the word at the "/", set "afflist" to what follows.
6075 * Replace "\/" by "/" and "\\" by "\". */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006076 afflist = NULL;
6077 for (p = line; *p != NUL; mb_ptr_adv(p))
6078 {
Bram Moolenaar66fa2712006-01-22 23:22:22 +00006079 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
6080 mch_memmove(p, p + 1, STRLEN(p));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006081 else if (*p == '/')
6082 {
6083 *p = NUL;
6084 afflist = p + 1;
6085 break;
6086 }
6087 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006088
6089 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6090 if (spin->si_ascii && has_non_ascii(line))
6091 {
6092 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00006093 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006094 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00006095
Bram Moolenaarb765d632005-06-07 21:00:02 +00006096#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006097 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006098 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006099 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006100 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006101 if (pc == NULL)
6102 {
6103 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6104 fname, lnum, line);
6105 continue;
6106 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006107 w = pc;
6108 }
6109 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00006110#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006111 {
6112 pc = NULL;
6113 w = line;
6114 }
6115
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006116 /* This takes time, print a message every 10000 words. */
6117 if (spin->si_verbose && spin->si_msg_count > 10000)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006118 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006119 spin->si_msg_count = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006120 vim_snprintf((char *)message, sizeof(message),
6121 _("line %6d, word %6d - %s"),
6122 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6123 msg_start();
6124 msg_puts_long_attr(message, 0);
6125 msg_clr_eos();
6126 msg_didout = FALSE;
6127 msg_col = 0;
6128 out_flush();
6129 }
6130
Bram Moolenaar51485f02005-06-04 21:55:20 +00006131 /* Store the word in the hashtable to be able to find duplicates. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006132 dw = (char_u *)getroom_save(spin, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006133 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006134 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006135 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006136 if (retval == FAIL)
6137 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006138
Bram Moolenaar51485f02005-06-04 21:55:20 +00006139 hash = hash_hash(dw);
6140 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006141 if (!HASHITEM_EMPTY(hi))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006142 {
6143 if (p_verbose > 0)
6144 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006145 fname, lnum, dw);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006146 else if (duplicate == 0)
6147 smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6148 fname, lnum, dw);
6149 ++duplicate;
6150 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006151 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00006152 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006153
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006154 flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006155 store_afflist[0] = NUL;
6156 pfxlen = 0;
6157 need_affix = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006158 if (afflist != NULL)
6159 {
6160 /* Check for affix name that stands for keep-case word and stands
6161 * for rare word (if defined). */
Bram Moolenaar371baa92005-12-29 22:43:53 +00006162 if (affile->af_keepcase != 0 && flag_in_afflist(
6163 affile->af_flagtype, afflist, affile->af_keepcase))
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006164 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar371baa92005-12-29 22:43:53 +00006165 if (affile->af_rare != 0 && flag_in_afflist(
6166 affile->af_flagtype, afflist, affile->af_rare))
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006167 flags |= WF_RARE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006168 if (affile->af_bad != 0 && flag_in_afflist(
6169 affile->af_flagtype, afflist, affile->af_bad))
Bram Moolenaar0c405862005-06-22 22:26:26 +00006170 flags |= WF_BANNED;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006171 if (affile->af_needaffix != 0 && flag_in_afflist(
6172 affile->af_flagtype, afflist, affile->af_needaffix))
Bram Moolenaar5195e452005-08-19 20:32:47 +00006173 need_affix = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006174 if (affile->af_needcomp != 0 && flag_in_afflist(
6175 affile->af_flagtype, afflist, affile->af_needcomp))
6176 flags |= WF_NEEDCOMP;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006177
6178 if (affile->af_pfxpostpone)
6179 /* Need to store the list of prefix IDs with the word. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006180 pfxlen = get_pfxlist(affile, afflist, store_afflist);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006181
Bram Moolenaar5195e452005-08-19 20:32:47 +00006182 if (spin->si_compflags != NULL)
6183 /* Need to store the list of compound flags with the word.
6184 * Concatenate them to the list of prefix IDs. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006185 get_compflags(affile, afflist, store_afflist + pfxlen);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006186 }
6187
Bram Moolenaar51485f02005-06-04 21:55:20 +00006188 /* Add the word to the word tree(s). */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006189 if (store_word(spin, dw, flags, spin->si_region,
6190 store_afflist, need_affix) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006191 retval = FAIL;
6192
6193 if (afflist != NULL)
6194 {
6195 /* Find all matching suffixes and add the resulting words.
6196 * Additionally do matching prefixes that combine. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006197 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006198 &affile->af_suff, &affile->af_pref,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006199 FALSE, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006200 retval = FAIL;
6201
6202 /* Find all matching prefixes and add the resulting words. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006203 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006204 &affile->af_pref, NULL,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006205 FALSE, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006206 retval = FAIL;
6207 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006208 }
6209
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006210 if (duplicate > 0)
6211 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006212 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006213 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6214 non_ascii, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006215 hash_clear(&ht);
6216
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006217 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006218 return retval;
6219}
6220
6221/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006222 * Get the list of prefix IDs from the affix list "afflist".
6223 * Used for PFXPOSTPONE.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006224 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6225 * and return the number of affixes.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006226 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006227 static int
6228get_pfxlist(affile, afflist, store_afflist)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006229 afffile_T *affile;
6230 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006231 char_u *store_afflist;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006232{
6233 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006234 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006235 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006236 int id;
6237 char_u key[AH_KEY_LEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006238 hashitem_T *hi;
6239
Bram Moolenaar6de68532005-08-24 22:08:48 +00006240 for (p = afflist; *p != NUL; )
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006241 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006242 prevp = p;
6243 if (get_affitem(affile->af_flagtype, &p) != 0)
6244 {
6245 /* A flag is a postponed prefix flag if it appears in "af_pref"
6246 * and it's ID is not zero. */
6247 vim_strncpy(key, prevp, p - prevp);
6248 hi = hash_find(&affile->af_pref, key);
6249 if (!HASHITEM_EMPTY(hi))
6250 {
6251 id = HI2AH(hi)->ah_newID;
6252 if (id != 0)
6253 store_afflist[cnt++] = id;
6254 }
6255 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006256 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006257 ++p;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006258 }
6259
Bram Moolenaar5195e452005-08-19 20:32:47 +00006260 store_afflist[cnt] = NUL;
6261 return cnt;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006262}
6263
6264/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00006265 * Get the list of compound IDs from the affix list "afflist" that are used
6266 * for compound words.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006267 * Puts the flags in "store_afflist[]".
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006268 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006269 static void
Bram Moolenaar6de68532005-08-24 22:08:48 +00006270get_compflags(affile, afflist, store_afflist)
6271 afffile_T *affile;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006272 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006273 char_u *store_afflist;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006274{
6275 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006276 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006277 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006278 char_u key[AH_KEY_LEN];
6279 hashitem_T *hi;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006280
Bram Moolenaar6de68532005-08-24 22:08:48 +00006281 for (p = afflist; *p != NUL; )
6282 {
6283 prevp = p;
6284 if (get_affitem(affile->af_flagtype, &p) != 0)
6285 {
6286 /* A flag is a compound flag if it appears in "af_comp". */
6287 vim_strncpy(key, prevp, p - prevp);
6288 hi = hash_find(&affile->af_comp, key);
6289 if (!HASHITEM_EMPTY(hi))
6290 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6291 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006292 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006293 ++p;
6294 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006295
Bram Moolenaar5195e452005-08-19 20:32:47 +00006296 store_afflist[cnt] = NUL;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006297}
6298
6299/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006300 * Apply affixes to a word and store the resulting words.
6301 * "ht" is the hashtable with affentry_T that need to be applied, either
6302 * prefixes or suffixes.
6303 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6304 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006305 *
6306 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006307 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006308 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00006309store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags,
6310 pfxlist, pfxlen)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006311 spellinfo_T *spin; /* spell info */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006312 char_u *word; /* basic word start */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006313 char_u *afflist; /* list of names of supported affixes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006314 afffile_T *affile;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006315 hashtab_T *ht;
6316 hashtab_T *xht;
6317 int comb; /* only use affixes that combine */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006318 int flags; /* flags for the word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006319 char_u *pfxlist; /* list of prefix IDs */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006320 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest
6321 * is compound flags */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006322{
6323 int todo;
6324 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006325 affheader_T *ah;
6326 affentry_T *ae;
6327 regmatch_T regmatch;
6328 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006329 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006330 int i;
6331 char_u *p;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006332 int use_flags;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006333 char_u *use_pfxlist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006334 char_u pfx_pfxlist[MAXWLEN];
Bram Moolenaar5195e452005-08-19 20:32:47 +00006335 size_t wordlen = STRLEN(word);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006336
Bram Moolenaar51485f02005-06-04 21:55:20 +00006337 todo = ht->ht_used;
6338 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006339 {
6340 if (!HASHITEM_EMPTY(hi))
6341 {
6342 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006343 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00006344
Bram Moolenaar51485f02005-06-04 21:55:20 +00006345 /* Check that the affix combines, if required, and that the word
6346 * supports this affix. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006347 if ((!comb || ah->ah_combine) && flag_in_afflist(
6348 affile->af_flagtype, afflist, ah->ah_flag))
Bram Moolenaar5482f332005-04-17 20:18:43 +00006349 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006350 /* Loop over all affix entries with this name. */
6351 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006352 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006353 /* Check the condition. It's not logical to match case
6354 * here, but it is required for compatibility with
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006355 * Myspell.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006356 * Another requirement from Myspell is that the chop
6357 * string is shorter than the word itself.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006358 * For prefixes, when "PFXPOSTPONE" was used, only do
6359 * prefixes with a chop string. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006360 regmatch.regprog = ae->ae_prog;
6361 regmatch.rm_ic = FALSE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006362 if ((xht != NULL || !affile->af_pfxpostpone
6363 || ae->ae_chop != NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006364 && (ae->ae_chop == NULL
6365 || STRLEN(ae->ae_chop) < wordlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006366 && (ae->ae_prog == NULL
6367 || vim_regexec(&regmatch, word, (colnr_T)0)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006368 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006369 /* Match. Remove the chop and add the affix. */
6370 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006371 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006372 /* prefix: chop/add at the start of the word */
6373 if (ae->ae_add == NULL)
6374 *newword = NUL;
6375 else
6376 STRCPY(newword, ae->ae_add);
6377 p = word;
6378 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00006379 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006380 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006381#ifdef FEAT_MBYTE
6382 if (has_mbyte)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006383 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00006384 i = mb_charlen(ae->ae_chop);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006385 for ( ; i > 0; --i)
6386 mb_ptr_adv(p);
6387 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00006388 else
6389#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006390 p += STRLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006391 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006392 STRCAT(newword, p);
6393 }
6394 else
6395 {
6396 /* suffix: chop/add at the end of the word */
6397 STRCPY(newword, word);
6398 if (ae->ae_chop != NULL)
6399 {
6400 /* Remove chop string. */
6401 p = newword + STRLEN(newword);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00006402 i = MB_CHARLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006403 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006404 mb_ptr_back(newword, p);
6405 *p = NUL;
6406 }
6407 if (ae->ae_add != NULL)
6408 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006409 }
6410
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006411 /* Obey the "rare" flag of the affix. */
6412 if (ae->ae_rare)
6413 use_flags = flags | WF_RARE;
6414 else
6415 use_flags = flags;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006416
6417 /* Obey the "nocomp" flag of the affix: don't use the
6418 * compound flags. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006419 use_pfxlist = pfxlist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006420 if (ae->ae_nocomp && pfxlist != NULL)
6421 {
6422 vim_strncpy(pfx_pfxlist, pfxlist, pfxlen);
6423 use_pfxlist = pfx_pfxlist;
6424 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006425
6426 /* When there are postponed prefixes... */
Bram Moolenaar551f84f2005-07-06 22:29:20 +00006427 if (spin->si_prefroot != NULL
6428 && spin->si_prefroot->wn_sibling != NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006429 {
6430 /* ... add a flag to indicate an affix was used. */
6431 use_flags |= WF_HAS_AFF;
6432
6433 /* ... don't use a prefix list if combining
Bram Moolenaar5195e452005-08-19 20:32:47 +00006434 * affixes is not allowed. But do use the
6435 * compound flags after them. */
6436 if ((!ah->ah_combine || comb) && pfxlist != NULL)
6437 use_pfxlist += pfxlen;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006438 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006439
Bram Moolenaar51485f02005-06-04 21:55:20 +00006440 /* Store the modified word. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006441 if (store_word(spin, newword, use_flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006442 spin->si_region, use_pfxlist, FALSE) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006443 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006444
Bram Moolenaar51485f02005-06-04 21:55:20 +00006445 /* When added a suffix and combining is allowed also
6446 * try adding prefixes additionally. */
6447 if (xht != NULL && ah->ah_combine)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006448 if (store_aff_word(spin, newword, afflist, affile,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006449 xht, NULL, TRUE,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006450 use_flags, use_pfxlist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006451 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006452 }
6453 }
6454 }
6455 }
6456 }
6457
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006458 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006459}
6460
6461/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006462 * Read a file with a list of words.
6463 */
6464 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006465spell_read_wordfile(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006466 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006467 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006468{
6469 FILE *fd;
6470 long lnum = 0;
6471 char_u rline[MAXLINELEN];
6472 char_u *line;
6473 char_u *pc = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +00006474 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006475 int l;
6476 int retval = OK;
6477 int did_word = FALSE;
6478 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006479 int flags;
Bram Moolenaar3982c542005-06-08 21:56:31 +00006480 int regionmask;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006481
6482 /*
6483 * Open the file.
6484 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006485 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00006486 if (fd == NULL)
6487 {
6488 EMSG2(_(e_notopen), fname);
6489 return FAIL;
6490 }
6491
Bram Moolenaar4770d092006-01-12 23:22:24 +00006492 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
6493 spell_message(spin, IObuff);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006494
6495 /*
6496 * Read all the lines in the file one by one.
6497 */
6498 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
6499 {
6500 line_breakcheck();
6501 ++lnum;
6502
6503 /* Skip comment lines. */
6504 if (*rline == '#')
6505 continue;
6506
6507 /* Remove CR, LF and white space from the end. */
6508 l = STRLEN(rline);
6509 while (l > 0 && rline[l - 1] <= ' ')
6510 --l;
6511 if (l == 0)
6512 continue; /* empty or blank line */
6513 rline[l] = NUL;
6514
6515 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
6516 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006517#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00006518 if (spin->si_conv.vc_type != CONV_NONE)
6519 {
6520 pc = string_convert(&spin->si_conv, rline, NULL);
6521 if (pc == NULL)
6522 {
6523 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6524 fname, lnum, rline);
6525 continue;
6526 }
6527 line = pc;
6528 }
6529 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00006530#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00006531 {
6532 pc = NULL;
6533 line = rline;
6534 }
6535
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006536 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00006537 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006538 ++line;
6539 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006540 {
6541 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006542 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
6543 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006544 else if (did_word)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006545 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
6546 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006547 else
6548 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00006549#ifdef FEAT_MBYTE
6550 char_u *enc;
6551
Bram Moolenaar51485f02005-06-04 21:55:20 +00006552 /* Setup for conversion to 'encoding'. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006553 line += 10;
6554 enc = enc_canonize(line);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006555 if (enc != NULL && !spin->si_ascii
6556 && convert_setup(&spin->si_conv, enc,
6557 p_enc) == FAIL)
6558 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar3982c542005-06-08 21:56:31 +00006559 fname, line, p_enc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006560 vim_free(enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006561 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00006562#else
6563 smsg((char_u *)_("Conversion in %s not supported"), fname);
6564#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00006565 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006566 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006567 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006568
Bram Moolenaar3982c542005-06-08 21:56:31 +00006569 if (STRNCMP(line, "regions=", 8) == 0)
6570 {
6571 if (spin->si_region_count > 1)
6572 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
6573 fname, lnum, line);
6574 else
6575 {
6576 line += 8;
6577 if (STRLEN(line) > 16)
6578 smsg((char_u *)_("Too many regions in %s line %d: %s"),
6579 fname, lnum, line);
6580 else
6581 {
6582 spin->si_region_count = STRLEN(line) / 2;
6583 STRCPY(spin->si_region_name, line);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006584
6585 /* Adjust the mask for a word valid in all regions. */
6586 spin->si_region = (1 << spin->si_region_count) - 1;
Bram Moolenaar3982c542005-06-08 21:56:31 +00006587 }
6588 }
6589 continue;
6590 }
6591
Bram Moolenaar7887d882005-07-01 22:33:52 +00006592 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
6593 fname, lnum, line - 1);
6594 continue;
6595 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006596
Bram Moolenaar7887d882005-07-01 22:33:52 +00006597 flags = 0;
6598 regionmask = spin->si_region;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006599
Bram Moolenaar7887d882005-07-01 22:33:52 +00006600 /* Check for flags and region after a slash. */
6601 p = vim_strchr(line, '/');
6602 if (p != NULL)
6603 {
6604 *p++ = NUL;
6605 while (*p != NUL)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006606 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00006607 if (*p == '=') /* keep-case word */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006608 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar7887d882005-07-01 22:33:52 +00006609 else if (*p == '!') /* Bad, bad, wicked word. */
6610 flags |= WF_BANNED;
6611 else if (*p == '?') /* Rare word. */
6612 flags |= WF_RARE;
6613 else if (VIM_ISDIGIT(*p)) /* region number(s) */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006614 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00006615 if ((flags & WF_REGION) == 0) /* first one */
6616 regionmask = 0;
6617 flags |= WF_REGION;
6618
6619 l = *p - '0';
Bram Moolenaar3982c542005-06-08 21:56:31 +00006620 if (l > spin->si_region_count)
6621 {
6622 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
Bram Moolenaar7887d882005-07-01 22:33:52 +00006623 fname, lnum, p);
Bram Moolenaar3982c542005-06-08 21:56:31 +00006624 break;
6625 }
6626 regionmask |= 1 << (l - 1);
Bram Moolenaar3982c542005-06-08 21:56:31 +00006627 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00006628 else
6629 {
6630 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
6631 fname, lnum, p);
6632 break;
6633 }
6634 ++p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006635 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006636 }
6637
6638 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6639 if (spin->si_ascii && has_non_ascii(line))
6640 {
6641 ++non_ascii;
6642 continue;
6643 }
6644
6645 /* Normal word: store it. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006646 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006647 {
6648 retval = FAIL;
6649 break;
6650 }
6651 did_word = TRUE;
6652 }
6653
6654 vim_free(pc);
6655 fclose(fd);
6656
Bram Moolenaar4770d092006-01-12 23:22:24 +00006657 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarb765d632005-06-07 21:00:02 +00006658 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006659 vim_snprintf((char *)IObuff, IOSIZE,
6660 _("Ignored %d words with non-ASCII characters"), non_ascii);
6661 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006662 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006663
Bram Moolenaar51485f02005-06-04 21:55:20 +00006664 return retval;
6665}
6666
6667/*
6668 * Get part of an sblock_T, "len" bytes long.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006669 * This avoids calling free() for every little struct we use (and keeping
6670 * track of them).
Bram Moolenaar51485f02005-06-04 21:55:20 +00006671 * The memory is cleared to all zeros.
6672 * Returns NULL when out of memory.
6673 */
6674 static void *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006675getroom(spin, len, align)
6676 spellinfo_T *spin;
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00006677 size_t len; /* length needed */
6678 int align; /* align for pointer */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006679{
6680 char_u *p;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006681 sblock_T *bl = spin->si_blocks;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006682
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00006683 if (align && bl != NULL)
6684 /* Round size up for alignment. On some systems structures need to be
6685 * aligned to the size of a pointer (e.g., SPARC). */
6686 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
6687 & ~(sizeof(char *) - 1);
6688
Bram Moolenaar51485f02005-06-04 21:55:20 +00006689 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
6690 {
6691 /* Allocate a block of memory. This is not freed until much later. */
6692 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
6693 if (bl == NULL)
6694 return NULL;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006695 bl->sb_next = spin->si_blocks;
6696 spin->si_blocks = bl;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006697 bl->sb_used = 0;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006698 ++spin->si_blocks_cnt;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006699 }
6700
6701 p = bl->sb_data + bl->sb_used;
6702 bl->sb_used += len;
6703
6704 return p;
6705}
6706
6707/*
6708 * Make a copy of a string into memory allocated with getroom().
6709 */
6710 static char_u *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006711getroom_save(spin, s)
6712 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006713 char_u *s;
6714{
6715 char_u *sc;
6716
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006717 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006718 if (sc != NULL)
6719 STRCPY(sc, s);
6720 return sc;
6721}
6722
6723
6724/*
6725 * Free the list of allocated sblock_T.
6726 */
6727 static void
6728free_blocks(bl)
6729 sblock_T *bl;
6730{
6731 sblock_T *next;
6732
6733 while (bl != NULL)
6734 {
6735 next = bl->sb_next;
6736 vim_free(bl);
6737 bl = next;
6738 }
6739}
6740
6741/*
6742 * Allocate the root of a word tree.
6743 */
6744 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006745wordtree_alloc(spin)
6746 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006747{
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006748 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006749}
6750
6751/*
6752 * Store a word in the tree(s).
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006753 * Always store it in the case-folded tree. For a keep-case word this is
6754 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
6755 * used to find suggestions.
Bram Moolenaar51485f02005-06-04 21:55:20 +00006756 * For a keep-case word also store it in the keep-case tree.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006757 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
6758 * compound flag.
Bram Moolenaar51485f02005-06-04 21:55:20 +00006759 */
6760 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00006761store_word(spin, word, flags, region, pfxlist, need_affix)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006762 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006763 char_u *word;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006764 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006765 int region; /* supported region(s) */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006766 char_u *pfxlist; /* list of prefix IDs or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006767 int need_affix; /* only store word with affix ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006768{
6769 int len = STRLEN(word);
6770 int ct = captype(word, word + len);
6771 char_u foldword[MAXWLEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006772 int res = OK;
6773 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006774
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006775 (void)spell_casefold(word, len, foldword, MAXWLEN);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006776 for (p = pfxlist; res == OK; ++p)
6777 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00006778 if (!need_affix || (p != NULL && *p != NUL))
6779 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006780 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006781 if (p == NULL || *p == NUL)
6782 break;
6783 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00006784 ++spin->si_foldwcount;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006785
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006786 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
Bram Moolenaar8db73182005-06-17 21:51:16 +00006787 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006788 for (p = pfxlist; res == OK; ++p)
6789 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00006790 if (!need_affix || (p != NULL && *p != NUL))
6791 res = tree_add_word(spin, word, spin->si_keeproot, flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006792 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006793 if (p == NULL || *p == NUL)
6794 break;
6795 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00006796 ++spin->si_keepwcount;
6797 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006798 return res;
6799}
6800
6801/*
6802 * Add word "word" to a word tree at "root".
Bram Moolenaar4770d092006-01-12 23:22:24 +00006803 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006804 * "rare" and "region" is the condition nr.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006805 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006806 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006807 static int
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006808tree_add_word(spin, word, root, flags, region, affixID)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006809 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006810 char_u *word;
6811 wordnode_T *root;
6812 int flags;
6813 int region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006814 int affixID;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006815{
Bram Moolenaar51485f02005-06-04 21:55:20 +00006816 wordnode_T *node = root;
6817 wordnode_T *np;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006818 wordnode_T *copyp, **copyprev;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006819 wordnode_T **prev = NULL;
6820 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006821
Bram Moolenaar51485f02005-06-04 21:55:20 +00006822 /* Add each byte of the word to the tree, including the NUL at the end. */
6823 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006824 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006825 /* When there is more than one reference to this node we need to make
6826 * a copy, so that we can modify it. Copy the whole list of siblings
6827 * (we don't optimize for a partly shared list of siblings). */
6828 if (node != NULL && node->wn_refs > 1)
6829 {
6830 --node->wn_refs;
6831 copyprev = prev;
6832 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
6833 {
6834 /* Allocate a new node and copy the info. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006835 np = get_wordnode(spin);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006836 if (np == NULL)
6837 return FAIL;
6838 np->wn_child = copyp->wn_child;
6839 if (np->wn_child != NULL)
6840 ++np->wn_child->wn_refs; /* child gets extra ref */
6841 np->wn_byte = copyp->wn_byte;
6842 if (np->wn_byte == NUL)
6843 {
6844 np->wn_flags = copyp->wn_flags;
6845 np->wn_region = copyp->wn_region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006846 np->wn_affixID = copyp->wn_affixID;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006847 }
6848
6849 /* Link the new node in the list, there will be one ref. */
6850 np->wn_refs = 1;
6851 *copyprev = np;
6852 copyprev = &np->wn_sibling;
6853
6854 /* Let "node" point to the head of the copied list. */
6855 if (copyp == node)
6856 node = np;
6857 }
6858 }
6859
Bram Moolenaar51485f02005-06-04 21:55:20 +00006860 /* Look for the sibling that has the same character. They are sorted
6861 * on byte value, thus stop searching when a sibling is found with a
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006862 * higher byte value. For zero bytes (end of word) the sorting is
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006863 * done on flags and then on affixID. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006864 while (node != NULL
6865 && (node->wn_byte < word[i]
6866 || (node->wn_byte == NUL
6867 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00006868 ? node->wn_affixID < (unsigned)affixID
6869 : (node->wn_flags < (unsigned)(flags & WN_MASK)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006870 || (node->wn_flags == (flags & WN_MASK)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006871 && (spin->si_sugtree
6872 ? (node->wn_region & 0xffff) < region
6873 : node->wn_affixID
6874 < (unsigned)affixID)))))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006875 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006876 prev = &node->wn_sibling;
6877 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006878 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006879 if (node == NULL
6880 || node->wn_byte != word[i]
6881 || (word[i] == NUL
6882 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00006883 || spin->si_sugtree
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006884 || node->wn_flags != (flags & WN_MASK)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006885 || node->wn_affixID != affixID)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006886 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006887 /* Allocate a new node. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006888 np = get_wordnode(spin);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006889 if (np == NULL)
6890 return FAIL;
6891 np->wn_byte = word[i];
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006892
6893 /* If "node" is NULL this is a new child or the end of the sibling
6894 * list: ref count is one. Otherwise use ref count of sibling and
6895 * make ref count of sibling one (matters when inserting in front
6896 * of the list of siblings). */
6897 if (node == NULL)
6898 np->wn_refs = 1;
6899 else
6900 {
6901 np->wn_refs = node->wn_refs;
6902 node->wn_refs = 1;
6903 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006904 *prev = np;
6905 np->wn_sibling = node;
6906 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006907 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006908
Bram Moolenaar51485f02005-06-04 21:55:20 +00006909 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006910 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006911 node->wn_flags = flags;
6912 node->wn_region |= region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006913 node->wn_affixID = affixID;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006914 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00006915 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006916 prev = &node->wn_child;
6917 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006918 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006919#ifdef SPELL_PRINTTREE
6920 smsg("Added \"%s\"", word);
6921 spell_print_tree(root->wn_sibling);
6922#endif
6923
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006924 /* count nr of words added since last message */
6925 ++spin->si_msg_count;
6926
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006927 if (spin->si_compress_cnt > 1)
6928 {
6929 if (--spin->si_compress_cnt == 1)
6930 /* Did enough words to lower the block count limit. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006931 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006932 }
6933
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006934 /*
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006935 * When we have allocated lots of memory we need to compress the word tree
6936 * to free up some room. But compression is slow, and we might actually
6937 * need that room, thus only compress in the following situations:
6938 * 1. When not compressed before (si_compress_cnt == 0): when using
Bram Moolenaar5195e452005-08-19 20:32:47 +00006939 * "compress_start" blocks.
6940 * 2. When compressed before and used "compress_inc" blocks before
6941 * adding "compress_added" words (si_compress_cnt > 1).
6942 * 3. When compressed before, added "compress_added" words
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006943 * (si_compress_cnt == 1) and the number of free nodes drops below the
6944 * maximum word length.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006945 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006946#ifndef SPELL_PRINTTREE
6947 if (spin->si_compress_cnt == 1
6948 ? spin->si_free_count < MAXWLEN
Bram Moolenaar5195e452005-08-19 20:32:47 +00006949 : spin->si_blocks_cnt >= compress_start)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006950#endif
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006951 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006952 /* Decrement the block counter. The effect is that we compress again
Bram Moolenaar5195e452005-08-19 20:32:47 +00006953 * when the freed up room has been used and another "compress_inc"
6954 * blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006955 * been added, then the limit is put back again. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006956 spin->si_blocks_cnt -= compress_inc;
6957 spin->si_compress_cnt = compress_added;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006958
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006959 if (spin->si_verbose)
6960 {
6961 msg_start();
6962 msg_puts((char_u *)_(msg_compressing));
6963 msg_clr_eos();
6964 msg_didout = FALSE;
6965 msg_col = 0;
6966 out_flush();
6967 }
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006968
6969 /* Compress both trees. Either they both have many nodes, which makes
6970 * compression useful, or one of them is small, which means
Bram Moolenaar4770d092006-01-12 23:22:24 +00006971 * compression goes fast. But when filling the souldfold word tree
6972 * there is no keep-case tree. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006973 wordtree_compress(spin, spin->si_foldroot);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006974 if (affixID >= 0)
6975 wordtree_compress(spin, spin->si_keeproot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006976 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006977
6978 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006979}
6980
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006981/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00006982 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
6983 * Sets "sps_flags".
6984 */
6985 int
6986spell_check_msm()
6987{
6988 char_u *p = p_msm;
6989 long start = 0;
6990 long inc = 0;
6991 long added = 0;
6992
6993 if (!VIM_ISDIGIT(*p))
6994 return FAIL;
6995 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
6996 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
6997 if (*p != ',')
6998 return FAIL;
6999 ++p;
7000 if (!VIM_ISDIGIT(*p))
7001 return FAIL;
7002 inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
7003 if (*p != ',')
7004 return FAIL;
7005 ++p;
7006 if (!VIM_ISDIGIT(*p))
7007 return FAIL;
7008 added = getdigits(&p) * 1024;
7009 if (*p != NUL)
7010 return FAIL;
7011
7012 if (start == 0 || inc == 0 || added == 0 || inc > start)
7013 return FAIL;
7014
7015 compress_start = start;
7016 compress_inc = inc;
7017 compress_added = added;
7018 return OK;
7019}
7020
7021
7022/*
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007023 * Get a wordnode_T, either from the list of previously freed nodes or
7024 * allocate a new one.
7025 */
7026 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007027get_wordnode(spin)
7028 spellinfo_T *spin;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007029{
7030 wordnode_T *n;
7031
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007032 if (spin->si_first_free == NULL)
7033 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007034 else
7035 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007036 n = spin->si_first_free;
7037 spin->si_first_free = n->wn_child;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007038 vim_memset(n, 0, sizeof(wordnode_T));
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007039 --spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007040 }
7041#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007042 n->wn_nr = ++spin->si_wordnode_nr;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007043#endif
7044 return n;
7045}
7046
7047/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007048 * Decrement the reference count on a node (which is the head of a list of
7049 * siblings). If the reference count becomes zero free the node and its
7050 * siblings.
Bram Moolenaar4770d092006-01-12 23:22:24 +00007051 * Returns the number of nodes actually freed.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007052 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00007053 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007054deref_wordnode(spin, node)
7055 spellinfo_T *spin;
7056 wordnode_T *node;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007057{
Bram Moolenaar4770d092006-01-12 23:22:24 +00007058 wordnode_T *np;
7059 int cnt = 0;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007060
7061 if (--node->wn_refs == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007062 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007063 for (np = node; np != NULL; np = np->wn_sibling)
7064 {
7065 if (np->wn_child != NULL)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007066 cnt += deref_wordnode(spin, np->wn_child);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007067 free_wordnode(spin, np);
Bram Moolenaar4770d092006-01-12 23:22:24 +00007068 ++cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007069 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007070 ++cnt; /* length field */
7071 }
7072 return cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007073}
7074
7075/*
7076 * Free a wordnode_T for re-use later.
7077 * Only the "wn_child" field becomes invalid.
7078 */
7079 static void
7080free_wordnode(spin, n)
7081 spellinfo_T *spin;
7082 wordnode_T *n;
7083{
7084 n->wn_child = spin->si_first_free;
7085 spin->si_first_free = n;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007086 ++spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007087}
7088
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007089/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007090 * Compress a tree: find tails that are identical and can be shared.
7091 */
7092 static void
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007093wordtree_compress(spin, root)
Bram Moolenaarb765d632005-06-07 21:00:02 +00007094 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007095 wordnode_T *root;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007096{
7097 hashtab_T ht;
7098 int n;
7099 int tot = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007100 int perc;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007101
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007102 /* Skip the root itself, it's not actually used. The first sibling is the
7103 * start of the tree. */
7104 if (root->wn_sibling != NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007105 {
7106 hash_init(&ht);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007107 n = node_compress(spin, root->wn_sibling, &ht, &tot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007108
7109#ifndef SPELL_PRINTTREE
Bram Moolenaarb765d632005-06-07 21:00:02 +00007110 if (spin->si_verbose || p_verbose > 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007111#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00007112 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007113 if (tot > 1000000)
7114 perc = (tot - n) / (tot / 100);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007115 else if (tot == 0)
7116 perc = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007117 else
7118 perc = (tot - n) * 100 / tot;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007119 vim_snprintf((char *)IObuff, IOSIZE,
7120 _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7121 n, tot, tot - n, perc);
7122 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007123 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007124#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007125 spell_print_tree(root->wn_sibling);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007126#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00007127 hash_clear(&ht);
7128 }
7129}
7130
7131/*
7132 * Compress a node, its siblings and its children, depth first.
7133 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007134 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007135 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007136node_compress(spin, node, ht, tot)
7137 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007138 wordnode_T *node;
7139 hashtab_T *ht;
7140 int *tot; /* total count of nodes before compressing,
7141 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007142{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007143 wordnode_T *np;
7144 wordnode_T *tp;
7145 wordnode_T *child;
7146 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007147 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007148 int len = 0;
7149 unsigned nr, n;
7150 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007151
Bram Moolenaar51485f02005-06-04 21:55:20 +00007152 /*
7153 * Go through the list of siblings. Compress each child and then try
7154 * finding an identical child to replace it.
7155 * Note that with "child" we mean not just the node that is pointed to,
Bram Moolenaar4770d092006-01-12 23:22:24 +00007156 * but the whole list of siblings of which the child node is the first.
Bram Moolenaar51485f02005-06-04 21:55:20 +00007157 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007158 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007159 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007160 ++len;
7161 if ((child = np->wn_child) != NULL)
7162 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007163 /* Compress the child first. This fills hashkey. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007164 compressed += node_compress(spin, child, ht, tot);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007165
7166 /* Try to find an identical child. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007167 hash = hash_hash(child->wn_u1.hashkey);
7168 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007169 if (!HASHITEM_EMPTY(hi))
7170 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007171 /* There are children we encountered before with a hash value
7172 * identical to the current child. Now check if there is one
7173 * that is really identical. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007174 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007175 if (node_equal(child, tp))
7176 {
7177 /* Found one! Now use that child in place of the
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007178 * current one. This means the current child and all
7179 * its siblings is unlinked from the tree. */
7180 ++tp->wn_refs;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007181 compressed += deref_wordnode(spin, child);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007182 np->wn_child = tp;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007183 break;
7184 }
7185 if (tp == NULL)
7186 {
7187 /* No other child with this hash value equals the child of
7188 * the node, add it to the linked list after the first
7189 * item. */
7190 tp = HI2WN(hi);
Bram Moolenaar0c405862005-06-22 22:26:26 +00007191 child->wn_u2.next = tp->wn_u2.next;
7192 tp->wn_u2.next = child;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007193 }
7194 }
7195 else
7196 /* No other child has this hash value, add it to the
7197 * hashtable. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007198 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007199 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007200 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007201 *tot += len + 1; /* add one for the node that stores the length */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007202
7203 /*
7204 * Make a hash key for the node and its siblings, so that we can quickly
7205 * find a lookalike node. This must be done after compressing the sibling
7206 * list, otherwise the hash key would become invalid by the compression.
7207 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007208 node->wn_u1.hashkey[0] = len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007209 nr = 0;
7210 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007211 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007212 if (np->wn_byte == NUL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007213 /* end node: use wn_flags, wn_region and wn_affixID */
7214 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007215 else
7216 /* byte node: use the byte value and the child pointer */
7217 n = np->wn_byte + ((long_u)np->wn_child << 8);
7218 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007219 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007220
7221 /* Avoid NUL bytes, it terminates the hash key. */
7222 n = nr & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007223 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007224 n = (nr >> 8) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007225 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007226 n = (nr >> 16) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007227 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007228 n = (nr >> 24) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007229 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
7230 node->wn_u1.hashkey[5] = NUL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007231
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007232 /* Check for CTRL-C pressed now and then. */
7233 fast_breakcheck();
7234
Bram Moolenaar51485f02005-06-04 21:55:20 +00007235 return compressed;
7236}
7237
7238/*
7239 * Return TRUE when two nodes have identical siblings and children.
7240 */
7241 static int
7242node_equal(n1, n2)
7243 wordnode_T *n1;
7244 wordnode_T *n2;
7245{
7246 wordnode_T *p1;
7247 wordnode_T *p2;
7248
7249 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
7250 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
7251 if (p1->wn_byte != p2->wn_byte
7252 || (p1->wn_byte == NUL
7253 ? (p1->wn_flags != p2->wn_flags
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007254 || p1->wn_region != p2->wn_region
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007255 || p1->wn_affixID != p2->wn_affixID)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007256 : (p1->wn_child != p2->wn_child)))
7257 break;
7258
7259 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007260}
7261
/*
 * Write number "nr" to file "fd", most significant byte first, using "len"
 * bytes.
 * When "len" is larger than the size of "nr" the extra leading bytes are
 * written as zero.
 * Write errors are not reported here, the caller must check the stream.
 */
    void
put_bytes(fd, nr, len)
    FILE    *fd;
    long_u  nr;
    int     len;
{
    int     i;

    for (i = len - 1; i >= 0; --i)
        if (i >= (int)sizeof(nr))
            /* Shifting by the width of the type or more is undefined; these
             * bytes are beyond the value and thus always zero. */
            putc(0, fd);
        else
            putc((int)((nr >> (i * 8)) & 0xff), fd);
}
7276
Bram Moolenaar4770d092006-01-12 23:22:24 +00007277/*
7278 * Write spin->si_sugtime to file "fd".
7279 */
7280 static void
7281put_sugtime(spin, fd)
7282 spellinfo_T *spin;
7283 FILE *fd;
7284{
7285 int c;
7286 int i;
7287
7288 /* time_t can be up to 8 bytes in size, more than long_u, thus we
7289 * can't use put_bytes() here. */
7290 for (i = 7; i >= 0; --i)
7291 if (i + 1 > sizeof(time_t))
7292 /* ">>" doesn't work well when shifting more bits than avail */
7293 putc(0, fd);
7294 else
7295 {
7296 c = (unsigned)spin->si_sugtime >> (i * 8);
7297 putc(c, fd);
7298 }
7299}
7300
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007301static int
7302#ifdef __BORLANDC__
7303_RTLENTRYF
7304#endif
7305rep_compare __ARGS((const void *s1, const void *s2));
7306
7307/*
7308 * Function given to qsort() to sort the REP items on "from" string.
7309 */
7310 static int
7311#ifdef __BORLANDC__
7312_RTLENTRYF
7313#endif
7314rep_compare(s1, s2)
7315 const void *s1;
7316 const void *s2;
7317{
7318 fromto_T *p1 = (fromto_T *)s1;
7319 fromto_T *p2 = (fromto_T *)s2;
7320
7321 return STRCMP(p1->ft_from, p2->ft_from);
7322}
7323
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007324/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00007325 * Write the Vim .spl file "fname".
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007326 * Return FAIL or OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007327 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007328 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007329write_vim_spell(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007330 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007331 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007332{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007333 FILE *fd;
7334 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007335 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007336 wordnode_T *tree;
7337 int nodecount;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007338 int i;
7339 int l;
7340 garray_T *gap;
7341 fromto_T *ftp;
7342 char_u *p;
7343 int rr;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007344 int retval = OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007345
Bram Moolenaarb765d632005-06-07 21:00:02 +00007346 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00007347 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007348 {
7349 EMSG2(_(e_notopen), fname);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007350 return FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007351 }
7352
Bram Moolenaar5195e452005-08-19 20:32:47 +00007353 /* <HEADER>: <fileID> <versionnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007354 /* <fileID> */
7355 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007356 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007357 EMSG(_(e_write));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007358 retval = FAIL;
7359 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00007360 putc(VIMSPELLVERSION, fd); /* <versionnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007361
Bram Moolenaar5195e452005-08-19 20:32:47 +00007362 /*
7363 * <SECTIONS>: <section> ... <sectionend>
7364 */
7365
7366 /* SN_REGION: <regionname> ...
7367 * Write the region names only if there is more than one. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007368 if (spin->si_region_count > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007369 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007370 putc(SN_REGION, fd); /* <sectionID> */
7371 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7372 l = spin->si_region_count * 2;
7373 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7374 fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
7375 /* <regionname> ... */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007376 regionmask = (1 << spin->si_region_count) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007377 }
7378 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00007379 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007380
Bram Moolenaar5195e452005-08-19 20:32:47 +00007381 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
7382 *
7383 * The table with character flags and the table for case folding.
7384 * This makes sure the same characters are recognized as word characters
7385 * when generating an when using a spell file.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007386 * Skip this for ASCII, the table may conflict with the one used for
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007387 * 'encoding'.
7388 * Also skip this for an .add.spl file, the main spell file must contain
7389 * the table (avoids that it conflicts). File is shorter too.
7390 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007391 if (!spin->si_ascii && !spin->si_add)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007392 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007393 char_u folchars[128 * 8];
7394 int flags;
7395
Bram Moolenaard12a1322005-08-21 22:08:24 +00007396 putc(SN_CHARFLAGS, fd); /* <sectionID> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007397 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7398
7399 /* Form the <folchars> string first, we need to know its length. */
7400 l = 0;
7401 for (i = 128; i < 256; ++i)
7402 {
7403#ifdef FEAT_MBYTE
7404 if (has_mbyte)
7405 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
7406 else
7407#endif
7408 folchars[l++] = spelltab.st_fold[i];
7409 }
7410 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
7411
7412 fputc(128, fd); /* <charflagslen> */
7413 for (i = 128; i < 256; ++i)
7414 {
7415 flags = 0;
7416 if (spelltab.st_isw[i])
7417 flags |= CF_WORD;
7418 if (spelltab.st_isu[i])
7419 flags |= CF_UPPER;
7420 fputc(flags, fd); /* <charflags> */
7421 }
7422
7423 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
7424 fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007425 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007426
Bram Moolenaar5195e452005-08-19 20:32:47 +00007427 /* SN_MIDWORD: <midword> */
7428 if (spin->si_midword != NULL)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007429 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007430 putc(SN_MIDWORD, fd); /* <sectionID> */
7431 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7432
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007433 i = STRLEN(spin->si_midword);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007434 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007435 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
7436 }
7437
Bram Moolenaar5195e452005-08-19 20:32:47 +00007438 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
7439 if (spin->si_prefcond.ga_len > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007440 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007441 putc(SN_PREFCOND, fd); /* <sectionID> */
7442 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7443
7444 l = write_spell_prefcond(NULL, &spin->si_prefcond);
7445 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7446
7447 write_spell_prefcond(fd, &spin->si_prefcond);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007448 }
7449
Bram Moolenaar5195e452005-08-19 20:32:47 +00007450 /* SN_REP: <repcount> <rep> ...
Bram Moolenaar4770d092006-01-12 23:22:24 +00007451 * SN_SAL: <salflags> <salcount> <sal> ...
7452 * SN_REPSAL: <repcount> <rep> ... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007453
Bram Moolenaar5195e452005-08-19 20:32:47 +00007454 /* round 1: SN_REP section
Bram Moolenaar4770d092006-01-12 23:22:24 +00007455 * round 2: SN_SAL section (unless SN_SOFO is used)
7456 * round 3: SN_REPSAL section */
7457 for (round = 1; round <= 3; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007458 {
7459 if (round == 1)
7460 gap = &spin->si_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007461 else if (round == 2)
7462 {
7463 /* Don't write SN_SAL when using a SN_SOFO section */
7464 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7465 continue;
7466 gap = &spin->si_sal;
Bram Moolenaar5195e452005-08-19 20:32:47 +00007467 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007468 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00007469 gap = &spin->si_repsal;
7470
7471 /* Don't write the section if there are no items. */
7472 if (gap->ga_len == 0)
7473 continue;
7474
7475 /* Sort the REP/REPSAL items. */
7476 if (round != 2)
7477 qsort(gap->ga_data, (size_t)gap->ga_len,
7478 sizeof(fromto_T), rep_compare);
7479
7480 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
7481 putc(i, fd); /* <sectionID> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007482
Bram Moolenaar5195e452005-08-19 20:32:47 +00007483 /* This is for making suggestions, section is not required. */
7484 putc(0, fd); /* <sectionflags> */
7485
7486 /* Compute the length of what follows. */
7487 l = 2; /* count <repcount> or <salcount> */
7488 for (i = 0; i < gap->ga_len; ++i)
7489 {
7490 ftp = &((fromto_T *)gap->ga_data)[i];
7491 l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */
7492 l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */
7493 }
7494 if (round == 2)
7495 ++l; /* count <salflags> */
7496 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7497
7498 if (round == 2)
7499 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007500 i = 0;
7501 if (spin->si_followup)
7502 i |= SAL_F0LLOWUP;
7503 if (spin->si_collapse)
7504 i |= SAL_COLLAPSE;
7505 if (spin->si_rem_accents)
7506 i |= SAL_REM_ACCENTS;
7507 putc(i, fd); /* <salflags> */
7508 }
7509
7510 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
7511 for (i = 0; i < gap->ga_len; ++i)
7512 {
7513 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
7514 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
7515 ftp = &((fromto_T *)gap->ga_data)[i];
7516 for (rr = 1; rr <= 2; ++rr)
7517 {
7518 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
7519 l = STRLEN(p);
7520 putc(l, fd);
7521 fwrite(p, l, (size_t)1, fd);
7522 }
7523 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00007524
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007525 }
7526
Bram Moolenaar5195e452005-08-19 20:32:47 +00007527 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
7528 * This is for making suggestions, section is not required. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007529 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7530 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007531 putc(SN_SOFO, fd); /* <sectionID> */
7532 putc(0, fd); /* <sectionflags> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007533
7534 l = STRLEN(spin->si_sofofr);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007535 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
7536 /* <sectionlen> */
7537
7538 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
7539 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007540
7541 l = STRLEN(spin->si_sofoto);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007542 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
7543 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007544 }
7545
Bram Moolenaar4770d092006-01-12 23:22:24 +00007546 /* SN_WORDS: <word> ...
7547 * This is for making suggestions, section is not required. */
7548 if (spin->si_commonwords.ht_used > 0)
7549 {
7550 putc(SN_WORDS, fd); /* <sectionID> */
7551 putc(0, fd); /* <sectionflags> */
7552
7553 /* round 1: count the bytes
7554 * round 2: write the bytes */
7555 for (round = 1; round <= 2; ++round)
7556 {
7557 int todo;
7558 int len = 0;
7559 hashitem_T *hi;
7560
7561 todo = spin->si_commonwords.ht_used;
7562 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
7563 if (!HASHITEM_EMPTY(hi))
7564 {
7565 l = STRLEN(hi->hi_key) + 1;
7566 len += l;
7567 if (round == 2) /* <word> */
7568 fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
7569 --todo;
7570 }
7571 if (round == 1)
7572 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
7573 }
7574 }
7575
Bram Moolenaar5195e452005-08-19 20:32:47 +00007576 /* SN_MAP: <mapstr>
7577 * This is for making suggestions, section is not required. */
7578 if (spin->si_map.ga_len > 0)
7579 {
7580 putc(SN_MAP, fd); /* <sectionID> */
7581 putc(0, fd); /* <sectionflags> */
7582 l = spin->si_map.ga_len;
7583 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7584 fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
7585 /* <mapstr> */
7586 }
7587
Bram Moolenaar4770d092006-01-12 23:22:24 +00007588 /* SN_SUGFILE: <timestamp>
7589 * This is used to notify that a .sug file may be available and at the
7590 * same time allows for checking that a .sug file that is found matches
7591 * with this .spl file. That's because the word numbers must be exactly
7592 * right. */
7593 if (!spin->si_nosugfile
7594 && (spin->si_sal.ga_len > 0
7595 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
7596 {
7597 putc(SN_SUGFILE, fd); /* <sectionID> */
7598 putc(0, fd); /* <sectionflags> */
7599 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
7600
7601 /* Set si_sugtime and write it to the file. */
7602 spin->si_sugtime = time(NULL);
7603 put_sugtime(spin, fd); /* <timestamp> */
7604 }
7605
Bram Moolenaar5195e452005-08-19 20:32:47 +00007606 /* SN_COMPOUND: compound info.
7607 * We don't mark it required, when not supported all compound words will
7608 * be bad words. */
7609 if (spin->si_compflags != NULL)
7610 {
7611 putc(SN_COMPOUND, fd); /* <sectionID> */
7612 putc(0, fd); /* <sectionflags> */
7613
7614 l = STRLEN(spin->si_compflags);
7615 put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */
7616 putc(spin->si_compmax, fd); /* <compmax> */
7617 putc(spin->si_compminlen, fd); /* <compminlen> */
7618 putc(spin->si_compsylmax, fd); /* <compsylmax> */
7619 /* <compflags> */
7620 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd);
7621 }
7622
Bram Moolenaar78622822005-08-23 21:00:13 +00007623 /* SN_NOBREAK: NOBREAK flag */
7624 if (spin->si_nobreak)
7625 {
7626 putc(SN_NOBREAK, fd); /* <sectionID> */
7627 putc(0, fd); /* <sectionflags> */
7628
7629 /* It's empty, the precense of the section flags the feature. */
7630 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
7631 }
7632
Bram Moolenaar5195e452005-08-19 20:32:47 +00007633 /* SN_SYLLABLE: syllable info.
7634 * We don't mark it required, when not supported syllables will not be
7635 * counted. */
7636 if (spin->si_syllable != NULL)
7637 {
7638 putc(SN_SYLLABLE, fd); /* <sectionID> */
7639 putc(0, fd); /* <sectionflags> */
7640
7641 l = STRLEN(spin->si_syllable);
7642 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7643 fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */
7644 }
7645
7646 /* end of <SECTIONS> */
7647 putc(SN_END, fd); /* <sectionend> */
7648
Bram Moolenaar50cde822005-06-05 21:54:54 +00007649
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007650 /*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007651 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007652 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007653 spin->si_memtot = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007654 for (round = 1; round <= 3; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007655 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007656 if (round == 1)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007657 tree = spin->si_foldroot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007658 else if (round == 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007659 tree = spin->si_keeproot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007660 else
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007661 tree = spin->si_prefroot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007662
Bram Moolenaar0c405862005-06-22 22:26:26 +00007663 /* Clear the index and wnode fields in the tree. */
7664 clear_node(tree);
7665
Bram Moolenaar51485f02005-06-04 21:55:20 +00007666 /* Count the number of nodes. Needed to be able to allocate the
Bram Moolenaar0c405862005-06-22 22:26:26 +00007667 * memory when reading the nodes. Also fills in index for shared
Bram Moolenaar51485f02005-06-04 21:55:20 +00007668 * nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007669 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007670
Bram Moolenaar51485f02005-06-04 21:55:20 +00007671 /* number of nodes in 4 bytes */
7672 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00007673 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007674
Bram Moolenaar51485f02005-06-04 21:55:20 +00007675 /* Write the nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007676 (void)put_node(fd, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007677 }
7678
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007679 /* Write another byte to check for errors. */
7680 if (putc(0, fd) == EOF)
7681 retval = FAIL;
7682
7683 if (fclose(fd) == EOF)
7684 retval = FAIL;
7685
7686 return retval;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007687}
7688
7689/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00007690 * Clear the index and wnode fields of "node", it siblings and its
7691 * children. This is needed because they are a union with other items to save
7692 * space.
7693 */
7694 static void
7695clear_node(node)
7696 wordnode_T *node;
7697{
7698 wordnode_T *np;
7699
7700 if (node != NULL)
7701 for (np = node; np != NULL; np = np->wn_sibling)
7702 {
7703 np->wn_u1.index = 0;
7704 np->wn_u2.wnode = NULL;
7705
7706 if (np->wn_byte != NUL)
7707 clear_node(np->wn_child);
7708 }
7709}
7710
7711
7712/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007713 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007714 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00007715 * This first writes the list of possible bytes (siblings). Then for each
7716 * byte recursively write the children.
7717 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00007718 * NOTE: The code here must match the code in read_tree_node(), since
7719 * assumptions are made about the indexes (so that we don't have to write them
7720 * in the file).
Bram Moolenaar51485f02005-06-04 21:55:20 +00007721 *
7722 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007723 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007724 static int
Bram Moolenaar0c405862005-06-22 22:26:26 +00007725put_node(fd, node, index, regionmask, prefixtree)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007726 FILE *fd; /* NULL when only counting */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007727 wordnode_T *node;
7728 int index;
7729 int regionmask;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007730 int prefixtree; /* TRUE for PREFIXTREE */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007731{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007732 int newindex = index;
7733 int siblingcount = 0;
7734 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007735 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007736
Bram Moolenaar51485f02005-06-04 21:55:20 +00007737 /* If "node" is zero the tree is empty. */
7738 if (node == NULL)
7739 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007740
Bram Moolenaar51485f02005-06-04 21:55:20 +00007741 /* Store the index where this node is written. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007742 node->wn_u1.index = index;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007743
7744 /* Count the number of siblings. */
7745 for (np = node; np != NULL; np = np->wn_sibling)
7746 ++siblingcount;
7747
7748 /* Write the sibling count. */
7749 if (fd != NULL)
7750 putc(siblingcount, fd); /* <siblingcount> */
7751
7752 /* Write each sibling byte and optionally extra info. */
7753 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007754 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007755 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007756 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007757 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007758 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007759 /* For a NUL byte (end of word) write the flags etc. */
7760 if (prefixtree)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007761 {
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007762 /* In PREFIXTREE write the required affixID and the
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007763 * associated condition nr (stored in wn_region). The
7764 * byte value is misused to store the "rare" and "not
7765 * combining" flags */
Bram Moolenaar53805d12005-08-01 07:08:33 +00007766 if (np->wn_flags == (short_u)PFX_FLAGS)
7767 putc(BY_NOFLAGS, fd); /* <byte> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007768 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00007769 {
7770 putc(BY_FLAGS, fd); /* <byte> */
7771 putc(np->wn_flags, fd); /* <pflags> */
7772 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007773 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007774 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007775 }
7776 else
7777 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007778 /* For word trees we write the flag/region items. */
7779 flags = np->wn_flags;
7780 if (regionmask != 0 && np->wn_region != regionmask)
7781 flags |= WF_REGION;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007782 if (np->wn_affixID != 0)
7783 flags |= WF_AFX;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007784 if (flags == 0)
7785 {
7786 /* word without flags or region */
7787 putc(BY_NOFLAGS, fd); /* <byte> */
7788 }
7789 else
7790 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007791 if (np->wn_flags >= 0x100)
7792 {
7793 putc(BY_FLAGS2, fd); /* <byte> */
7794 putc(flags, fd); /* <flags> */
7795 putc((unsigned)flags >> 8, fd); /* <flags2> */
7796 }
7797 else
7798 {
7799 putc(BY_FLAGS, fd); /* <byte> */
7800 putc(flags, fd); /* <flags> */
7801 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007802 if (flags & WF_REGION)
7803 putc(np->wn_region, fd); /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007804 if (flags & WF_AFX)
7805 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007806 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007807 }
7808 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007809 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007810 else
7811 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00007812 if (np->wn_child->wn_u1.index != 0
7813 && np->wn_child->wn_u2.wnode != node)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007814 {
7815 /* The child is written elsewhere, write the reference. */
7816 if (fd != NULL)
7817 {
7818 putc(BY_INDEX, fd); /* <byte> */
7819 /* <nodeidx> */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007820 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007821 }
7822 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00007823 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007824 /* We will write the child below and give it an index. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007825 np->wn_child->wn_u2.wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007826
Bram Moolenaar51485f02005-06-04 21:55:20 +00007827 if (fd != NULL)
7828 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
7829 {
7830 EMSG(_(e_write));
7831 return 0;
7832 }
7833 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007834 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007835
7836 /* Space used in the array when reading: one for each sibling and one for
7837 * the count. */
7838 newindex += siblingcount + 1;
7839
7840 /* Recursively dump the children of each sibling. */
7841 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar0c405862005-06-22 22:26:26 +00007842 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
7843 newindex = put_node(fd, np->wn_child, newindex, regionmask,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007844 prefixtree);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007845
7846 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007847}
7848
7849
7850/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00007851 * ":mkspell [-ascii] outfile infile ..."
7852 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007853 */
7854 void
7855ex_mkspell(eap)
7856 exarg_T *eap;
7857{
7858 int fcount;
7859 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00007860 char_u *arg = eap->arg;
7861 int ascii = FALSE;
7862
7863 if (STRNCMP(arg, "-ascii", 6) == 0)
7864 {
7865 ascii = TRUE;
7866 arg = skipwhite(arg + 6);
7867 }
7868
7869 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
7870 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
7871 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007872 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007873 FreeWild(fcount, fnames);
7874 }
7875}
7876
7877/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00007878 * Create the .sug file.
7879 * Uses the soundfold info in "spin".
7880 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
7881 */
7882 static void
7883spell_make_sugfile(spin, wfname)
7884 spellinfo_T *spin;
7885 char_u *wfname;
7886{
7887 char_u fname[MAXPATHL];
7888 int len;
7889 slang_T *slang;
7890 int free_slang = FALSE;
7891
7892 /*
7893 * Read back the .spl file that was written. This fills the required
7894 * info for soundfolding. This also uses less memory than the
7895 * pointer-linked version of the trie. And it avoids having two versions
7896 * of the code for the soundfolding stuff.
7897 * It might have been done already by spell_reload_one().
7898 */
7899 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
7900 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
7901 break;
7902 if (slang == NULL)
7903 {
7904 spell_message(spin, (char_u *)_("Reading back spell file..."));
7905 slang = spell_load_file(wfname, NULL, NULL, FALSE);
7906 if (slang == NULL)
7907 return;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007908 free_slang = TRUE;
7909 }
7910
7911 /*
7912 * Clear the info in "spin" that is used.
7913 */
7914 spin->si_blocks = NULL;
7915 spin->si_blocks_cnt = 0;
7916 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
7917 spin->si_free_count = 0;
7918 spin->si_first_free = NULL;
7919 spin->si_foldwcount = 0;
7920
7921 /*
7922 * Go through the trie of good words, soundfold each word and add it to
7923 * the soundfold trie.
7924 */
7925 spell_message(spin, (char_u *)_("Performing soundfolding..."));
7926 if (sug_filltree(spin, slang) == FAIL)
7927 goto theend;
7928
7929 /*
7930 * Create the table which links each soundfold word with a list of the
7931 * good words it may come from. Creates buffer "spin->si_spellbuf".
7932 * This also removes the wordnr from the NUL byte entries to make
7933 * compression possible.
7934 */
7935 if (sug_maketable(spin) == FAIL)
7936 goto theend;
7937
7938 smsg((char_u *)_("Number of words after soundfolding: %ld"),
7939 (long)spin->si_spellbuf->b_ml.ml_line_count);
7940
7941 /*
7942 * Compress the soundfold trie.
7943 */
7944 spell_message(spin, (char_u *)_(msg_compressing));
7945 wordtree_compress(spin, spin->si_foldroot);
7946
7947 /*
7948 * Write the .sug file.
7949 * Make the file name by changing ".spl" to ".sug".
7950 */
7951 STRCPY(fname, wfname);
7952 len = STRLEN(fname);
7953 fname[len - 2] = 'u';
7954 fname[len - 1] = 'g';
7955 sug_write(spin, fname);
7956
7957theend:
7958 if (free_slang)
7959 slang_free(slang);
7960 free_blocks(spin->si_blocks);
7961 close_spellbuf(spin->si_spellbuf);
7962}
7963
7964/*
7965 * Build the soundfold trie for language "slang".
7966 */
7967 static int
7968sug_filltree(spin, slang)
7969 spellinfo_T *spin;
7970 slang_T *slang;
7971{
7972 char_u *byts;
7973 idx_T *idxs;
7974 int depth;
7975 idx_T arridx[MAXWLEN];
7976 int curi[MAXWLEN];
7977 char_u tword[MAXWLEN];
7978 char_u tsalword[MAXWLEN];
7979 int c;
7980 idx_T n;
7981 unsigned words_done = 0;
7982 int wordcount[MAXWLEN];
7983
7984 /* We use si_foldroot for the souldfolded trie. */
7985 spin->si_foldroot = wordtree_alloc(spin);
7986 if (spin->si_foldroot == NULL)
7987 return FAIL;
7988
7989 /* let tree_add_word() know we're adding to the soundfolded tree */
7990 spin->si_sugtree = TRUE;
7991
7992 /*
7993 * Go through the whole case-folded tree, soundfold each word and put it
7994 * in the trie.
7995 */
7996 byts = slang->sl_fbyts;
7997 idxs = slang->sl_fidxs;
7998
7999 arridx[0] = 0;
8000 curi[0] = 1;
8001 wordcount[0] = 0;
8002
8003 depth = 0;
8004 while (depth >= 0 && !got_int)
8005 {
8006 if (curi[depth] > byts[arridx[depth]])
8007 {
8008 /* Done all bytes at this node, go up one level. */
8009 idxs[arridx[depth]] = wordcount[depth];
8010 if (depth > 0)
8011 wordcount[depth - 1] += wordcount[depth];
8012
8013 --depth;
8014 line_breakcheck();
8015 }
8016 else
8017 {
8018
8019 /* Do one more byte at this node. */
8020 n = arridx[depth] + curi[depth];
8021 ++curi[depth];
8022
8023 c = byts[n];
8024 if (c == 0)
8025 {
8026 /* Sound-fold the word. */
8027 tword[depth] = NUL;
8028 spell_soundfold(slang, tword, TRUE, tsalword);
8029
8030 /* We use the "flags" field for the MSB of the wordnr,
8031 * "region" for the LSB of the wordnr. */
8032 if (tree_add_word(spin, tsalword, spin->si_foldroot,
8033 words_done >> 16, words_done & 0xffff,
8034 0) == FAIL)
8035 return FAIL;
8036
8037 ++words_done;
8038 ++wordcount[depth];
8039
8040 /* Reset the block count each time to avoid compression
8041 * kicking in. */
8042 spin->si_blocks_cnt = 0;
8043
8044 /* Skip over any other NUL bytes (same word with different
8045 * flags). */
8046 while (byts[n + 1] == 0)
8047 {
8048 ++n;
8049 ++curi[depth];
8050 }
8051 }
8052 else
8053 {
8054 /* Normal char, go one level deeper. */
8055 tword[depth++] = c;
8056 arridx[depth] = idxs[n];
8057 curi[depth] = 1;
8058 wordcount[depth] = 0;
8059 }
8060 }
8061 }
8062
8063 smsg((char_u *)_("Total number of words: %d"), words_done);
8064
8065 return OK;
8066}
8067
8068/*
8069 * Make the table that links each word in the soundfold trie to the words it
8070 * can be produced from.
8071 * This is not unlike lines in a file, thus use a memfile to be able to access
8072 * the table efficiently.
8073 * Returns FAIL when out of memory.
8074 */
8075 static int
8076sug_maketable(spin)
8077 spellinfo_T *spin;
8078{
8079 garray_T ga;
8080 int res = OK;
8081
8082 /* Allocate a buffer, open a memline for it and create the swap file
8083 * (uses a temp file, not a .swp file). */
8084 spin->si_spellbuf = open_spellbuf();
8085 if (spin->si_spellbuf == NULL)
8086 return FAIL;
8087
8088 /* Use a buffer to store the line info, avoids allocating many small
8089 * pieces of memory. */
8090 ga_init2(&ga, 1, 100);
8091
8092 /* recursively go through the tree */
8093 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8094 res = FAIL;
8095
8096 ga_clear(&ga);
8097 return res;
8098}
8099
8100/*
8101 * Fill the table for one node and its children.
8102 * Returns the wordnr at the start of the node.
8103 * Returns -1 when out of memory.
8104 */
8105 static int
8106sug_filltable(spin, node, startwordnr, gap)
8107 spellinfo_T *spin;
8108 wordnode_T *node;
8109 int startwordnr;
8110 garray_T *gap; /* place to store line of numbers */
8111{
8112 wordnode_T *p, *np;
8113 int wordnr = startwordnr;
8114 int nr;
8115 int prev_nr;
8116
8117 for (p = node; p != NULL; p = p->wn_sibling)
8118 {
8119 if (p->wn_byte == NUL)
8120 {
8121 gap->ga_len = 0;
8122 prev_nr = 0;
8123 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8124 {
8125 if (ga_grow(gap, 10) == FAIL)
8126 return -1;
8127
8128 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8129 /* Compute the offset from the previous nr and store the
8130 * offset in a way that it takes a minimum number of bytes.
8131 * It's a bit like utf-8, but without the need to mark
8132 * following bytes. */
8133 nr -= prev_nr;
8134 prev_nr += nr;
8135 gap->ga_len += offset2bytes(nr,
8136 (char_u *)gap->ga_data + gap->ga_len);
8137 }
8138
8139 /* add the NUL byte */
8140 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8141
8142 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8143 gap->ga_data, gap->ga_len, TRUE) == FAIL)
8144 return -1;
8145 ++wordnr;
8146
8147 /* Remove extra NUL entries, we no longer need them. We don't
8148 * bother freeing the nodes, the won't be reused anyway. */
8149 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8150 p->wn_sibling = p->wn_sibling->wn_sibling;
8151
8152 /* Clear the flags on the remaining NUL node, so that compression
8153 * works a lot better. */
8154 p->wn_flags = 0;
8155 p->wn_region = 0;
8156 }
8157 else
8158 {
8159 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8160 if (wordnr == -1)
8161 return -1;
8162 }
8163 }
8164 return wordnr;
8165}
8166
8167/*
8168 * Convert an offset into a minimal number of bytes.
8169 * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
8170 * bytes.
8171 */
8172 static int
8173offset2bytes(nr, buf)
8174 int nr;
8175 char_u *buf;
8176{
8177 int rem;
8178 int b1, b2, b3, b4;
8179
8180 /* Split the number in parts of base 255. We need to avoid NUL bytes. */
8181 b1 = nr % 255 + 1;
8182 rem = nr / 255;
8183 b2 = rem % 255 + 1;
8184 rem = rem / 255;
8185 b3 = rem % 255 + 1;
8186 b4 = rem / 255 + 1;
8187
8188 if (b4 > 1 || b3 > 0x1f) /* 4 bytes */
8189 {
8190 buf[0] = 0xe0 + b4;
8191 buf[1] = b3;
8192 buf[2] = b2;
8193 buf[3] = b1;
8194 return 4;
8195 }
8196 if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */
8197 {
8198 buf[0] = 0xc0 + b3;
8199 buf[1] = b2;
8200 buf[2] = b1;
8201 return 3;
8202 }
8203 if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */
8204 {
8205 buf[0] = 0x80 + b2;
8206 buf[1] = b1;
8207 return 2;
8208 }
8209 /* 1 byte */
8210 buf[0] = b1;
8211 return 1;
8212}
8213
8214/*
8215 * Opposite of offset2bytes().
8216 * "pp" points to the bytes and is advanced over it.
8217 * Returns the offset.
8218 */
8219 static int
8220bytes2offset(pp)
8221 char_u **pp;
8222{
8223 char_u *p = *pp;
8224 int nr;
8225 int c;
8226
8227 c = *p++;
8228 if ((c & 0x80) == 0x00) /* 1 byte */
8229 {
8230 nr = c - 1;
8231 }
8232 else if ((c & 0xc0) == 0x80) /* 2 bytes */
8233 {
8234 nr = (c & 0x3f) - 1;
8235 nr = nr * 255 + (*p++ - 1);
8236 }
8237 else if ((c & 0xe0) == 0xc0) /* 3 bytes */
8238 {
8239 nr = (c & 0x1f) - 1;
8240 nr = nr * 255 + (*p++ - 1);
8241 nr = nr * 255 + (*p++ - 1);
8242 }
8243 else /* 4 bytes */
8244 {
8245 nr = (c & 0x0f) - 1;
8246 nr = nr * 255 + (*p++ - 1);
8247 nr = nr * 255 + (*p++ - 1);
8248 nr = nr * 255 + (*p++ - 1);
8249 }
8250
8251 *pp = p;
8252 return nr;
8253}
8254
8255/*
8256 * Write the .sug file in "fname".
8257 */
8258 static void
8259sug_write(spin, fname)
8260 spellinfo_T *spin;
8261 char_u *fname;
8262{
8263 FILE *fd;
8264 wordnode_T *tree;
8265 int nodecount;
8266 int wcount;
8267 char_u *line;
8268 linenr_T lnum;
8269 int len;
8270
8271 /* Create the file. Note that an existing file is silently overwritten! */
8272 fd = mch_fopen((char *)fname, "w");
8273 if (fd == NULL)
8274 {
8275 EMSG2(_(e_notopen), fname);
8276 return;
8277 }
8278
8279 vim_snprintf((char *)IObuff, IOSIZE,
8280 _("Writing suggestion file %s ..."), fname);
8281 spell_message(spin, IObuff);
8282
8283 /*
8284 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
8285 */
8286 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
8287 {
8288 EMSG(_(e_write));
8289 goto theend;
8290 }
8291 putc(VIMSUGVERSION, fd); /* <versionnr> */
8292
8293 /* Write si_sugtime to the file. */
8294 put_sugtime(spin, fd); /* <timestamp> */
8295
8296 /*
8297 * <SUGWORDTREE>
8298 */
8299 spin->si_memtot = 0;
8300 tree = spin->si_foldroot->wn_sibling;
8301
8302 /* Clear the index and wnode fields in the tree. */
8303 clear_node(tree);
8304
8305 /* Count the number of nodes. Needed to be able to allocate the
8306 * memory when reading the nodes. Also fills in index for shared
8307 * nodes. */
8308 nodecount = put_node(NULL, tree, 0, 0, FALSE);
8309
8310 /* number of nodes in 4 bytes */
8311 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
8312 spin->si_memtot += nodecount + nodecount * sizeof(int);
8313
8314 /* Write the nodes. */
8315 (void)put_node(fd, tree, 0, 0, FALSE);
8316
8317 /*
8318 * <SUGTABLE>: <sugwcount> <sugline> ...
8319 */
8320 wcount = spin->si_spellbuf->b_ml.ml_line_count;
8321 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
8322
8323 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
8324 {
8325 /* <sugline>: <sugnr> ... NUL */
8326 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
8327 len = STRLEN(line) + 1;
8328 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
8329 {
8330 EMSG(_(e_write));
8331 goto theend;
8332 }
8333 spin->si_memtot += len;
8334 }
8335
8336 /* Write another byte to check for errors. */
8337 if (putc(0, fd) == EOF)
8338 EMSG(_(e_write));
8339
8340 vim_snprintf((char *)IObuff, IOSIZE,
8341 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
8342 spell_message(spin, IObuff);
8343
8344theend:
8345 /* close the file */
8346 fclose(fd);
8347}
8348
8349/*
8350 * Open a spell buffer. This is a nameless buffer that is not in the buffer
8351 * list and only contains text lines. Can use a swapfile to reduce memory
8352 * use.
8353 * Most other fields are invalid! Esp. watch out for string options being
8354 * NULL and there is no undo info.
8355 * Returns NULL when out of memory.
8356 */
8357 static buf_T *
8358open_spellbuf()
8359{
8360 buf_T *buf;
8361
8362 buf = (buf_T *)alloc_clear(sizeof(buf_T));
8363 if (buf != NULL)
8364 {
8365 buf->b_spell = TRUE;
8366 buf->b_p_swf = TRUE; /* may create a swap file */
8367 ml_open(buf);
8368 ml_open_file(buf); /* create swap file now */
8369 }
8370 return buf;
8371}
8372
8373/*
8374 * Close the buffer used for spell info.
8375 */
8376 static void
8377close_spellbuf(buf)
8378 buf_T *buf;
8379{
8380 if (buf != NULL)
8381 {
8382 ml_close(buf, TRUE);
8383 vim_free(buf);
8384 }
8385}
8386
8387
8388/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00008389 * Create a Vim spell file from one or more word lists.
8390 * "fnames[0]" is the output file name.
8391 * "fnames[fcount - 1]" is the last input file name.
8392 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
8393 * and ".spl" is appended to make the output file name.
8394 */
8395 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008396mkspell(fcount, fnames, ascii, overwrite, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00008397 int fcount;
8398 char_u **fnames;
8399 int ascii; /* -ascii argument given */
8400 int overwrite; /* overwrite existing output file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008401 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008402{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008403 char_u fname[MAXPATHL];
8404 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00008405 char_u **innames;
8406 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008407 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008408 int i;
8409 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008410 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00008411 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008412 spellinfo_T spin;
8413
8414 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008415 spin.si_verbose = !added_word;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008416 spin.si_ascii = ascii;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008417 spin.si_followup = TRUE;
8418 spin.si_rem_accents = TRUE;
8419 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008420 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008421 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
8422 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008423 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008424 hash_init(&spin.si_commonwords);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008425 spin.si_newcompID = 127; /* start compound ID at first maximum */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008426
Bram Moolenaarb765d632005-06-07 21:00:02 +00008427 /* default: fnames[0] is output file, following are input files */
8428 innames = &fnames[1];
8429 incount = fcount - 1;
8430
8431 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00008432 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008433 len = STRLEN(fnames[0]);
8434 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
8435 {
8436 /* For ":mkspell path/en.latin1.add" output file is
8437 * "path/en.latin1.add.spl". */
8438 innames = &fnames[0];
8439 incount = 1;
8440 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
8441 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00008442 else if (fcount == 1)
8443 {
8444 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
8445 innames = &fnames[0];
8446 incount = 1;
8447 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8448 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8449 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008450 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
8451 {
8452 /* Name ends in ".spl", use as the file name. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008453 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008454 }
8455 else
8456 /* Name should be language, make the file name from it. */
8457 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8458 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8459
8460 /* Check for .ascii.spl. */
8461 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
8462 spin.si_ascii = TRUE;
8463
8464 /* Check for .add.spl. */
8465 if (strstr((char *)gettail(wfname), ".add.") != NULL)
8466 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00008467 }
8468
Bram Moolenaarb765d632005-06-07 21:00:02 +00008469 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008470 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008471 else if (vim_strchr(gettail(wfname), '_') != NULL)
8472 EMSG(_("E751: Output file name must not have region name"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00008473 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008474 EMSG(_("E754: Only up to 8 regions supported"));
8475 else
8476 {
8477 /* Check for overwriting before doing things that may take a lot of
8478 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008479 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008480 {
8481 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00008482 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008483 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008484 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008485 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008486 EMSG2(_(e_isadir2), wfname);
8487 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008488 }
8489
8490 /*
8491 * Init the aff and dic pointers.
8492 * Get the region names if there are more than 2 arguments.
8493 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008494 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008495 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008496 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008497
Bram Moolenaar3982c542005-06-08 21:56:31 +00008498 if (incount > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008499 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008500 len = STRLEN(innames[i]);
8501 if (STRLEN(gettail(innames[i])) < 5
8502 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008503 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008504 EMSG2(_("E755: Invalid region in %s"), innames[i]);
8505 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008506 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00008507 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
8508 spin.si_region_name[i * 2 + 1] =
8509 TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008510 }
8511 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00008512 spin.si_region_count = incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008513
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008514 spin.si_foldroot = wordtree_alloc(&spin);
8515 spin.si_keeproot = wordtree_alloc(&spin);
8516 spin.si_prefroot = wordtree_alloc(&spin);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008517 if (spin.si_foldroot == NULL
8518 || spin.si_keeproot == NULL
8519 || spin.si_prefroot == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008520 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00008521 free_blocks(spin.si_blocks);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008522 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008523 }
8524
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008525 /* When not producing a .add.spl file clear the character table when
8526 * we encounter one in the .aff file. This means we dump the current
8527 * one in the .spl file if the .aff file doesn't define one. That's
8528 * better than guessing the contents, the table will match a
8529 * previously loaded spell file. */
8530 if (!spin.si_add)
8531 spin.si_clear_chartab = TRUE;
8532
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008533 /*
8534 * Read all the .aff and .dic files.
8535 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00008536 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008537 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008538 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008539 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008540 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008541 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008542
Bram Moolenaarb765d632005-06-07 21:00:02 +00008543 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008544 if (mch_stat((char *)fname, &st) >= 0)
8545 {
8546 /* Read the .aff file. Will init "spin->si_conv" based on the
8547 * "SET" line. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008548 afile[i] = spell_read_aff(&spin, fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008549 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008550 error = TRUE;
8551 else
8552 {
8553 /* Read the .dic file and store the words in the trees. */
8554 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00008555 innames[i]);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008556 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008557 error = TRUE;
8558 }
8559 }
8560 else
8561 {
8562 /* No .aff file, try reading the file as a word list. Store
8563 * the words in the trees. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008564 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008565 error = TRUE;
8566 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008567
Bram Moolenaarb765d632005-06-07 21:00:02 +00008568#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008569 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008570 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008571#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008572 }
8573
Bram Moolenaar78622822005-08-23 21:00:13 +00008574 if (spin.si_compflags != NULL && spin.si_nobreak)
8575 MSG(_("Warning: both compounding and NOBREAK specified"));
8576
Bram Moolenaar4770d092006-01-12 23:22:24 +00008577 if (!error && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008578 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008579 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00008580 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008581 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00008582 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008583 wordtree_compress(&spin, spin.si_foldroot);
8584 wordtree_compress(&spin, spin.si_keeproot);
8585 wordtree_compress(&spin, spin.si_prefroot);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008586 }
8587
Bram Moolenaar4770d092006-01-12 23:22:24 +00008588 if (!error && !got_int)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008589 {
8590 /*
8591 * Write the info in the spell file.
8592 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00008593 vim_snprintf((char *)IObuff, IOSIZE,
8594 _("Writing spell file %s ..."), wfname);
8595 spell_message(&spin, IObuff);
Bram Moolenaar50cde822005-06-05 21:54:54 +00008596
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008597 error = write_vim_spell(&spin, wfname) == FAIL;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008598
Bram Moolenaar4770d092006-01-12 23:22:24 +00008599 spell_message(&spin, (char_u *)_("Done!"));
8600 vim_snprintf((char *)IObuff, IOSIZE,
8601 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
8602 spell_message(&spin, IObuff);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008603
Bram Moolenaar4770d092006-01-12 23:22:24 +00008604 /*
8605 * If the file is loaded need to reload it.
8606 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008607 if (!error)
8608 spell_reload_one(wfname, added_word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008609 }
8610
8611 /* Free the allocated memory. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008612 ga_clear(&spin.si_rep);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008613 ga_clear(&spin.si_repsal);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008614 ga_clear(&spin.si_sal);
8615 ga_clear(&spin.si_map);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008616 ga_clear(&spin.si_prefcond);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008617 hash_clear_all(&spin.si_commonwords, 0);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008618
8619 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008620 for (i = 0; i < incount; ++i)
8621 if (afile[i] != NULL)
8622 spell_free_aff(afile[i]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008623
8624 /* Free all the bits and pieces at once. */
8625 free_blocks(spin.si_blocks);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008626
8627 /*
8628 * If there is soundfolding info and no NOSUGFILE item create the
8629 * .sug file with the soundfolded word trie.
8630 */
8631 if (spin.si_sugtime != 0 && !error && !got_int)
8632 spell_make_sugfile(&spin, wfname);
8633
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008634 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008635}
8636
Bram Moolenaar4770d092006-01-12 23:22:24 +00008637/*
8638 * Display a message for spell file processing when 'verbose' is set or using
8639 * ":mkspell". "str" can be IObuff.
8640 */
8641 static void
8642spell_message(spin, str)
8643 spellinfo_T *spin;
8644 char_u *str;
8645{
8646 if (spin->si_verbose || p_verbose > 2)
8647 {
8648 if (!spin->si_verbose)
8649 verbose_enter();
8650 MSG(str);
8651 out_flush();
8652 if (!spin->si_verbose)
8653 verbose_leave();
8654 }
8655}
Bram Moolenaarb765d632005-06-07 21:00:02 +00008656
8657/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008658 * ":[count]spellgood {word}"
8659 * ":[count]spellwrong {word}"
Bram Moolenaarb765d632005-06-07 21:00:02 +00008660 */
8661 void
8662ex_spell(eap)
8663 exarg_T *eap;
8664{
Bram Moolenaar7887d882005-07-01 22:33:52 +00008665 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008666 eap->forceit ? 0 : (int)eap->line2);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008667}
8668
8669/*
8670 * Add "word[len]" to 'spellfile' as a good or bad word.
8671 */
8672 void
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008673spell_add_word(word, len, bad, index)
Bram Moolenaarb765d632005-06-07 21:00:02 +00008674 char_u *word;
8675 int len;
8676 int bad;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008677 int index; /* "zG" and "zW": zero, otherwise index in
8678 'spellfile' */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008679{
8680 FILE *fd;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008681 buf_T *buf = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008682 int new_spf = FALSE;
Bram Moolenaar7887d882005-07-01 22:33:52 +00008683 char_u *fname;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008684 char_u fnamebuf[MAXPATHL];
8685 char_u line[MAXWLEN * 2];
8686 long fpos, fpos_next = 0;
8687 int i;
8688 char_u *spf;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008689
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008690 if (index == 0) /* use internal wordlist */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008691 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008692 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008693 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008694 int_wordlist = vim_tempname('s');
8695 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008696 return;
8697 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008698 fname = int_wordlist;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008699 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008700 else
8701 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00008702 /* If 'spellfile' isn't set figure out a good default value. */
8703 if (*curbuf->b_p_spf == NUL)
8704 {
8705 init_spellfile();
8706 new_spf = TRUE;
8707 }
8708
8709 if (*curbuf->b_p_spf == NUL)
8710 {
Bram Moolenaarf75a9632005-09-13 21:20:47 +00008711 EMSG2(_(e_notset), "spellfile");
Bram Moolenaar7887d882005-07-01 22:33:52 +00008712 return;
8713 }
8714
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008715 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
8716 {
8717 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
8718 if (i == index)
8719 break;
8720 if (*spf == NUL)
8721 {
Bram Moolenaare344bea2005-09-01 20:46:49 +00008722 EMSGN(_("E765: 'spellfile' does not have %ld entries"), index);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008723 return;
8724 }
8725 }
8726
Bram Moolenaarb765d632005-06-07 21:00:02 +00008727 /* Check that the user isn't editing the .add file somewhere. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008728 buf = buflist_findname_exp(fnamebuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008729 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
8730 buf = NULL;
8731 if (buf != NULL && bufIsChanged(buf))
Bram Moolenaarb765d632005-06-07 21:00:02 +00008732 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00008733 EMSG(_(e_bufloaded));
8734 return;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008735 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00008736
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008737 fname = fnamebuf;
8738 }
8739
8740 if (bad)
8741 {
8742 /* When the word also appears as good word we need to remove that one,
8743 * since its flags sort before the one with WF_BANNED. */
8744 fd = mch_fopen((char *)fname, "r");
8745 if (fd != NULL)
8746 {
8747 while (!vim_fgets(line, MAXWLEN * 2, fd))
8748 {
8749 fpos = fpos_next;
8750 fpos_next = ftell(fd);
8751 if (STRNCMP(word, line, len) == 0
8752 && (line[len] == '/' || line[len] < ' '))
8753 {
8754 /* Found duplicate word. Remove it by writing a '#' at
8755 * the start of the line. Mixing reading and writing
8756 * doesn't work for all systems, close the file first. */
8757 fclose(fd);
8758 fd = mch_fopen((char *)fname, "r+");
8759 if (fd == NULL)
8760 break;
8761 if (fseek(fd, fpos, SEEK_SET) == 0)
8762 fputc('#', fd);
8763 fseek(fd, fpos_next, SEEK_SET);
8764 }
8765 }
8766 fclose(fd);
8767 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00008768 }
8769
8770 fd = mch_fopen((char *)fname, "a");
8771 if (fd == NULL && new_spf)
8772 {
8773 /* We just initialized the 'spellfile' option and can't open the file.
8774 * We may need to create the "spell" directory first. We already
8775 * checked the runtime directory is writable in init_spellfile(). */
Bram Moolenaar5b962cf2005-12-12 21:58:40 +00008776 if (!dir_of_file_exists(fname))
Bram Moolenaar7887d882005-07-01 22:33:52 +00008777 {
8778 /* The directory doesn't exist. Try creating it and opening the
8779 * file again. */
8780 vim_mkdir(NameBuff, 0755);
8781 fd = mch_fopen((char *)fname, "a");
8782 }
8783 }
8784
8785 if (fd == NULL)
8786 EMSG2(_(e_notopen), fname);
8787 else
8788 {
8789 if (bad)
8790 fprintf(fd, "%.*s/!\n", len, word);
8791 else
8792 fprintf(fd, "%.*s\n", len, word);
8793 fclose(fd);
8794
8795 /* Update the .add.spl file. */
8796 mkspell(1, &fname, FALSE, TRUE, TRUE);
8797
8798 /* If the .add file is edited somewhere, reload it. */
8799 if (buf != NULL)
Bram Moolenaarea8bd732006-01-14 21:15:59 +00008800 buf_reload(buf, buf->b_orig_mode);
Bram Moolenaar7887d882005-07-01 22:33:52 +00008801
8802 redraw_all_later(NOT_VALID);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008803 }
8804}
8805
8806/*
8807 * Initialize 'spellfile' for the current buffer.
8808 */
8809 static void
8810init_spellfile()
8811{
8812 char_u buf[MAXPATHL];
8813 int l;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008814 char_u *fname;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008815 char_u *rtp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008816 char_u *lend;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008817 int aspath = FALSE;
8818 char_u *lstart = curbuf->b_p_spl;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008819
8820 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
8821 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008822 /* Find the end of the language name. Exclude the region. If there
8823 * is a path separator remember the start of the tail. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008824 for (lend = curbuf->b_p_spl; *lend != NUL
8825 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008826 if (vim_ispathsep(*lend))
8827 {
8828 aspath = TRUE;
8829 lstart = lend + 1;
8830 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008831
8832 /* Loop over all entries in 'runtimepath'. Use the first one where we
8833 * are allowed to write. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008834 rtp = p_rtp;
8835 while (*rtp != NUL)
8836 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008837 if (aspath)
8838 /* Use directory of an entry with path, e.g., for
8839 * "/dir/lg.utf-8.spl" use "/dir". */
8840 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
8841 else
8842 /* Copy the path from 'runtimepath' to buf[]. */
8843 copy_option_part(&rtp, buf, MAXPATHL, ",");
Bram Moolenaarb765d632005-06-07 21:00:02 +00008844 if (filewritable(buf) == 2)
8845 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00008846 /* Use the first language name from 'spelllang' and the
8847 * encoding used in the first loaded .spl file. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008848 if (aspath)
8849 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
8850 else
8851 {
8852 l = STRLEN(buf);
8853 vim_snprintf((char *)buf + l, MAXPATHL - l,
8854 "/spell/%.*s", (int)(lend - lstart), lstart);
8855 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008856 l = STRLEN(buf);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008857 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
8858 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
8859 fname != NULL
8860 && strstr((char *)gettail(fname), ".ascii.") != NULL
8861 ? (char_u *)"ascii" : spell_enc());
Bram Moolenaarb765d632005-06-07 21:00:02 +00008862 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
8863 break;
8864 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00008865 aspath = FALSE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008866 }
8867 }
8868}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008869
Bram Moolenaar51485f02005-06-04 21:55:20 +00008870
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008871/*
8872 * Init the chartab used for spelling for ASCII.
8873 * EBCDIC is not supported!
8874 */
8875 static void
8876clear_spell_chartab(sp)
8877 spelltab_T *sp;
8878{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008879 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008880
8881 /* Init everything to FALSE. */
8882 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
8883 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
8884 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008885 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008886 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008887 sp->st_upper[i] = i;
8888 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008889
8890 /* We include digits. A word shouldn't start with a digit, but handling
8891 * that is done separately. */
8892 for (i = '0'; i <= '9'; ++i)
8893 sp->st_isw[i] = TRUE;
8894 for (i = 'A'; i <= 'Z'; ++i)
8895 {
8896 sp->st_isw[i] = TRUE;
8897 sp->st_isu[i] = TRUE;
8898 sp->st_fold[i] = i + 0x20;
8899 }
8900 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008901 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008902 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008903 sp->st_upper[i] = i - 0x20;
8904 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008905}
8906
8907/*
8908 * Init the chartab used for spelling. Only depends on 'encoding'.
8909 * Called once while starting up and when 'encoding' changes.
8910 * The default is to use isalpha(), but the spell file should define the word
8911 * characters to make it possible that 'encoding' differs from the current
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008912 * locale. For utf-8 we don't use isalpha() but our own functions.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008913 */
8914 void
8915init_spell_chartab()
8916{
8917 int i;
8918
8919 did_set_spelltab = FALSE;
8920 clear_spell_chartab(&spelltab);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008921#ifdef FEAT_MBYTE
8922 if (enc_dbcs)
8923 {
8924 /* DBCS: assume double-wide characters are word characters. */
8925 for (i = 128; i <= 255; ++i)
8926 if (MB_BYTE2LEN(i) == 2)
8927 spelltab.st_isw[i] = TRUE;
8928 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008929 else if (enc_utf8)
8930 {
8931 for (i = 128; i < 256; ++i)
8932 {
8933 spelltab.st_isu[i] = utf_isupper(i);
8934 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
8935 spelltab.st_fold[i] = utf_fold(i);
8936 spelltab.st_upper[i] = utf_toupper(i);
8937 }
8938 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008939 else
8940#endif
8941 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008942 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008943 for (i = 128; i < 256; ++i)
8944 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008945 if (MB_ISUPPER(i))
8946 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008947 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008948 spelltab.st_isu[i] = TRUE;
8949 spelltab.st_fold[i] = MB_TOLOWER(i);
8950 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008951 else if (MB_ISLOWER(i))
8952 {
8953 spelltab.st_isw[i] = TRUE;
8954 spelltab.st_upper[i] = MB_TOUPPER(i);
8955 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008956 }
8957 }
8958}
8959
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008960/*
8961 * Set the spell character tables from strings in the affix file.
8962 */
8963 static int
8964set_spell_chartab(fol, low, upp)
8965 char_u *fol;
8966 char_u *low;
8967 char_u *upp;
8968{
8969 /* We build the new tables here first, so that we can compare with the
8970 * previous one. */
8971 spelltab_T new_st;
8972 char_u *pf = fol, *pl = low, *pu = upp;
8973 int f, l, u;
8974
8975 clear_spell_chartab(&new_st);
8976
8977 while (*pf != NUL)
8978 {
8979 if (*pl == NUL || *pu == NUL)
8980 {
8981 EMSG(_(e_affform));
8982 return FAIL;
8983 }
8984#ifdef FEAT_MBYTE
8985 f = mb_ptr2char_adv(&pf);
8986 l = mb_ptr2char_adv(&pl);
8987 u = mb_ptr2char_adv(&pu);
8988#else
8989 f = *pf++;
8990 l = *pl++;
8991 u = *pu++;
8992#endif
8993 /* Every character that appears is a word character. */
8994 if (f < 256)
8995 new_st.st_isw[f] = TRUE;
8996 if (l < 256)
8997 new_st.st_isw[l] = TRUE;
8998 if (u < 256)
8999 new_st.st_isw[u] = TRUE;
9000
9001 /* if "LOW" and "FOL" are not the same the "LOW" char needs
9002 * case-folding */
9003 if (l < 256 && l != f)
9004 {
9005 if (f >= 256)
9006 {
9007 EMSG(_(e_affrange));
9008 return FAIL;
9009 }
9010 new_st.st_fold[l] = f;
9011 }
9012
9013 /* if "UPP" and "FOL" are not the same the "UPP" char needs
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009014 * case-folding, it's upper case and the "UPP" is the upper case of
9015 * "FOL" . */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009016 if (u < 256 && u != f)
9017 {
9018 if (f >= 256)
9019 {
9020 EMSG(_(e_affrange));
9021 return FAIL;
9022 }
9023 new_st.st_fold[u] = f;
9024 new_st.st_isu[u] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009025 new_st.st_upper[f] = u;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009026 }
9027 }
9028
9029 if (*pl != NUL || *pu != NUL)
9030 {
9031 EMSG(_(e_affform));
9032 return FAIL;
9033 }
9034
9035 return set_spell_finish(&new_st);
9036}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009037
9038/*
9039 * Set the spell character tables from strings in the .spl file.
9040 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00009041 static void
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009042set_spell_charflags(flags, cnt, fol)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009043 char_u *flags;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009044 int cnt; /* length of "flags" */
9045 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009046{
9047 /* We build the new tables here first, so that we can compare with the
9048 * previous one. */
9049 spelltab_T new_st;
9050 int i;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009051 char_u *p = fol;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009052 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009053
9054 clear_spell_chartab(&new_st);
9055
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009056 for (i = 0; i < 128; ++i)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009057 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009058 if (i < cnt)
9059 {
9060 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9061 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9062 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009063
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009064 if (*p != NUL)
9065 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009066#ifdef FEAT_MBYTE
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009067 c = mb_ptr2char_adv(&p);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009068#else
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009069 c = *p++;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009070#endif
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009071 new_st.st_fold[i + 128] = c;
9072 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9073 new_st.st_upper[c] = i + 128;
9074 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009075 }
9076
Bram Moolenaar5195e452005-08-19 20:32:47 +00009077 (void)set_spell_finish(&new_st);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009078}
9079
9080 static int
9081set_spell_finish(new_st)
9082 spelltab_T *new_st;
9083{
9084 int i;
9085
9086 if (did_set_spelltab)
9087 {
9088 /* check that it's the same table */
9089 for (i = 0; i < 256; ++i)
9090 {
9091 if (spelltab.st_isw[i] != new_st->st_isw[i]
9092 || spelltab.st_isu[i] != new_st->st_isu[i]
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009093 || spelltab.st_fold[i] != new_st->st_fold[i]
9094 || spelltab.st_upper[i] != new_st->st_upper[i])
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009095 {
9096 EMSG(_("E763: Word characters differ between spell files"));
9097 return FAIL;
9098 }
9099 }
9100 }
9101 else
9102 {
9103 /* copy the new spelltab into the one being used */
9104 spelltab = *new_st;
9105 did_set_spelltab = TRUE;
9106 }
9107
9108 return OK;
9109}
9110
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009111/*
Bram Moolenaarea408852005-06-25 22:49:46 +00009112 * Return TRUE if "p" points to a word character.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009113 * As a special case we see "midword" characters as word character when it is
Bram Moolenaarea408852005-06-25 22:49:46 +00009114 * followed by a word character. This finds they'there but not 'they there'.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009115 * Thus this only works properly when past the first character of the word.
Bram Moolenaarea408852005-06-25 22:49:46 +00009116 */
9117 static int
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009118spell_iswordp(p, buf)
Bram Moolenaarea408852005-06-25 22:49:46 +00009119 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009120 buf_T *buf; /* buffer used */
Bram Moolenaarea408852005-06-25 22:49:46 +00009121{
Bram Moolenaarea408852005-06-25 22:49:46 +00009122#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009123 char_u *s;
9124 int l;
9125 int c;
9126
9127 if (has_mbyte)
9128 {
9129 l = MB_BYTE2LEN(*p);
9130 s = p;
9131 if (l == 1)
9132 {
9133 /* be quick for ASCII */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009134 if (buf->b_spell_ismw[*p])
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009135 {
9136 s = p + 1; /* skip a mid-word character */
9137 l = MB_BYTE2LEN(*s);
9138 }
9139 }
9140 else
9141 {
9142 c = mb_ptr2char(p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009143 if (c < 256 ? buf->b_spell_ismw[c]
9144 : (buf->b_spell_ismw_mb != NULL
9145 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009146 {
9147 s = p + l;
9148 l = MB_BYTE2LEN(*s);
9149 }
9150 }
9151
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009152 c = mb_ptr2char(s);
9153 if (c > 255)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009154 return mb_get_class(s) >= 2;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009155 return spelltab.st_isw[c];
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009156 }
Bram Moolenaarea408852005-06-25 22:49:46 +00009157#endif
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009158
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009159 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
9160}
9161
9162/*
9163 * Return TRUE if "p" points to a word character.
9164 * Unlike spell_iswordp() this doesn't check for "midword" characters.
9165 */
9166 static int
9167spell_iswordp_nmw(p)
9168 char_u *p;
9169{
9170#ifdef FEAT_MBYTE
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009171 int c;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009172
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009173 if (has_mbyte)
9174 {
9175 c = mb_ptr2char(p);
9176 if (c > 255)
9177 return mb_get_class(p) >= 2;
9178 return spelltab.st_isw[c];
9179 }
9180#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009181 return spelltab.st_isw[*p];
Bram Moolenaarea408852005-06-25 22:49:46 +00009182}
9183
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array of characters
 * instead of bytes.  A "midword" character of buffer "buf" is skipped and
 * the character following it is checked instead.
 */
    static int
spell_iswordp_w(p, buf)
    int         *p;
    buf_T       *buf;
{
    int         *s = p;
    int         is_midword;

    /* Midword characters below 256 are in the b_spell_ismw[] table, others
     * are listed in the b_spell_ismw_mb string (if any). */
    if (*p < 256)
        is_midword = buf->b_spell_ismw[*p];
    else
        is_midword = (buf->b_spell_ismw_mb != NULL
                && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL);
    if (is_midword)
        s = p + 1;

    if (*s <= 255)
        return spelltab.st_isw[*s];

    /* Doesn't fit in the table: use the character class. */
    if (enc_utf8)
        return utf_class(*s) >= 2;
    if (enc_dbcs)
        return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
    return 0;
}
#endif
9214
Bram Moolenaarea408852005-06-25 22:49:46 +00009215/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009216 * Write the table with prefix conditions to the .spl file.
Bram Moolenaar5195e452005-08-19 20:32:47 +00009217 * When "fd" is NULL only count the length of what is written.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009218 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00009219 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009220write_spell_prefcond(fd, gap)
9221 FILE *fd;
9222 garray_T *gap;
9223{
9224 int i;
9225 char_u *p;
9226 int len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009227 int totlen;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009228
Bram Moolenaar5195e452005-08-19 20:32:47 +00009229 if (fd != NULL)
9230 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
9231
9232 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009233
9234 for (i = 0; i < gap->ga_len; ++i)
9235 {
9236 /* <prefcond> : <condlen> <condstr> */
9237 p = ((char_u **)gap->ga_data)[i];
Bram Moolenaar5195e452005-08-19 20:32:47 +00009238 if (p != NULL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009239 {
9240 len = STRLEN(p);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009241 if (fd != NULL)
9242 {
9243 fputc(len, fd);
9244 fwrite(p, (size_t)len, (size_t)1, fd);
9245 }
9246 totlen += len;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009247 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00009248 else if (fd != NULL)
9249 fputc(0, fd);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009250 }
9251
Bram Moolenaar5195e452005-08-19 20:32:47 +00009252 return totlen;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009253}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009254
9255/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009256 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
9257 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009258 * When using a multi-byte 'encoding' the length may change!
9259 * Returns FAIL when something wrong.
9260 */
9261 static int
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009262spell_casefold(str, len, buf, buflen)
9263 char_u *str;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009264 int len;
9265 char_u *buf;
9266 int buflen;
9267{
9268 int i;
9269
9270 if (len >= buflen)
9271 {
9272 buf[0] = NUL;
9273 return FAIL; /* result will not fit */
9274 }
9275
9276#ifdef FEAT_MBYTE
9277 if (has_mbyte)
9278 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009279 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009280 char_u *p;
9281 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009282
9283 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009284 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009285 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009286 if (outi + MB_MAXBYTES > buflen)
9287 {
9288 buf[outi] = NUL;
9289 return FAIL;
9290 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009291 c = mb_cptr2char_adv(&p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009292 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009293 }
9294 buf[outi] = NUL;
9295 }
9296 else
9297#endif
9298 {
9299 /* Be quick for non-multibyte encodings. */
9300 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009301 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009302 buf[i] = NUL;
9303 }
9304
9305 return OK;
9306}
9307
/* Values for "sps_flags": the method named in 'spellsuggest'. */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

/* Both are set by spell_check_sps(). */
static int sps_flags = SPS_BEST;    /* flags from 'spellsuggest' */
static int sps_limit = 9999;        /* max nr of suggestions given */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009315
9316/*
9317 * Check the 'spellsuggest' option. Return FAIL if it's wrong.
Bram Moolenaar5195e452005-08-19 20:32:47 +00009318 * Sets "sps_flags" and "sps_limit".
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009319 */
9320 int
9321spell_check_sps()
9322{
9323 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009324 char_u *s;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009325 char_u buf[MAXPATHL];
9326 int f;
9327
9328 sps_flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009329 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009330
9331 for (p = p_sps; *p != NUL; )
9332 {
9333 copy_option_part(&p, buf, MAXPATHL, ",");
9334
9335 f = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009336 if (VIM_ISDIGIT(*buf))
9337 {
9338 s = buf;
9339 sps_limit = getdigits(&s);
9340 if (*s != NUL && !VIM_ISDIGIT(*s))
9341 f = -1;
9342 }
9343 else if (STRCMP(buf, "best") == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009344 f = SPS_BEST;
9345 else if (STRCMP(buf, "fast") == 0)
9346 f = SPS_FAST;
9347 else if (STRCMP(buf, "double") == 0)
9348 f = SPS_DOUBLE;
9349 else if (STRNCMP(buf, "expr:", 5) != 0
9350 && STRNCMP(buf, "file:", 5) != 0)
9351 f = -1;
9352
9353 if (f == -1 || (sps_flags != 0 && f != 0))
9354 {
9355 sps_flags = SPS_BEST;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009356 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009357 return FAIL;
9358 }
9359 if (f != 0)
9360 sps_flags = f;
9361 }
9362
9363 if (sps_flags == 0)
9364 sps_flags = SPS_BEST;
9365
9366 return OK;
9367}
9368
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009369/*
9370 * "z?": Find badly spelled word under or after the cursor.
9371 * Give suggestions for the properly spelled word.
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009372 * In Visual mode use the highlighted word as the bad word.
Bram Moolenaard12a1322005-08-21 22:08:24 +00009373 * When "count" is non-zero use that suggestion.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009374 */
9375 void
Bram Moolenaard12a1322005-08-21 22:08:24 +00009376spell_suggest(count)
9377 int count;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009378{
9379 char_u *line;
9380 pos_T prev_cursor = curwin->w_cursor;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009381 char_u wcopy[MAXWLEN + 2];
9382 char_u *p;
9383 int i;
9384 int c;
9385 suginfo_T sug;
9386 suggest_T *stp;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009387 int mouse_used;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009388 int need_cap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009389 int limit;
Bram Moolenaard12a1322005-08-21 22:08:24 +00009390 int selected = count;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009391 int badlen = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009392
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009393 if (no_spell_checking(curwin))
9394 return;
9395
9396#ifdef FEAT_VISUAL
9397 if (VIsual_active)
9398 {
9399 /* Use the Visually selected text as the bad word. But reject
9400 * a multi-line selection. */
9401 if (curwin->w_cursor.lnum != VIsual.lnum)
9402 {
9403 vim_beep();
9404 return;
9405 }
9406 badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
9407 if (badlen < 0)
9408 badlen = -badlen;
9409 else
9410 curwin->w_cursor.col = VIsual.col;
9411 ++badlen;
9412 end_visual_mode();
9413 }
9414 else
9415#endif
9416 /* Find the start of the badly spelled word. */
9417 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
Bram Moolenaar0c405862005-06-22 22:26:26 +00009418 || curwin->w_cursor.col > prev_cursor.col)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009419 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00009420 /* No bad word or it starts after the cursor: use the word under the
9421 * cursor. */
9422 curwin->w_cursor = prev_cursor;
9423 line = ml_get_curline();
9424 p = line + curwin->w_cursor.col;
9425 /* Backup to before start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009426 while (p > line && spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +00009427 mb_ptr_back(line, p);
9428 /* Forward to start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009429 while (*p != NUL && !spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +00009430 mb_ptr_adv(p);
9431
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009432 if (!spell_iswordp_nmw(p)) /* No word found. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00009433 {
9434 beep_flush();
9435 return;
9436 }
9437 curwin->w_cursor.col = p - line;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009438 }
9439
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009440 /* Get the word and its length. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009441
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009442 /* Figure out if the word should be capitalised. */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009443 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009444
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009445 line = ml_get_curline();
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009446
Bram Moolenaar5195e452005-08-19 20:32:47 +00009447 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in
9448 * 'spellsuggest', whatever is smaller. */
9449 if (sps_limit > (int)Rows - 2)
9450 limit = (int)Rows - 2;
9451 else
9452 limit = sps_limit;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009453 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
Bram Moolenaar4770d092006-01-12 23:22:24 +00009454 TRUE, need_cap, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009455
9456 if (sug.su_ga.ga_len == 0)
9457 MSG(_("Sorry, no suggestions"));
Bram Moolenaard12a1322005-08-21 22:08:24 +00009458 else if (count > 0)
9459 {
9460 if (count > sug.su_ga.ga_len)
9461 smsg((char_u *)_("Sorry, only %ld suggestions"),
9462 (long)sug.su_ga.ga_len);
9463 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009464 else
9465 {
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009466 vim_free(repl_from);
9467 repl_from = NULL;
9468 vim_free(repl_to);
9469 repl_to = NULL;
9470
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009471#ifdef FEAT_RIGHTLEFT
9472 /* When 'rightleft' is set the list is drawn right-left. */
9473 cmdmsg_rl = curwin->w_p_rl;
9474 if (cmdmsg_rl)
9475 msg_col = Columns - 1;
9476#endif
9477
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009478 /* List the suggestions. */
9479 msg_start();
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009480 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009481 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
9482 sug.su_badlen, sug.su_badptr);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009483#ifdef FEAT_RIGHTLEFT
9484 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
9485 {
9486 /* And now the rabbit from the high hat: Avoid showing the
9487 * untranslated message rightleft. */
9488 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
9489 sug.su_badlen, sug.su_badptr);
9490 }
9491#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009492 msg_puts(IObuff);
9493 msg_clr_eos();
9494 msg_putchar('\n');
Bram Moolenaar0c405862005-06-22 22:26:26 +00009495
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009496 msg_scroll = TRUE;
9497 for (i = 0; i < sug.su_ga.ga_len; ++i)
9498 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009499 stp = &SUG(sug.su_ga, i);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009500
9501 /* The suggested word may replace only part of the bad word, add
9502 * the not replaced part. */
9503 STRCPY(wcopy, stp->st_word);
9504 if (sug.su_badlen > stp->st_orglen)
Bram Moolenaar4770d092006-01-12 23:22:24 +00009505 vim_strncpy(wcopy + stp->st_wordlen,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009506 sug.su_badptr + stp->st_orglen,
9507 sug.su_badlen - stp->st_orglen);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009508 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
9509#ifdef FEAT_RIGHTLEFT
9510 if (cmdmsg_rl)
9511 rl_mirror(IObuff);
9512#endif
9513 msg_puts(IObuff);
9514
9515 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
Bram Moolenaar0c405862005-06-22 22:26:26 +00009516 msg_puts(IObuff);
9517
9518 /* The word may replace more than "su_badlen". */
9519 if (sug.su_badlen < stp->st_orglen)
9520 {
9521 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
9522 stp->st_orglen, sug.su_badptr);
9523 msg_puts(IObuff);
9524 }
9525
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009526 if (p_verbose > 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009527 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00009528 /* Add the score. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009529 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009530 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009531 stp->st_salscore ? "s " : "",
9532 stp->st_score, stp->st_altscore);
9533 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009534 vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
Bram Moolenaar0c405862005-06-22 22:26:26 +00009535 stp->st_score);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009536#ifdef FEAT_RIGHTLEFT
9537 if (cmdmsg_rl)
9538 /* Mirror the numbers, but keep the leading space. */
9539 rl_mirror(IObuff + 1);
9540#endif
Bram Moolenaar0c405862005-06-22 22:26:26 +00009541 msg_advance(30);
9542 msg_puts(IObuff);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009543 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009544 msg_putchar('\n');
9545 }
9546
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009547#ifdef FEAT_RIGHTLEFT
9548 cmdmsg_rl = FALSE;
9549 msg_col = 0;
9550#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009551 /* Ask for choice. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00009552 selected = prompt_for_number(&mouse_used);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009553 if (mouse_used)
Bram Moolenaard12a1322005-08-21 22:08:24 +00009554 selected -= lines_left;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009555 }
9556
Bram Moolenaard12a1322005-08-21 22:08:24 +00009557 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
9558 {
9559 /* Save the from and to text for :spellrepall. */
9560 stp = &SUG(sug.su_ga, selected - 1);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00009561 if (sug.su_badlen > stp->st_orglen)
9562 {
9563 /* Replacing less than "su_badlen", append the remainder to
9564 * repl_to. */
9565 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
9566 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
9567 sug.su_badlen - stp->st_orglen,
9568 sug.su_badptr + stp->st_orglen);
9569 repl_to = vim_strsave(IObuff);
9570 }
9571 else
9572 {
9573 /* Replacing su_badlen or more, use the whole word. */
9574 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
9575 repl_to = vim_strsave(stp->st_word);
9576 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00009577
9578 /* Replace the word. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009579 p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
Bram Moolenaard12a1322005-08-21 22:08:24 +00009580 if (p != NULL)
9581 {
9582 c = sug.su_badptr - line;
9583 mch_memmove(p, line, c);
9584 STRCPY(p + c, stp->st_word);
9585 STRCAT(p, sug.su_badptr + stp->st_orglen);
9586 ml_replace(curwin->w_cursor.lnum, p, FALSE);
9587 curwin->w_cursor.col = c;
9588 changed_bytes(curwin->w_cursor.lnum, c);
9589
9590 /* For redo we use a change-word command. */
9591 ResetRedobuff();
9592 AppendToRedobuff((char_u *)"ciw");
Bram Moolenaarebefac62005-12-28 22:39:57 +00009593 AppendToRedobuffLit(p + c,
Bram Moolenaar4770d092006-01-12 23:22:24 +00009594 stp->st_wordlen + sug.su_badlen - stp->st_orglen);
Bram Moolenaard12a1322005-08-21 22:08:24 +00009595 AppendCharToRedobuff(ESC);
9596 }
9597 }
9598 else
9599 curwin->w_cursor = prev_cursor;
9600
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009601 spell_find_cleanup(&sug);
9602}
9603
9604/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009605 * Check if the word at line "lnum" column "col" is required to start with a
9606 * capital. This uses 'spellcapcheck' of the current buffer.
9607 */
9608 static int
9609check_need_cap(lnum, col)
9610 linenr_T lnum;
9611 colnr_T col;
9612{
9613 int need_cap = FALSE;
9614 char_u *line;
9615 char_u *line_copy = NULL;
9616 char_u *p;
9617 colnr_T endcol;
9618 regmatch_T regmatch;
9619
9620 if (curbuf->b_cap_prog == NULL)
9621 return FALSE;
9622
9623 line = ml_get_curline();
9624 endcol = 0;
9625 if ((int)(skipwhite(line) - line) >= (int)col)
9626 {
9627 /* At start of line, check if previous line is empty or sentence
9628 * ends there. */
9629 if (lnum == 1)
9630 need_cap = TRUE;
9631 else
9632 {
9633 line = ml_get(lnum - 1);
9634 if (*skipwhite(line) == NUL)
9635 need_cap = TRUE;
9636 else
9637 {
9638 /* Append a space in place of the line break. */
9639 line_copy = concat_str(line, (char_u *)" ");
9640 line = line_copy;
9641 endcol = STRLEN(line);
9642 }
9643 }
9644 }
9645 else
9646 endcol = col;
9647
9648 if (endcol > 0)
9649 {
9650 /* Check if sentence ends before the bad word. */
9651 regmatch.regprog = curbuf->b_cap_prog;
9652 regmatch.rm_ic = FALSE;
9653 p = line + endcol;
9654 for (;;)
9655 {
9656 mb_ptr_back(line, p);
9657 if (p == line || spell_iswordp_nmw(p))
9658 break;
9659 if (vim_regexec(&regmatch, p, 0)
9660 && regmatch.endp[0] == line + endcol)
9661 {
9662 need_cap = TRUE;
9663 break;
9664 }
9665 }
9666 }
9667
9668 vim_free(line_copy);
9669
9670 return need_cap;
9671}
9672
9673
9674/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009675 * ":spellrepall"
9676 */
9677/*ARGSUSED*/
9678 void
9679ex_spellrepall(eap)
9680 exarg_T *eap;
9681{
9682 pos_T pos = curwin->w_cursor;
9683 char_u *frompat;
9684 int addlen;
9685 char_u *line;
9686 char_u *p;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009687 int save_ws = p_ws;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009688 linenr_T prev_lnum = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009689
9690 if (repl_from == NULL || repl_to == NULL)
9691 {
9692 EMSG(_("E752: No previous spell replacement"));
9693 return;
9694 }
9695 addlen = STRLEN(repl_to) - STRLEN(repl_from);
9696
9697 frompat = alloc(STRLEN(repl_from) + 7);
9698 if (frompat == NULL)
9699 return;
9700 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
9701 p_ws = FALSE;
9702
Bram Moolenaar5195e452005-08-19 20:32:47 +00009703 sub_nsubs = 0;
9704 sub_nlines = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009705 curwin->w_cursor.lnum = 0;
9706 while (!got_int)
9707 {
9708 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0
9709 || u_save_cursor() == FAIL)
9710 break;
9711
9712 /* Only replace when the right word isn't there yet. This happens
9713 * when changing "etc" to "etc.". */
9714 line = ml_get_curline();
9715 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
9716 repl_to, STRLEN(repl_to)) != 0)
9717 {
9718 p = alloc(STRLEN(line) + addlen + 1);
9719 if (p == NULL)
9720 break;
9721 mch_memmove(p, line, curwin->w_cursor.col);
9722 STRCPY(p + curwin->w_cursor.col, repl_to);
9723 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
9724 ml_replace(curwin->w_cursor.lnum, p, FALSE);
9725 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009726
9727 if (curwin->w_cursor.lnum != prev_lnum)
9728 {
9729 ++sub_nlines;
9730 prev_lnum = curwin->w_cursor.lnum;
9731 }
9732 ++sub_nsubs;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009733 }
9734 curwin->w_cursor.col += STRLEN(repl_to);
9735 }
9736
9737 p_ws = save_ws;
9738 curwin->w_cursor = pos;
9739 vim_free(frompat);
9740
Bram Moolenaar5195e452005-08-19 20:32:47 +00009741 if (sub_nsubs == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009742 EMSG2(_("E753: Not found: %s"), repl_from);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009743 else
9744 do_sub_msg(FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009745}
9746
9747/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009748 * Find spell suggestions for "word". Return them in the growarray "*gap" as
9749 * a list of allocated strings.
9750 */
9751 void
Bram Moolenaar4770d092006-01-12 23:22:24 +00009752spell_suggest_list(gap, word, maxcount, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009753 garray_T *gap;
9754 char_u *word;
9755 int maxcount; /* maximum nr of suggestions */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009756 int need_cap; /* 'spellcapcheck' matched */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009757 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009758{
9759 suginfo_T sug;
9760 int i;
9761 suggest_T *stp;
9762 char_u *wcopy;
9763
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009764 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009765
9766 /* Make room in "gap". */
9767 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009768 if (ga_grow(gap, sug.su_ga.ga_len) == OK)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009769 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009770 for (i = 0; i < sug.su_ga.ga_len; ++i)
9771 {
9772 stp = &SUG(sug.su_ga, i);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009773
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009774 /* The suggested word may replace only part of "word", add the not
9775 * replaced part. */
9776 wcopy = alloc(stp->st_wordlen
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009777 + STRLEN(sug.su_badptr + stp->st_orglen) + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009778 if (wcopy == NULL)
9779 break;
9780 STRCPY(wcopy, stp->st_word);
9781 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
9782 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
9783 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009784 }
9785
9786 spell_find_cleanup(&sug);
9787}
9788
9789/*
9790 * Find spell suggestions for the word at the start of "badptr".
9791 * Return the suggestions in "su->su_ga".
9792 * The maximum number of suggestions is "maxcount".
9793 * Note: does use info for the current window.
9794 * This is based on the mechanisms of Aspell, but completely reimplemented.
9795 */
9796 static void
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009797spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009798 char_u *badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009799 int badlen; /* length of bad word or 0 if unknown */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009800 suginfo_T *su;
9801 int maxcount;
Bram Moolenaarea408852005-06-25 22:49:46 +00009802 int banbadword; /* don't include badword in suggestions */
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009803 int need_cap; /* word should start with capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009804 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009805{
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00009806 hlf_T attr = HLF_COUNT;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009807 char_u buf[MAXPATHL];
9808 char_u *p;
9809 int do_combine = FALSE;
9810 char_u *sps_copy;
9811#ifdef FEAT_EVAL
9812 static int expr_busy = FALSE;
9813#endif
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009814 int c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00009815 int i;
9816 langp_T *lp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009817
9818 /*
9819 * Set the info in "*su".
9820 */
9821 vim_memset(su, 0, sizeof(suginfo_T));
9822 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
9823 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00009824 if (*badptr == NUL)
9825 return;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009826 hash_init(&su->su_banned);
9827
9828 su->su_badptr = badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009829 if (badlen != 0)
9830 su->su_badlen = badlen;
9831 else
9832 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009833 su->su_maxcount = maxcount;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009834 su->su_maxscore = SCORE_MAXINIT;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009835
9836 if (su->su_badlen >= MAXWLEN)
9837 su->su_badlen = MAXWLEN - 1; /* just in case */
9838 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
9839 (void)spell_casefold(su->su_badptr, su->su_badlen,
9840 su->su_fbadword, MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +00009841 /* get caps flags for bad word */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009842 su->su_badflags = badword_captype(su->su_badptr,
9843 su->su_badptr + su->su_badlen);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009844 if (need_cap)
9845 su->su_badflags |= WF_ONECAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009846
Bram Moolenaar8b96d642005-09-05 22:05:30 +00009847 /* Find the default language for sound folding. We simply use the first
9848 * one in 'spelllang' that supports sound folding. That's good for when
9849 * using multiple files for one language, it's not that bad when mixing
9850 * languages (e.g., "pl,en"). */
9851 for (i = 0; i < curbuf->b_langp.ga_len; ++i)
9852 {
9853 lp = LANGP_ENTRY(curbuf->b_langp, i);
9854 if (lp->lp_sallang != NULL)
9855 {
9856 su->su_sallang = lp->lp_sallang;
9857 break;
9858 }
9859 }
9860
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00009861 /* Soundfold the bad word with the default sound folding, so that we don't
9862 * have to do this many times. */
9863 if (su->su_sallang != NULL)
9864 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
9865 su->su_sal_badword);
9866
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009867 /* If the word is not capitalised and spell_check() doesn't consider the
9868 * word to be bad then it might need to be capitalised. Add a suggestion
9869 * for that. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00009870 c = PTR2CHAR(su->su_badptr);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00009871 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009872 {
9873 make_case_word(su->su_badword, buf, WF_ONECAP);
9874 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
Bram Moolenaar4770d092006-01-12 23:22:24 +00009875 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00009876 }
9877
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009878 /* Ban the bad word itself. It may appear in another region. */
Bram Moolenaarea408852005-06-25 22:49:46 +00009879 if (banbadword)
9880 add_banned(su, su->su_badword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009881
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009882 /* Make a copy of 'spellsuggest', because the expression may change it. */
9883 sps_copy = vim_strsave(p_sps);
9884 if (sps_copy == NULL)
9885 return;
9886
9887 /* Loop over the items in 'spellsuggest'. */
9888 for (p = sps_copy; *p != NUL; )
9889 {
9890 copy_option_part(&p, buf, MAXPATHL, ",");
9891
9892 if (STRNCMP(buf, "expr:", 5) == 0)
9893 {
9894#ifdef FEAT_EVAL
Bram Moolenaar42eeac32005-06-29 22:40:58 +00009895 /* Evaluate an expression. Skip this when called recursively,
9896 * when using spellsuggest() in the expression. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009897 if (!expr_busy)
9898 {
9899 expr_busy = TRUE;
9900 spell_suggest_expr(su, buf + 5);
9901 expr_busy = FALSE;
9902 }
9903#endif
9904 }
9905 else if (STRNCMP(buf, "file:", 5) == 0)
9906 /* Use list of suggestions in a file. */
9907 spell_suggest_file(su, buf + 5);
9908 else
9909 {
9910 /* Use internal method. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009911 spell_suggest_intern(su, interactive);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009912 if (sps_flags & SPS_DOUBLE)
9913 do_combine = TRUE;
9914 }
9915 }
9916
9917 vim_free(sps_copy);
9918
9919 if (do_combine)
9920 /* Combine the two list of suggestions. This must be done last,
9921 * because sorting changes the order again. */
9922 score_combine(su);
9923}
9924
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * Every item of the resulting list that is itself a list is passed to
 * get_spellword() to obtain a word and a score.  The word is added to
 * "su->su_ga" when the score is between zero and "su->su_maxscore"
 * (inclusive).
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T   *su;
    char_u      *expr;
{
    list_T      *result;
    listitem_T  *item;
    char_u      *word;
    int         word_score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
        item = result->lv_first;
        while (item != NULL)
        {
            if (item->li_tv.v_type == VAR_LIST)
            {
                /* Get the word and the score from the item. */
                word_score = get_spellword(item->li_tv.vval.v_list, &word);
                if (word_score >= 0 && word_score <= su->su_maxscore)
                    add_suggestion(su, &su->su_ga, word, su->su_badlen,
                                word_score, 0, TRUE, su->su_sallang, FALSE);
            }
            item = item->li_next;
        }
        list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
9963
9964/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009965 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'.
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009966 */
9967 static void
9968spell_suggest_file(su, fname)
9969 suginfo_T *su;
9970 char_u *fname;
9971{
9972 FILE *fd;
9973 char_u line[MAXWLEN * 2];
9974 char_u *p;
9975 int len;
9976 char_u cword[MAXWLEN];
9977
9978 /* Open the file. */
9979 fd = mch_fopen((char *)fname, "r");
9980 if (fd == NULL)
9981 {
9982 EMSG2(_(e_notopen), fname);
9983 return;
9984 }
9985
9986 /* Read it line by line. */
9987 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
9988 {
9989 line_breakcheck();
9990
9991 p = vim_strchr(line, '/');
9992 if (p == NULL)
9993 continue; /* No Tab found, just skip the line. */
9994 *p++ = NUL;
9995 if (STRICMP(su->su_badword, line) == 0)
9996 {
9997 /* Match! Isolate the good word, until CR or NL. */
9998 for (len = 0; p[len] >= ' '; ++len)
9999 ;
10000 p[len] = NUL;
10001
10002 /* If the suggestion doesn't have specific case duplicate the case
10003 * of the bad word. */
10004 if (captype(p, NULL) == 0)
10005 {
10006 make_case_word(p, cword, su->su_badflags);
10007 p = cword;
10008 }
10009
10010 add_suggestion(su, &su->su_ga, p, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010011 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010012 }
10013 }
10014
10015 fclose(fd);
10016
Bram Moolenaar4770d092006-01-12 23:22:24 +000010017 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10018 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010019 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10020}
10021
10022/*
10023 * Find suggestions for the internal method indicated by "sps_flags".
10024 */
10025 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000010026spell_suggest_intern(su, interactive)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010027 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010028 int interactive;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010029{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010030 /*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010031 * Load the .sug file(s) that are available and not done yet.
10032 */
10033 suggest_load_files();
10034
10035 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010036 * 1. Try special cases, such as repeating a word: "the the" -> "the".
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010037 *
10038 * Set a maximum score to limit the combination of operations that is
10039 * tried.
10040 */
Bram Moolenaar0c405862005-06-22 22:26:26 +000010041 suggest_try_special(su);
10042
10043 /*
10044 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
10045 * from the .aff file and inserting a space (split the word).
10046 */
10047 suggest_try_change(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010048
10049 /* For the resulting top-scorers compute the sound-a-like score. */
10050 if (sps_flags & SPS_DOUBLE)
10051 score_comp_sal(su);
10052
10053 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010054 * 3. Try finding sound-a-like words.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010055 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010056 if ((sps_flags & SPS_FAST) == 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010057 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010058 if (sps_flags & SPS_BEST)
10059 /* Adjust the word score for the suggestions found so far for how
10060 * they sounds like. */
10061 rescore_suggestions(su);
10062
10063 /*
10064 * While going throught the soundfold tree "su_maxscore" is the score
10065 * for the soundfold word, limits the changes that are being tried,
10066 * and "su_sfmaxscore" the rescored score, which is set by
10067 * cleanup_suggestions().
10068 * First find words with a small edit distance, because this is much
10069 * faster and often already finds the top-N suggestions. If we didn't
10070 * find many suggestions try again with a higher edit distance.
10071 * "sl_sounddone" is used to avoid doing the same word twice.
10072 */
10073 suggest_try_soundalike_prep();
10074 su->su_maxscore = SCORE_SFMAX1;
10075 su->su_sfmaxscore = SCORE_MAXINIT * 3;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010076 suggest_try_soundalike(su);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010077 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10078 {
10079 /* We didn't find enough matches, try again, allowing more
10080 * changes to the soundfold word. */
10081 su->su_maxscore = SCORE_SFMAX2;
10082 suggest_try_soundalike(su);
10083 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10084 {
10085 /* Still didn't find enough matches, try again, allowing even
10086 * more changes to the soundfold word. */
10087 su->su_maxscore = SCORE_SFMAX3;
10088 suggest_try_soundalike(su);
10089 }
10090 }
10091 su->su_maxscore = su->su_sfmaxscore;
10092 suggest_try_soundalike_finish();
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010093 }
10094
Bram Moolenaar4770d092006-01-12 23:22:24 +000010095 /* When CTRL-C was hit while searching do show the results. Only clear
10096 * got_int when using a command, not for spellsuggest(). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010097 ui_breakcheck();
Bram Moolenaar4770d092006-01-12 23:22:24 +000010098 if (interactive && got_int)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010099 {
10100 (void)vgetc();
10101 got_int = FALSE;
10102 }
10103
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010104 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010105 {
10106 if (sps_flags & SPS_BEST)
10107 /* Adjust the word score for how it sounds like. */
10108 rescore_suggestions(su);
10109
Bram Moolenaar4770d092006-01-12 23:22:24 +000010110 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10111 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010112 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010113 }
10114}
10115
10116/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010117 * Load the .sug files for languages that have one and weren't loaded yet.
10118 */
10119 static void
10120suggest_load_files()
10121{
10122 langp_T *lp;
10123 int lpi;
10124 slang_T *slang;
10125 char_u *dotp;
10126 FILE *fd;
10127 char_u buf[MAXWLEN];
10128 int i;
10129 time_t timestamp;
10130 int wcount;
10131 int wordnr;
10132 garray_T ga;
10133 int c;
10134
10135 /* Do this for all languages that support sound folding. */
10136 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10137 {
10138 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10139 slang = lp->lp_slang;
10140 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
10141 {
10142 /* Change ".spl" to ".sug" and open the file. When the file isn't
10143 * found silently skip it. Do set "sl_sugloaded" so that we
10144 * don't try again and again. */
10145 slang->sl_sugloaded = TRUE;
10146
10147 dotp = vim_strrchr(slang->sl_fname, '.');
10148 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
10149 continue;
10150 STRCPY(dotp, ".sug");
10151 fd = fopen((char *)slang->sl_fname, "r");
10152 if (fd == NULL)
10153 goto nextone;
10154
10155 /*
10156 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
10157 */
10158 for (i = 0; i < VIMSUGMAGICL; ++i)
10159 buf[i] = getc(fd); /* <fileID> */
10160 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
10161 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010162 EMSG2(_("E778: This does not look like a .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010163 slang->sl_fname);
10164 goto nextone;
10165 }
10166 c = getc(fd); /* <versionnr> */
10167 if (c < VIMSUGVERSION)
10168 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010169 EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010170 slang->sl_fname);
10171 goto nextone;
10172 }
10173 else if (c > VIMSUGVERSION)
10174 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010175 EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010176 slang->sl_fname);
10177 goto nextone;
10178 }
10179
10180 /* Check the timestamp, it must be exactly the same as the one in
10181 * the .spl file. Otherwise the word numbers won't match. */
10182 timestamp = 0;
10183 for (i = 7; i >= 0; --i) /* <timestamp> */
10184 timestamp += getc(fd) << (i * 8);
10185 if (timestamp != slang->sl_sugtime)
10186 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010187 EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010188 slang->sl_fname);
10189 goto nextone;
10190 }
10191
10192 /*
10193 * <SUGWORDTREE>: <wordtree>
10194 * Read the trie with the soundfolded words.
10195 */
10196 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
10197 FALSE, 0) != 0)
10198 {
10199someerror:
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010200 EMSG2(_("E782: error while reading .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010201 slang->sl_fname);
10202 slang_clear_sug(slang);
10203 goto nextone;
10204 }
10205
10206 /*
10207 * <SUGTABLE>: <sugwcount> <sugline> ...
10208 *
10209 * Read the table with word numbers. We use a file buffer for
10210 * this, because it's so much like a file with lines. Makes it
10211 * possible to swap the info and save on memory use.
10212 */
10213 slang->sl_sugbuf = open_spellbuf();
10214 if (slang->sl_sugbuf == NULL)
10215 goto someerror;
10216 /* <sugwcount> */
10217 wcount = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8)
10218 + getc(fd);
10219 if (wcount < 0)
10220 goto someerror;
10221
10222 /* Read all the wordnr lists into the buffer, one NUL terminated
10223 * list per line. */
10224 ga_init2(&ga, 1, 100);
10225 for (wordnr = 0; wordnr < wcount; ++wordnr)
10226 {
10227 ga.ga_len = 0;
10228 for (;;)
10229 {
10230 c = getc(fd); /* <sugline> */
10231 if (c < 0 || ga_grow(&ga, 1) == FAIL)
10232 goto someerror;
10233 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
10234 if (c == NUL)
10235 break;
10236 }
10237 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
10238 ga.ga_data, ga.ga_len, TRUE) == FAIL)
10239 goto someerror;
10240 }
10241 ga_clear(&ga);
10242
10243 /*
10244 * Need to put word counts in the word tries, so that we can find
10245 * a word by its number.
10246 */
10247 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
10248 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
10249
10250nextone:
10251 if (fd != NULL)
10252 fclose(fd);
10253 STRCPY(dotp, ".spl");
10254 }
10255 }
10256}
10257
10258
10259/*
10260 * Fill in the wordcount fields for a trie.
10261 * Returns the total number of words.
10262 */
10263 static void
10264tree_count_words(byts, idxs)
10265 char_u *byts;
10266 idx_T *idxs;
10267{
10268 int depth;
10269 idx_T arridx[MAXWLEN];
10270 int curi[MAXWLEN];
10271 int c;
10272 idx_T n;
10273 int wordcount[MAXWLEN];
10274
10275 arridx[0] = 0;
10276 curi[0] = 1;
10277 wordcount[0] = 0;
10278 depth = 0;
10279 while (depth >= 0 && !got_int)
10280 {
10281 if (curi[depth] > byts[arridx[depth]])
10282 {
10283 /* Done all bytes at this node, go up one level. */
10284 idxs[arridx[depth]] = wordcount[depth];
10285 if (depth > 0)
10286 wordcount[depth - 1] += wordcount[depth];
10287
10288 --depth;
10289 fast_breakcheck();
10290 }
10291 else
10292 {
10293 /* Do one more byte at this node. */
10294 n = arridx[depth] + curi[depth];
10295 ++curi[depth];
10296
10297 c = byts[n];
10298 if (c == 0)
10299 {
10300 /* End of word, count it. */
10301 ++wordcount[depth];
10302
10303 /* Skip over any other NUL bytes (same word with different
10304 * flags). */
10305 while (byts[n + 1] == 0)
10306 {
10307 ++n;
10308 ++curi[depth];
10309 }
10310 }
10311 else
10312 {
10313 /* Normal char, go one level deeper to count the words. */
10314 ++depth;
10315 arridx[depth] = idxs[n];
10316 curi[depth] = 1;
10317 wordcount[depth] = 0;
10318 }
10319 }
10320 }
10321}
10322
10323/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010324 * Free the info put in "*su" by spell_find_suggest().
10325 */
10326 static void
10327spell_find_cleanup(su)
10328 suginfo_T *su;
10329{
10330 int i;
10331
10332 /* Free the suggestions. */
10333 for (i = 0; i < su->su_ga.ga_len; ++i)
10334 vim_free(SUG(su->su_ga, i).st_word);
10335 ga_clear(&su->su_ga);
10336 for (i = 0; i < su->su_sga.ga_len; ++i)
10337 vim_free(SUG(su->su_sga, i).st_word);
10338 ga_clear(&su->su_sga);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010339
10340 /* Free the banned words. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010341 hash_clear_all(&su->su_banned, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010342}
10343
10344/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010345 * Make a copy of "word", with the first letter upper or lower cased, to
10346 * "wcopy[MAXWLEN]". "word" must not be empty.
10347 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010348 */
10349 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010350onecap_copy(word, wcopy, upper)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010351 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010352 char_u *wcopy;
10353 int upper; /* TRUE: first letter made upper case */
10354{
10355 char_u *p;
10356 int c;
10357 int l;
10358
10359 p = word;
10360#ifdef FEAT_MBYTE
10361 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010362 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010363 else
10364#endif
10365 c = *p++;
10366 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010367 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010368 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010369 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010370#ifdef FEAT_MBYTE
10371 if (has_mbyte)
10372 l = mb_char2bytes(c, wcopy);
10373 else
10374#endif
10375 {
10376 l = 1;
10377 wcopy[0] = c;
10378 }
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010379 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010380}
10381
10382/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010383 * Make a copy of "word" with all the letters upper cased into
10384 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010385 */
10386 static void
10387allcap_copy(word, wcopy)
10388 char_u *word;
10389 char_u *wcopy;
10390{
10391 char_u *s;
10392 char_u *d;
10393 int c;
10394
10395 d = wcopy;
10396 for (s = word; *s != NUL; )
10397 {
10398#ifdef FEAT_MBYTE
10399 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010400 c = mb_cptr2char_adv(&s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010401 else
10402#endif
10403 c = *s++;
Bram Moolenaar78622822005-08-23 21:00:13 +000010404
10405#ifdef FEAT_MBYTE
10406 /* We only change ß to SS when we are certain latin1 is used. It
10407 * would cause weird errors in other 8-bit encodings. */
10408 if (enc_latin1like && c == 0xdf)
10409 {
10410 c = 'S';
10411 if (d - wcopy >= MAXWLEN - 1)
10412 break;
10413 *d++ = c;
10414 }
10415 else
10416#endif
10417 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010418
10419#ifdef FEAT_MBYTE
10420 if (has_mbyte)
10421 {
10422 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
10423 break;
10424 d += mb_char2bytes(c, d);
10425 }
10426 else
10427#endif
10428 {
10429 if (d - wcopy >= MAXWLEN - 1)
10430 break;
10431 *d++ = c;
10432 }
10433 }
10434 *d = NUL;
10435}
10436
10437/*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010438 * Try finding suggestions by recognizing specific situations.
10439 */
10440 static void
10441suggest_try_special(su)
10442 suginfo_T *su;
10443{
10444 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010445 size_t len;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010446 int c;
10447 char_u word[MAXWLEN];
10448
10449 /*
10450 * Recognize a word that is repeated: "the the".
10451 */
10452 p = skiptowhite(su->su_fbadword);
10453 len = p - su->su_fbadword;
10454 p = skipwhite(p);
10455 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
10456 {
10457 /* Include badflags: if the badword is onecap or allcap
10458 * use that for the goodword too: "The the" -> "The". */
10459 c = su->su_fbadword[len];
10460 su->su_fbadword[len] = NUL;
10461 make_case_word(su->su_fbadword, word, su->su_badflags);
10462 su->su_fbadword[len] = c;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010463
10464 /* Give a soundalike score of 0, compute the score as if deleting one
10465 * character. */
10466 add_suggestion(su, &su->su_ga, word, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010467 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010468 }
10469}
10470
10471/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010472 * Try finding suggestions by adding/removing/swapping letters.
10473 */
10474 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000010475suggest_try_change(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010476 suginfo_T *su;
10477{
10478 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010479 int n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010480 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010481 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010482 langp_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010483
10484 /* We make a copy of the case-folded bad word, so that we can modify it
Bram Moolenaar0c405862005-06-22 22:26:26 +000010485 * to find matches (esp. REP items). Append some more text, changing
10486 * chars after the bad word may help. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010487 STRCPY(fword, su->su_fbadword);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010488 n = STRLEN(fword);
10489 p = su->su_badptr + su->su_badlen;
10490 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010491
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010492 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010493 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010494 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010495
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010496 /* If reloading a spell file fails it's still in the list but
10497 * everything has been cleared. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010498 if (lp->lp_slang->sl_fbyts == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010499 continue;
10500
Bram Moolenaar4770d092006-01-12 23:22:24 +000010501 /* Try it for this language. Will add possible suggestions. */
10502 suggest_trie_walk(su, lp, fword, FALSE);
10503 }
10504}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010505
/*
 * Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score stored in "stack[depth]" plus "add"
 * is still below "su->su_maxscore".
 * All arguments are parenthesized, so that expressions such as "&info" or
 * "stack + 1" can be passed safely.
 */
#define TRY_DEEPER(su, stack, depth, add) \
                ((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
10509
10510/*
10511 * Try finding suggestions by adding/removing/swapping letters.
10512 *
10513 * This uses a state machine. At each node in the tree we try various
10514 * operations. When trying if an operation works "depth" is increased and the
10515 * stack[] is used to store info. This allows combinations, thus insert one
10516 * character, replace one and delete another. The number of changes is
10517 * limited by su->su_maxscore.
10518 *
10519 * After implementing this I noticed an article by Kemal Oflazer that
10520 * describes something similar: "Error-tolerant Finite State Recognition with
10521 * Applications to Morphological Analysis and Spelling Correction" (1996).
10522 * The implementation in the article is simplified and requires a stack of
10523 * unknown depth. The implementation here only needs a stack depth equal to
10524 * the length of the word.
10525 *
10526 * This is also used for the sound-folded word, "soundfold" is TRUE then.
10527 * The mechanism is the same, but we find a match with a sound-folded word
10528 * that comes from one or more original words. Each of these words may be
10529 * added, this is done by add_sound_suggest().
10530 * Don't use:
10531 * the prefix tree or the keep-case tree
10532 * "su->su_badlen"
10533 * anything to do with upper and lower case
10534 * anything to do with word or non-word characters ("spell_iswordp()")
10535 * banned words
10536 * word flags (rare, region, compounding)
10537 * word splitting for now
10538 * "similar_chars()"
10539 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
10540 */
10541 static void
10542suggest_trie_walk(su, lp, fword, soundfold)
10543 suginfo_T *su;
10544 langp_T *lp;
10545 char_u *fword;
10546 int soundfold;
10547{
10548 char_u tword[MAXWLEN]; /* good word collected so far */
10549 trystate_T stack[MAXWLEN];
10550 char_u preword[MAXWLEN * 3]; /* word found with proper case;
10551 * concatenation of prefix compound
10552 * words and split word. NUL terminated
10553 * when going deeper but not when coming
10554 * back. */
10555 char_u compflags[MAXWLEN]; /* compound flags, one for each word */
10556 trystate_T *sp;
10557 int newscore;
10558 int score;
10559 char_u *byts, *fbyts, *pbyts;
10560 idx_T *idxs, *fidxs, *pidxs;
10561 int depth;
10562 int c, c2, c3;
10563 int n = 0;
10564 int flags;
10565 garray_T *gap;
10566 idx_T arridx;
10567 int len;
10568 char_u *p;
10569 fromto_T *ftp;
10570 int fl = 0, tl;
10571 int repextra = 0; /* extra bytes in fword[] from REP item */
10572 slang_T *slang = lp->lp_slang;
10573 int fword_ends;
10574 int goodword_ends;
10575#ifdef DEBUG_TRIEWALK
10576 /* Stores the name of the change made at each level. */
10577 char_u changename[MAXWLEN][80];
10578#endif
10579 int breakcheckcount = 1000;
10580 int compound_ok;
10581
10582 /*
10583 * Go through the whole case-fold tree, try changes at each node.
10584 * "tword[]" contains the word collected from nodes in the tree.
10585 * "fword[]" the word we are trying to match with (initially the bad
10586 * word).
10587 */
10588 depth = 0;
10589 sp = &stack[0];
10590 vim_memset(sp, 0, sizeof(trystate_T));
10591 sp->ts_curi = 1;
10592
10593 if (soundfold)
10594 {
10595 /* Going through the soundfold tree. */
10596 byts = fbyts = slang->sl_sbyts;
10597 idxs = fidxs = slang->sl_sidxs;
10598 pbyts = NULL;
10599 pidxs = NULL;
10600 sp->ts_prefixdepth = PFD_NOPREFIX;
10601 sp->ts_state = STATE_START;
10602 }
10603 else
10604 {
Bram Moolenaarea424162005-06-16 21:51:00 +000010605 /*
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010606 * When there are postponed prefixes we need to use these first. At
10607 * the end of the prefix we continue in the case-fold tree.
10608 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010609 fbyts = slang->sl_fbyts;
10610 fidxs = slang->sl_fidxs;
10611 pbyts = slang->sl_pbyts;
10612 pidxs = slang->sl_pidxs;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010613 if (pbyts != NULL)
10614 {
10615 byts = pbyts;
10616 idxs = pidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010617 sp->ts_prefixdepth = PFD_PREFIXTREE;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010618 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */
10619 }
10620 else
10621 {
10622 byts = fbyts;
10623 idxs = fidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010624 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010625 sp->ts_state = STATE_START;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010626 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010627 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010628
Bram Moolenaar4770d092006-01-12 23:22:24 +000010629 /*
10630 * Loop to find all suggestions. At each round we either:
10631 * - For the current state try one operation, advance "ts_curi",
10632 * increase "depth".
10633 * - When a state is done go to the next, set "ts_state".
10634 * - When all states are tried decrease "depth".
10635 */
10636 while (depth >= 0 && !got_int)
10637 {
10638 sp = &stack[depth];
10639 switch (sp->ts_state)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010640 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010641 case STATE_START:
10642 case STATE_NOPREFIX:
10643 /*
10644 * Start of node: Deal with NUL bytes, which means
10645 * tword[] may end here.
10646 */
10647 arridx = sp->ts_arridx; /* current node in the tree */
10648 len = byts[arridx]; /* bytes in this node */
10649 arridx += sp->ts_curi; /* index of current byte */
10650
10651 if (sp->ts_prefixdepth == PFD_PREFIXTREE)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010652 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010653 /* Skip over the NUL bytes, we use them later. */
10654 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
10655 ;
10656 sp->ts_curi += n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010657
Bram Moolenaar4770d092006-01-12 23:22:24 +000010658 /* Always past NUL bytes now. */
10659 n = (int)sp->ts_state;
10660 sp->ts_state = STATE_ENDNUL;
10661 sp->ts_save_badflags = su->su_badflags;
10662
10663 /* At end of a prefix or at start of prefixtree: check for
10664 * following word. */
10665 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010666 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010667 /* Set su->su_badflags to the caps type at this position.
10668 * Use the caps type until here for the prefix itself. */
Bram Moolenaar53805d12005-08-01 07:08:33 +000010669#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000010670 if (has_mbyte)
10671 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
10672 else
Bram Moolenaar53805d12005-08-01 07:08:33 +000010673#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000010674 n = sp->ts_fidx;
10675 flags = badword_captype(su->su_badptr, su->su_badptr + n);
10676 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000010677 su->su_badptr + su->su_badlen);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010678#ifdef DEBUG_TRIEWALK
10679 sprintf(changename[depth], "prefix");
10680#endif
10681 go_deeper(stack, depth, 0);
10682 ++depth;
10683 sp = &stack[depth];
10684 sp->ts_prefixdepth = depth - 1;
10685 byts = fbyts;
10686 idxs = fidxs;
10687 sp->ts_arridx = 0;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010688
Bram Moolenaar4770d092006-01-12 23:22:24 +000010689 /* Move the prefix to preword[] with the right case
10690 * and make find_keepcap_word() work. */
10691 tword[sp->ts_twordlen] = NUL;
10692 make_case_word(tword + sp->ts_splitoff,
10693 preword + sp->ts_prewordlen, flags);
10694 sp->ts_prewordlen = STRLEN(preword);
10695 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010696 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010697 break;
10698 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010699
Bram Moolenaar4770d092006-01-12 23:22:24 +000010700 if (sp->ts_curi > len || byts[arridx] != 0)
10701 {
10702 /* Past bytes in node and/or past NUL bytes. */
10703 sp->ts_state = STATE_ENDNUL;
10704 sp->ts_save_badflags = su->su_badflags;
10705 break;
10706 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010707
Bram Moolenaar4770d092006-01-12 23:22:24 +000010708 /*
10709 * End of word in tree.
10710 */
10711 ++sp->ts_curi; /* eat one NUL byte */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010712
Bram Moolenaar4770d092006-01-12 23:22:24 +000010713 flags = (int)idxs[arridx];
10714 fword_ends = (fword[sp->ts_fidx] == NUL
10715 || (soundfold
10716 ? vim_iswhite(fword[sp->ts_fidx])
10717 : !spell_iswordp(fword + sp->ts_fidx, curbuf)));
10718 tword[sp->ts_twordlen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010719
Bram Moolenaar4770d092006-01-12 23:22:24 +000010720 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
Bram Moolenaard12a1322005-08-21 22:08:24 +000010721 && (sp->ts_flags & TSF_PREFIXOK) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000010722 {
10723 /* There was a prefix before the word. Check that the prefix
10724 * can be used with this word. */
10725 /* Count the length of the NULs in the prefix. If there are
10726 * none this must be the first try without a prefix. */
10727 n = stack[sp->ts_prefixdepth].ts_arridx;
10728 len = pbyts[n++];
10729 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
10730 ;
10731 if (c > 0)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010732 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010733 c = valid_word_prefix(c, n, flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000010734 tword + sp->ts_splitoff, slang, FALSE);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010735 if (c == 0)
10736 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010737
Bram Moolenaar4770d092006-01-12 23:22:24 +000010738 /* Use the WF_RARE flag for a rare prefix. */
10739 if (c & WF_RAREPFX)
10740 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010741
Bram Moolenaar4770d092006-01-12 23:22:24 +000010742 /* Tricky: when checking for both prefix and compounding
10743 * we run into the prefix flag first.
10744 * Remember that it's OK, so that we accept the prefix
10745 * when arriving at a compound flag. */
10746 sp->ts_flags |= TSF_PREFIXOK;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010747 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010748 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010749
Bram Moolenaar4770d092006-01-12 23:22:24 +000010750 /* Check NEEDCOMPOUND: can't use word without compounding. Do try
10751 * appending another compound word below. */
10752 if (sp->ts_complen == sp->ts_compsplit && fword_ends
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010753 && (flags & WF_NEEDCOMP))
Bram Moolenaar4770d092006-01-12 23:22:24 +000010754 goodword_ends = FALSE;
10755 else
10756 goodword_ends = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010757
Bram Moolenaar4770d092006-01-12 23:22:24 +000010758 p = NULL;
10759 compound_ok = TRUE;
10760 if (sp->ts_complen > sp->ts_compsplit)
10761 {
10762 if (slang->sl_nobreak)
Bram Moolenaard12a1322005-08-21 22:08:24 +000010763 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010764 /* There was a word before this word. When there was no
10765 * change in this word (it was correct) add the first word
10766 * as a suggestion. If this word was corrected too, we
10767 * need to check if a correct word follows. */
10768 if (sp->ts_fidx - sp->ts_splitfidx
Bram Moolenaar78622822005-08-23 21:00:13 +000010769 == sp->ts_twordlen - sp->ts_splitoff
Bram Moolenaar4770d092006-01-12 23:22:24 +000010770 && STRNCMP(fword + sp->ts_splitfidx,
10771 tword + sp->ts_splitoff,
Bram Moolenaar78622822005-08-23 21:00:13 +000010772 sp->ts_fidx - sp->ts_splitfidx) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000010773 {
10774 preword[sp->ts_prewordlen] = NUL;
10775 newscore = score_wordcount_adj(slang, sp->ts_score,
10776 preword + sp->ts_prewordlen,
10777 sp->ts_prewordlen > 0);
10778 /* Add the suggestion if the score isn't too bad. */
10779 if (newscore <= su->su_maxscore)
Bram Moolenaar78622822005-08-23 21:00:13 +000010780 add_suggestion(su, &su->su_ga, preword,
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010781 sp->ts_splitfidx - repextra,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010782 newscore, 0, FALSE,
10783 lp->lp_sallang, FALSE);
10784 break;
Bram Moolenaar78622822005-08-23 21:00:13 +000010785 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000010786 }
Bram Moolenaare52325c2005-08-22 22:54:29 +000010787 else
Bram Moolenaar0c405862005-06-22 22:26:26 +000010788 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010789 /* There was a compound word before this word. If this
10790 * word does not support compounding then give up
10791 * (splitting is tried for the word without compound
10792 * flag). */
10793 if (((unsigned)flags >> 24) == 0
10794 || sp->ts_twordlen - sp->ts_splitoff
10795 < slang->sl_compminlen)
10796 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010797#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000010798 /* For multi-byte chars check character length against
10799 * COMPOUNDMIN. */
10800 if (has_mbyte
10801 && slang->sl_compminlen > 0
10802 && mb_charlen(tword + sp->ts_splitoff)
10803 < slang->sl_compminlen)
10804 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010805#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +000010806
Bram Moolenaar4770d092006-01-12 23:22:24 +000010807 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
10808 compflags[sp->ts_complen + 1] = NUL;
10809 vim_strncpy(preword + sp->ts_prewordlen,
10810 tword + sp->ts_splitoff,
10811 sp->ts_twordlen - sp->ts_splitoff);
10812 p = preword;
10813 while (*skiptowhite(p) != NUL)
10814 p = skipwhite(skiptowhite(p));
10815 if (fword_ends && !can_compound(slang, p,
10816 compflags + sp->ts_compsplit))
10817 /* Compound is not allowed. But it may still be
10818 * possible if we add another (short) word. */
10819 compound_ok = FALSE;
10820
10821 /* Get pointer to last char of previous word. */
10822 p = preword + sp->ts_prewordlen;
10823 mb_ptr_back(preword, p);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010824 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010825 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010826
Bram Moolenaar4770d092006-01-12 23:22:24 +000010827 /*
10828 * Form the word with proper case in preword.
10829 * If there is a word from a previous split, append.
10830 * For the soundfold tree don't change the case, simply append.
10831 */
10832 if (soundfold)
10833 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
10834 else if (flags & WF_KEEPCAP)
10835 /* Must find the word in the keep-case tree. */
10836 find_keepcap_word(slang, tword + sp->ts_splitoff,
10837 preword + sp->ts_prewordlen);
10838 else
10839 {
10840 /* Include badflags: If the badword is onecap or allcap
10841 * use that for the goodword too. But if the badword is
10842 * allcap and it's only one char long use onecap. */
10843 c = su->su_badflags;
10844 if ((c & WF_ALLCAP)
10845#ifdef FEAT_MBYTE
10846 && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
10847#else
10848 && su->su_badlen == 1
10849#endif
10850 )
10851 c = WF_ONECAP;
10852 c |= flags;
10853
10854 /* When appending a compound word after a word character don't
10855 * use Onecap. */
10856 if (p != NULL && spell_iswordp_nmw(p))
10857 c &= ~WF_ONECAP;
10858 make_case_word(tword + sp->ts_splitoff,
10859 preword + sp->ts_prewordlen, c);
10860 }
10861
10862 if (!soundfold)
10863 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010864 /* Don't use a banned word. It may appear again as a good
10865 * word, thus remember it. */
10866 if (flags & WF_BANNED)
10867 {
Bram Moolenaar5195e452005-08-19 20:32:47 +000010868 add_banned(su, preword + sp->ts_prewordlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010869 break;
10870 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010871 if ((sp->ts_complen == sp->ts_compsplit
Bram Moolenaar4770d092006-01-12 23:22:24 +000010872 && WAS_BANNED(su, preword + sp->ts_prewordlen))
10873 || WAS_BANNED(su, preword))
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010874 {
10875 if (slang->sl_compprog == NULL)
10876 break;
10877 /* the word so far was banned but we may try compounding */
10878 goodword_ends = FALSE;
10879 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010880 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010881
Bram Moolenaar4770d092006-01-12 23:22:24 +000010882 newscore = 0;
10883 if (!soundfold) /* soundfold words don't have flags */
10884 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010885 if ((flags & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010886 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010887 newscore += SCORE_REGION;
10888 if (flags & WF_RARE)
10889 newscore += SCORE_RARE;
10890
Bram Moolenaar0c405862005-06-22 22:26:26 +000010891 if (!spell_valid_case(su->su_badflags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000010892 captype(preword + sp->ts_prewordlen, NULL)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010893 newscore += SCORE_ICASE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010894 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010895
Bram Moolenaar4770d092006-01-12 23:22:24 +000010896 /* TODO: how about splitting in the soundfold tree? */
10897 if (fword_ends
10898 && goodword_ends
10899 && sp->ts_fidx >= sp->ts_fidxtry
10900 && compound_ok)
10901 {
10902 /* The badword also ends: add suggestions. */
10903#ifdef DEBUG_TRIEWALK
10904 if (soundfold && STRCMP(preword, "smwrd") == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010905 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010906 int j;
10907
10908 /* print the stack of changes that brought us here */
10909 smsg("------ %s -------", fword);
10910 for (j = 0; j < depth; ++j)
10911 smsg("%s", changename[j]);
10912 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010913#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000010914 if (soundfold)
10915 {
10916 /* For soundfolded words we need to find the original
10917 * words, the edit distance and then add them. */
10918 add_sound_suggest(su, preword, sp->ts_score, lp);
10919 }
10920 else
10921 {
10922 /* Give a penalty when changing non-word char to word
10923 * char, e.g., "thes," -> "these". */
10924 p = fword + sp->ts_fidx;
10925 mb_ptr_back(fword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010926 if (!spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010927 {
10928 p = preword + STRLEN(preword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010929 mb_ptr_back(preword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010930 if (spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000010931 newscore += SCORE_NONWORD;
10932 }
10933
Bram Moolenaar4770d092006-01-12 23:22:24 +000010934 /* Give a bonus to words seen before. */
10935 score = score_wordcount_adj(slang,
10936 sp->ts_score + newscore,
10937 preword + sp->ts_prewordlen,
10938 sp->ts_prewordlen > 0);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010939
Bram Moolenaar4770d092006-01-12 23:22:24 +000010940 /* Add the suggestion if the score isn't too bad. */
10941 if (score <= su->su_maxscore)
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000010942 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010943 add_suggestion(su, &su->su_ga, preword,
10944 sp->ts_fidx - repextra,
10945 score, 0, FALSE, lp->lp_sallang, FALSE);
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000010946
10947 if (su->su_badflags & WF_MIXCAP)
10948 {
10949 /* We really don't know if the word should be
10950 * upper or lower case, add both. */
10951 c = captype(preword, NULL);
10952 if (c == 0 || c == WF_ALLCAP)
10953 {
10954 make_case_word(tword + sp->ts_splitoff,
10955 preword + sp->ts_prewordlen,
10956 c == 0 ? WF_ALLCAP : 0);
10957
10958 add_suggestion(su, &su->su_ga, preword,
10959 sp->ts_fidx - repextra,
10960 score + SCORE_ICASE, 0, FALSE,
10961 lp->lp_sallang, FALSE);
10962 }
10963 }
10964 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010965 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010966 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010967
Bram Moolenaar4770d092006-01-12 23:22:24 +000010968 /*
10969 * Try word split and/or compounding.
10970 */
10971 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
Bram Moolenaarea424162005-06-16 21:51:00 +000010972#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000010973 /* Don't split halfway a character. */
10974 && (!has_mbyte || sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000010975#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000010976 )
10977 {
10978 int try_compound;
10979 int try_split;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010980
Bram Moolenaar4770d092006-01-12 23:22:24 +000010981 /* If past the end of the bad word don't try a split.
10982 * Otherwise try changing the next word. E.g., find
10983 * suggestions for "the the" where the second "the" is
10984 * different. It's done like a split.
10985 * TODO: word split for soundfold words */
10986 try_split = (sp->ts_fidx - repextra < su->su_badlen)
10987 && !soundfold;
10988
10989 /* Get here in several situations:
10990 * 1. The word in the tree ends:
10991 * If the word allows compounding try that. Otherwise try
10992 * a split by inserting a space. For both check that a
10993 * valid word starts at fword[sp->ts_fidx].
10994 * For NOBREAK do like compounding to be able to check if
10995 * the next word is valid.
10996 * 2. The badword does end, but it was due to a change (e.g.,
10997 * a swap). No need to split, but do check that the
10998 * following word is valid.
10999 * 3. The badword and the word in the tree end. It may still
11000 * be possible to compound another (short) word.
11001 */
11002 try_compound = FALSE;
11003 if (!soundfold
11004 && slang->sl_compprog != NULL
11005 && ((unsigned)flags >> 24) != 0
11006 && sp->ts_twordlen - sp->ts_splitoff
11007 >= slang->sl_compminlen
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011008#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011009 && (!has_mbyte
11010 || slang->sl_compminlen == 0
11011 || mb_charlen(tword + sp->ts_splitoff)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011012 >= slang->sl_compminlen)
11013#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011014 && (slang->sl_compsylmax < MAXWLEN
11015 || sp->ts_complen + 1 - sp->ts_compsplit
11016 < slang->sl_compmax)
11017 && (byte_in_str(sp->ts_complen == sp->ts_compsplit
11018 ? slang->sl_compstartflags
11019 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +000011020 ((unsigned)flags >> 24))))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011021 {
11022 try_compound = TRUE;
11023 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11024 compflags[sp->ts_complen + 1] = NUL;
11025 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000011026
Bram Moolenaar4770d092006-01-12 23:22:24 +000011027 /* For NOBREAK we never try splitting, it won't make any word
11028 * valid. */
11029 if (slang->sl_nobreak)
11030 try_compound = TRUE;
Bram Moolenaar78622822005-08-23 21:00:13 +000011031
Bram Moolenaar4770d092006-01-12 23:22:24 +000011032 /* If we could add a compound word, and it's also possible to
11033 * split at this point, do the split first and set
11034 * TSF_DIDSPLIT to avoid doing it again. */
11035 else if (!fword_ends
11036 && try_compound
11037 && (sp->ts_flags & TSF_DIDSPLIT) == 0)
11038 {
11039 try_compound = FALSE;
11040 sp->ts_flags |= TSF_DIDSPLIT;
11041 --sp->ts_curi; /* do the same NUL again */
11042 compflags[sp->ts_complen] = NUL;
11043 }
11044 else
11045 sp->ts_flags &= ~TSF_DIDSPLIT;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011046
Bram Moolenaar4770d092006-01-12 23:22:24 +000011047 if (try_split || try_compound)
11048 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011049 if (!try_compound && (!fword_ends || !goodword_ends))
Bram Moolenaard12a1322005-08-21 22:08:24 +000011050 {
11051 /* If we're going to split need to check that the
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011052 * words so far are valid for compounding. If there
11053 * is only one word it must not have the NEEDCOMPOUND
11054 * flag. */
11055 if (sp->ts_complen == sp->ts_compsplit
11056 && (flags & WF_NEEDCOMP))
11057 break;
Bram Moolenaare52325c2005-08-22 22:54:29 +000011058 p = preword;
11059 while (*skiptowhite(p) != NUL)
11060 p = skipwhite(skiptowhite(p));
Bram Moolenaard12a1322005-08-21 22:08:24 +000011061 if (sp->ts_complen > sp->ts_compsplit
Bram Moolenaare52325c2005-08-22 22:54:29 +000011062 && !can_compound(slang, p,
Bram Moolenaard12a1322005-08-21 22:08:24 +000011063 compflags + sp->ts_compsplit))
11064 break;
11065 newscore += SCORE_SPLIT;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011066
11067 /* Give a bonus to words seen before. */
11068 newscore = score_wordcount_adj(slang, newscore,
11069 preword + sp->ts_prewordlen, TRUE);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011070 }
11071
Bram Moolenaar4770d092006-01-12 23:22:24 +000011072 if (TRY_DEEPER(su, stack, depth, newscore))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011073 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011074 go_deeper(stack, depth, newscore);
11075#ifdef DEBUG_TRIEWALK
11076 if (!try_compound && !fword_ends)
11077 sprintf(changename[depth], "%.*s-%s: split",
11078 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11079 else
11080 sprintf(changename[depth], "%.*s-%s: compound",
11081 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11082#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011083 /* Save things to be restored at STATE_SPLITUNDO. */
Bram Moolenaar0c405862005-06-22 22:26:26 +000011084 sp->ts_save_badflags = su->su_badflags;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011085 sp->ts_state = STATE_SPLITUNDO;
11086
11087 ++depth;
11088 sp = &stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011089
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011090 /* Append a space to preword when splitting. */
11091 if (!try_compound && !fword_ends)
11092 STRCAT(preword, " ");
Bram Moolenaar5195e452005-08-19 20:32:47 +000011093 sp->ts_prewordlen = STRLEN(preword);
11094 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar78622822005-08-23 21:00:13 +000011095 sp->ts_splitfidx = sp->ts_fidx;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011096
11097 /* If the badword has a non-word character at this
11098 * position skip it. That means replacing the
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011099 * non-word character with a space. Always skip a
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011100 * character when the word ends. But only when the
11101 * good word can end. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011102 if (((!try_compound && !spell_iswordp_nmw(fword
11103 + sp->ts_fidx))
11104 || fword_ends)
11105 && fword[sp->ts_fidx] != NUL
11106 && goodword_ends)
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011107 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011108 int l;
11109
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011110#ifdef FEAT_MBYTE
11111 if (has_mbyte)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011112 l = MB_BYTE2LEN(fword[sp->ts_fidx]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011113 else
11114#endif
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011115 l = 1;
11116 if (fword_ends)
11117 {
11118 /* Copy the skipped character to preword. */
Bram Moolenaar5195e452005-08-19 20:32:47 +000011119 mch_memmove(preword + sp->ts_prewordlen,
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011120 fword + sp->ts_fidx, l);
Bram Moolenaar5195e452005-08-19 20:32:47 +000011121 sp->ts_prewordlen += l;
11122 preword[sp->ts_prewordlen] = NUL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011123 }
11124 else
11125 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
11126 sp->ts_fidx += l;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011127 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000011128
Bram Moolenaard12a1322005-08-21 22:08:24 +000011129 /* When compounding include compound flag in
11130 * compflags[] (already set above). When splitting we
11131 * may start compounding over again. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011132 if (try_compound)
Bram Moolenaar5195e452005-08-19 20:32:47 +000011133 ++sp->ts_complen;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011134 else
Bram Moolenaard12a1322005-08-21 22:08:24 +000011135 sp->ts_compsplit = sp->ts_complen;
11136 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011137
Bram Moolenaar53805d12005-08-01 07:08:33 +000011138 /* set su->su_badflags to the caps type at this
11139 * position */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011140#ifdef FEAT_MBYTE
11141 if (has_mbyte)
Bram Moolenaar53805d12005-08-01 07:08:33 +000011142 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011143 else
11144#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +000011145 n = sp->ts_fidx;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011146 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000011147 su->su_badptr + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011148
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011149 /* Restart at top of the tree. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011150 sp->ts_arridx = 0;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011151
11152 /* If there are postponed prefixes, try these too. */
11153 if (pbyts != NULL)
11154 {
11155 byts = pbyts;
11156 idxs = pidxs;
11157 sp->ts_prefixdepth = PFD_PREFIXTREE;
11158 sp->ts_state = STATE_NOPREFIX;
11159 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011160 }
11161 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011162 }
11163 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011164
Bram Moolenaar4770d092006-01-12 23:22:24 +000011165 case STATE_SPLITUNDO:
11166 /* Undo the changes done for word split or compound word. */
11167 su->su_badflags = sp->ts_save_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011168
Bram Moolenaar4770d092006-01-12 23:22:24 +000011169 /* Continue looking for NUL bytes. */
11170 sp->ts_state = STATE_START;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011171
Bram Moolenaar4770d092006-01-12 23:22:24 +000011172 /* In case we went into the prefix tree. */
11173 byts = fbyts;
11174 idxs = fidxs;
11175 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011176
Bram Moolenaar4770d092006-01-12 23:22:24 +000011177 case STATE_ENDNUL:
11178 /* Past the NUL bytes in the node. */
11179 su->su_badflags = sp->ts_save_badflags;
11180 if (fword[sp->ts_fidx] == NUL
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011181#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011182 && sp->ts_tcharlen == 0
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011183#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011184 )
11185 {
11186 /* The badword ends, can't use STATE_PLAIN. */
11187 sp->ts_state = STATE_DEL;
11188 break;
11189 }
11190 sp->ts_state = STATE_PLAIN;
11191 /*FALLTHROUGH*/
11192
11193 case STATE_PLAIN:
11194 /*
11195 * Go over all possible bytes at this node, add each to tword[]
11196 * and use child node. "ts_curi" is the index.
11197 */
11198 arridx = sp->ts_arridx;
11199 if (sp->ts_curi > byts[arridx])
11200 {
11201 /* Done all bytes at this node, do next state. When still at
11202 * already changed bytes skip the other tricks. */
11203 if (sp->ts_fidx >= sp->ts_fidxtry)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011204 sp->ts_state = STATE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011205 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011206 sp->ts_state = STATE_FINAL;
11207 }
11208 else
11209 {
11210 arridx += sp->ts_curi++;
11211 c = byts[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011212
Bram Moolenaar4770d092006-01-12 23:22:24 +000011213 /* Normal byte, go one level deeper. If it's not equal to the
11214 * byte in the bad word adjust the score. But don't even try
11215 * when the byte was already changed. And don't try when we
11216 * just deleted this byte, accepting it is always cheaper than
11217 * delete + substitute. */
11218 if (c == fword[sp->ts_fidx]
Bram Moolenaarea424162005-06-16 21:51:00 +000011219#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011220 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011221#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011222 )
11223 newscore = 0;
11224 else
11225 newscore = SCORE_SUBST;
11226 if ((newscore == 0
11227 || (sp->ts_fidx >= sp->ts_fidxtry
11228 && ((sp->ts_flags & TSF_DIDDEL) == 0
11229 || c != fword[sp->ts_delidx])))
11230 && TRY_DEEPER(su, stack, depth, newscore))
11231 {
11232 go_deeper(stack, depth, newscore);
11233#ifdef DEBUG_TRIEWALK
11234 if (newscore > 0)
11235 sprintf(changename[depth], "%.*s-%s: subst %c to %c",
11236 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11237 fword[sp->ts_fidx], c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011238 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011239 sprintf(changename[depth], "%.*s-%s: accept %c",
11240 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11241 fword[sp->ts_fidx]);
11242#endif
11243 ++depth;
11244 sp = &stack[depth];
11245 ++sp->ts_fidx;
11246 tword[sp->ts_twordlen++] = c;
11247 sp->ts_arridx = idxs[arridx];
Bram Moolenaarea424162005-06-16 21:51:00 +000011248#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011249 if (newscore == SCORE_SUBST)
11250 sp->ts_isdiff = DIFF_YES;
11251 if (has_mbyte)
11252 {
11253 /* Multi-byte characters are a bit complicated to
11254 * handle: They differ when any of the bytes differ
11255 * and then their length may also differ. */
11256 if (sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011257 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011258 /* First byte. */
11259 sp->ts_tcharidx = 0;
11260 sp->ts_tcharlen = MB_BYTE2LEN(c);
11261 sp->ts_fcharstart = sp->ts_fidx - 1;
11262 sp->ts_isdiff = (newscore != 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011263 ? DIFF_YES : DIFF_NONE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011264 }
11265 else if (sp->ts_isdiff == DIFF_INSERT)
11266 /* When inserting trail bytes don't advance in the
11267 * bad word. */
11268 --sp->ts_fidx;
11269 if (++sp->ts_tcharidx == sp->ts_tcharlen)
11270 {
11271 /* Last byte of character. */
11272 if (sp->ts_isdiff == DIFF_YES)
Bram Moolenaarea424162005-06-16 21:51:00 +000011273 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011274 /* Correct ts_fidx for the byte length of the
11275 * character (we didn't check that before). */
11276 sp->ts_fidx = sp->ts_fcharstart
11277 + MB_BYTE2LEN(
Bram Moolenaarea424162005-06-16 21:51:00 +000011278 fword[sp->ts_fcharstart]);
11279
Bram Moolenaar4770d092006-01-12 23:22:24 +000011280 /* For changing a composing character adjust
11281 * the score from SCORE_SUBST to
11282 * SCORE_SUBCOMP. */
11283 if (enc_utf8
11284 && utf_iscomposing(
11285 mb_ptr2char(tword
11286 + sp->ts_twordlen
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011287 - sp->ts_tcharlen))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011288 && utf_iscomposing(
11289 mb_ptr2char(fword
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011290 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011291 sp->ts_score -=
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011292 SCORE_SUBST - SCORE_SUBCOMP;
11293
Bram Moolenaar4770d092006-01-12 23:22:24 +000011294 /* For a similar character adjust score from
11295 * SCORE_SUBST to SCORE_SIMILAR. */
11296 else if (!soundfold
11297 && slang->sl_has_map
11298 && similar_chars(slang,
11299 mb_ptr2char(tword
11300 + sp->ts_twordlen
Bram Moolenaarea424162005-06-16 21:51:00 +000011301 - sp->ts_tcharlen),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011302 mb_ptr2char(fword
Bram Moolenaarea424162005-06-16 21:51:00 +000011303 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011304 sp->ts_score -=
Bram Moolenaarea424162005-06-16 21:51:00 +000011305 SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea424162005-06-16 21:51:00 +000011306 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011307 else if (sp->ts_isdiff == DIFF_INSERT
11308 && sp->ts_twordlen > sp->ts_tcharlen)
11309 {
11310 p = tword + sp->ts_twordlen - sp->ts_tcharlen;
11311 c = mb_ptr2char(p);
11312 if (enc_utf8 && utf_iscomposing(c))
11313 {
11314 /* Inserting a composing char doesn't
11315 * count that much. */
11316 sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
11317 }
11318 else
11319 {
11320 /* If the previous character was the same,
11321 * thus doubling a character, give a bonus
11322 * to the score. Also for the soundfold
11323 * tree (might seem illogical but does
11324 * give better scores). */
11325 mb_ptr_back(tword, p);
11326 if (c == mb_ptr2char(p))
11327 sp->ts_score -= SCORE_INS
11328 - SCORE_INSDUP;
11329 }
11330 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011331
Bram Moolenaar4770d092006-01-12 23:22:24 +000011332 /* Starting a new char, reset the length. */
11333 sp->ts_tcharlen = 0;
11334 }
Bram Moolenaarea408852005-06-25 22:49:46 +000011335 }
Bram Moolenaarea424162005-06-16 21:51:00 +000011336 else
11337#endif
Bram Moolenaarea408852005-06-25 22:49:46 +000011338 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011339 /* If we found a similar char adjust the score.
11340 * We do this after calling go_deeper() because
11341 * it's slow. */
11342 if (newscore != 0
11343 && !soundfold
11344 && slang->sl_has_map
11345 && similar_chars(slang,
11346 c, fword[sp->ts_fidx - 1]))
11347 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea408852005-06-25 22:49:46 +000011348 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011349 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011350 }
11351 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011352
Bram Moolenaar4770d092006-01-12 23:22:24 +000011353 case STATE_DEL:
11354#ifdef FEAT_MBYTE
11355 /* When past the first byte of a multi-byte char don't try
11356 * delete/insert/swap a character. */
11357 if (has_mbyte && sp->ts_tcharlen > 0)
11358 {
11359 sp->ts_state = STATE_FINAL;
11360 break;
11361 }
11362#endif
11363 /*
11364 * Try skipping one character in the bad word (delete it).
11365 */
11366 sp->ts_state = STATE_INS_PREP;
11367 sp->ts_curi = 1;
11368 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
11369 /* Deleting a vowel at the start of a word counts less, see
11370 * soundalike_score(). */
11371 newscore = 2 * SCORE_DEL / 3;
11372 else
11373 newscore = SCORE_DEL;
11374 if (fword[sp->ts_fidx] != NUL
11375 && TRY_DEEPER(su, stack, depth, newscore))
11376 {
11377 go_deeper(stack, depth, newscore);
11378#ifdef DEBUG_TRIEWALK
11379 sprintf(changename[depth], "%.*s-%s: delete %c",
11380 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11381 fword[sp->ts_fidx]);
11382#endif
11383 ++depth;
11384
11385 /* Remember what character we deleted, so that we can avoid
11386 * inserting it again. */
11387 stack[depth].ts_flags |= TSF_DIDDEL;
11388 stack[depth].ts_delidx = sp->ts_fidx;
11389
11390 /* Advance over the character in fword[]. Give a bonus to the
11391 * score if the same character is following "nn" -> "n". It's
11392 * a bit illogical for soundfold tree but it does give better
11393 * results. */
11394#ifdef FEAT_MBYTE
11395 if (has_mbyte)
11396 {
11397 c = mb_ptr2char(fword + sp->ts_fidx);
11398 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
11399 if (enc_utf8 && utf_iscomposing(c))
11400 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
11401 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
11402 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11403 }
11404 else
11405#endif
11406 {
11407 ++stack[depth].ts_fidx;
11408 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
11409 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11410 }
11411 break;
11412 }
11413 /*FALLTHROUGH*/
11414
11415 case STATE_INS_PREP:
11416 if (sp->ts_flags & TSF_DIDDEL)
11417 {
11418 /* If we just deleted a byte then inserting won't make sense,
11419 * a substitute is always cheaper. */
11420 sp->ts_state = STATE_SWAP;
11421 break;
11422 }
11423
11424 /* skip over NUL bytes */
11425 n = sp->ts_arridx;
11426 for (;;)
11427 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011428 if (sp->ts_curi > byts[n])
11429 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011430 /* Only NUL bytes at this node, go to next state. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011431 sp->ts_state = STATE_SWAP;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011432 break;
11433 }
11434 if (byts[n + sp->ts_curi] != NUL)
11435 {
11436 /* Found a byte to insert. */
11437 sp->ts_state = STATE_INS;
11438 break;
11439 }
11440 ++sp->ts_curi;
11441 }
11442 break;
11443
11444 /*FALLTHROUGH*/
11445
11446 case STATE_INS:
11447 /* Insert one byte. Repeat this for each possible byte at this
11448 * node. */
11449 n = sp->ts_arridx;
11450 if (sp->ts_curi > byts[n])
11451 {
11452 /* Done all bytes at this node, go to next state. */
11453 sp->ts_state = STATE_SWAP;
11454 break;
11455 }
11456
11457 /* Do one more byte at this node, but:
11458 * - Skip NUL bytes.
11459 * - Skip the byte if it's equal to the byte in the word,
11460 * accepting that byte is always better.
11461 */
11462 n += sp->ts_curi++;
11463 c = byts[n];
11464 if (soundfold && sp->ts_twordlen == 0 && c == '*')
11465 /* Inserting a vowel at the start of a word counts less,
11466 * see soundalike_score(). */
11467 newscore = 2 * SCORE_INS / 3;
11468 else
11469 newscore = SCORE_INS;
11470 if (c != fword[sp->ts_fidx]
11471 && TRY_DEEPER(su, stack, depth, newscore))
11472 {
11473 go_deeper(stack, depth, newscore);
11474#ifdef DEBUG_TRIEWALK
11475 sprintf(changename[depth], "%.*s-%s: insert %c",
11476 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11477 c);
11478#endif
11479 ++depth;
11480 sp = &stack[depth];
11481 tword[sp->ts_twordlen++] = c;
11482 sp->ts_arridx = idxs[n];
11483#ifdef FEAT_MBYTE
11484 if (has_mbyte)
11485 {
11486 fl = MB_BYTE2LEN(c);
11487 if (fl > 1)
11488 {
11489 /* There are following bytes for the same character.
11490 * We must find all bytes before trying
11491 * delete/insert/swap/etc. */
11492 sp->ts_tcharlen = fl;
11493 sp->ts_tcharidx = 1;
11494 sp->ts_isdiff = DIFF_INSERT;
11495 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011496 }
11497 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011498 fl = 1;
11499 if (fl == 1)
Bram Moolenaarea424162005-06-16 21:51:00 +000011500#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011501 {
11502 /* If the previous character was the same, thus doubling a
11503 * character, give a bonus to the score. Also for
11504 * soundfold words (illogical but does give a better
11505 * score). */
11506 if (sp->ts_twordlen >= 2
Bram Moolenaarea408852005-06-25 22:49:46 +000011507 && tword[sp->ts_twordlen - 2] == c)
Bram Moolenaar4770d092006-01-12 23:22:24 +000011508 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011509 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011510 }
11511 break;
11512
11513 case STATE_SWAP:
11514 /*
11515 * Swap two bytes in the bad word: "12" -> "21".
11516 * We change "fword" here, it's changed back afterwards at
11517 * STATE_UNSWAP.
11518 */
11519 p = fword + sp->ts_fidx;
11520 c = *p;
11521 if (c == NUL)
11522 {
11523 /* End of word, can't swap or replace. */
11524 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011525 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011526 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011527
Bram Moolenaar4770d092006-01-12 23:22:24 +000011528 /* Don't swap if the first character is not a word character.
11529 * SWAP3 etc. also don't make sense then. */
11530 if (!soundfold && !spell_iswordp(p, curbuf))
11531 {
11532 sp->ts_state = STATE_REP_INI;
11533 break;
11534 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011535
Bram Moolenaar4770d092006-01-12 23:22:24 +000011536#ifdef FEAT_MBYTE
11537 if (has_mbyte)
11538 {
11539 n = mb_cptr2len(p);
11540 c = mb_ptr2char(p);
11541 if (!soundfold && !spell_iswordp(p + n, curbuf))
11542 c2 = c; /* don't swap non-word char */
11543 else
11544 c2 = mb_ptr2char(p + n);
11545 }
11546 else
11547#endif
11548 {
11549 if (!soundfold && !spell_iswordp(p + 1, curbuf))
11550 c2 = c; /* don't swap non-word char */
11551 else
11552 c2 = p[1];
11553 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011554
Bram Moolenaar4770d092006-01-12 23:22:24 +000011555 /* When characters are identical, swap won't do anything.
11556 * Also get here if the second char is not a word character. */
11557 if (c == c2)
11558 {
11559 sp->ts_state = STATE_SWAP3;
11560 break;
11561 }
11562 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
11563 {
11564 go_deeper(stack, depth, SCORE_SWAP);
11565#ifdef DEBUG_TRIEWALK
11566 sprintf(changename[depth], "%.*s-%s: swap %c and %c",
11567 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11568 c, c2);
11569#endif
11570 sp->ts_state = STATE_UNSWAP;
11571 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000011572#ifdef FEAT_MBYTE
11573 if (has_mbyte)
11574 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011575 fl = mb_char2len(c2);
11576 mch_memmove(p, p + n, fl);
11577 mb_char2bytes(c, p + fl);
11578 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000011579 }
11580 else
11581#endif
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011582 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011583 p[0] = c2;
Bram Moolenaarea424162005-06-16 21:51:00 +000011584 p[1] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011585 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
Bram Moolenaarea424162005-06-16 21:51:00 +000011586 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011587 }
11588 else
11589 /* If this swap doesn't work then SWAP3 won't either. */
11590 sp->ts_state = STATE_REP_INI;
11591 break;
Bram Moolenaarea424162005-06-16 21:51:00 +000011592
Bram Moolenaar4770d092006-01-12 23:22:24 +000011593 case STATE_UNSWAP:
11594 /* Undo the STATE_SWAP swap: "21" -> "12". */
11595 p = fword + sp->ts_fidx;
11596#ifdef FEAT_MBYTE
11597 if (has_mbyte)
11598 {
11599 n = MB_BYTE2LEN(*p);
11600 c = mb_ptr2char(p + n);
11601 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
11602 mb_char2bytes(c, p);
11603 }
11604 else
11605#endif
11606 {
11607 c = *p;
11608 *p = p[1];
11609 p[1] = c;
11610 }
11611 /*FALLTHROUGH*/
11612
11613 case STATE_SWAP3:
11614 /* Swap two bytes, skipping one: "123" -> "321". We change
11615 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
11616 p = fword + sp->ts_fidx;
11617#ifdef FEAT_MBYTE
11618 if (has_mbyte)
11619 {
11620 n = mb_cptr2len(p);
11621 c = mb_ptr2char(p);
11622 fl = mb_cptr2len(p + n);
11623 c2 = mb_ptr2char(p + n);
11624 if (!soundfold && !spell_iswordp(p + n + fl, curbuf))
11625 c3 = c; /* don't swap non-word char */
11626 else
11627 c3 = mb_ptr2char(p + n + fl);
11628 }
11629 else
11630#endif
11631 {
11632 c = *p;
11633 c2 = p[1];
11634 if (!soundfold && !spell_iswordp(p + 2, curbuf))
11635 c3 = c; /* don't swap non-word char */
11636 else
11637 c3 = p[2];
11638 }
11639
	    /* When characters are identical: "121" then SWAP3 result is
	     * identical, ROT3L result is same as SWAP: "211", ROT3R result is
	     * same as SWAP on next char: "112".  Thus skip all swapping.
	     * Also skip when c3 is NUL.
	     * Also get here when the third character is not a word character.
	     * Second character may be any char: "a.b" -> "b.a" */
11646 if (c == c3 || c3 == NUL)
11647 {
11648 sp->ts_state = STATE_REP_INI;
11649 break;
11650 }
11651 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11652 {
11653 go_deeper(stack, depth, SCORE_SWAP3);
11654#ifdef DEBUG_TRIEWALK
11655 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
11656 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11657 c, c3);
11658#endif
11659 sp->ts_state = STATE_UNSWAP3;
11660 ++depth;
11661#ifdef FEAT_MBYTE
11662 if (has_mbyte)
11663 {
11664 tl = mb_char2len(c3);
11665 mch_memmove(p, p + n + fl, tl);
11666 mb_char2bytes(c2, p + tl);
11667 mb_char2bytes(c, p + fl + tl);
11668 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
11669 }
11670 else
11671#endif
11672 {
11673 p[0] = p[2];
11674 p[2] = c;
11675 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
11676 }
11677 }
11678 else
11679 sp->ts_state = STATE_REP_INI;
11680 break;
11681
11682 case STATE_UNSWAP3:
11683 /* Undo STATE_SWAP3: "321" -> "123" */
11684 p = fword + sp->ts_fidx;
11685#ifdef FEAT_MBYTE
11686 if (has_mbyte)
11687 {
11688 n = MB_BYTE2LEN(*p);
11689 c2 = mb_ptr2char(p + n);
11690 fl = MB_BYTE2LEN(p[n]);
11691 c = mb_ptr2char(p + n + fl);
11692 tl = MB_BYTE2LEN(p[n + fl]);
11693 mch_memmove(p + fl + tl, p, n);
11694 mb_char2bytes(c, p);
11695 mb_char2bytes(c2, p + tl);
11696 p = p + tl;
11697 }
11698 else
11699#endif
11700 {
11701 c = *p;
11702 *p = p[2];
11703 p[2] = c;
11704 ++p;
11705 }
11706
11707 if (!soundfold && !spell_iswordp(p, curbuf))
11708 {
11709 /* Middle char is not a word char, skip the rotate. First and
11710 * third char were already checked at swap and swap3. */
11711 sp->ts_state = STATE_REP_INI;
11712 break;
11713 }
11714
11715 /* Rotate three characters left: "123" -> "231". We change
11716 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
11717 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11718 {
11719 go_deeper(stack, depth, SCORE_SWAP3);
11720#ifdef DEBUG_TRIEWALK
11721 p = fword + sp->ts_fidx;
11722 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
11723 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11724 p[0], p[1], p[2]);
11725#endif
11726 sp->ts_state = STATE_UNROT3L;
11727 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000011728 p = fword + sp->ts_fidx;
11729#ifdef FEAT_MBYTE
11730 if (has_mbyte)
11731 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011732 n = mb_cptr2len(p);
Bram Moolenaarea424162005-06-16 21:51:00 +000011733 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011734 fl = mb_cptr2len(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011735 fl += mb_cptr2len(p + n + fl);
11736 mch_memmove(p, p + n, fl);
11737 mb_char2bytes(c, p + fl);
11738 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000011739 }
11740 else
11741#endif
11742 {
11743 c = *p;
11744 *p = p[1];
11745 p[1] = p[2];
11746 p[2] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011747 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
Bram Moolenaarea424162005-06-16 21:51:00 +000011748 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011749 }
11750 else
11751 sp->ts_state = STATE_REP_INI;
11752 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011753
Bram Moolenaar4770d092006-01-12 23:22:24 +000011754 case STATE_UNROT3L:
11755 /* Undo ROT3L: "231" -> "123" */
11756 p = fword + sp->ts_fidx;
Bram Moolenaarea424162005-06-16 21:51:00 +000011757#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011758 if (has_mbyte)
11759 {
11760 n = MB_BYTE2LEN(*p);
11761 n += MB_BYTE2LEN(p[n]);
11762 c = mb_ptr2char(p + n);
11763 tl = MB_BYTE2LEN(p[n]);
11764 mch_memmove(p + tl, p, n);
11765 mb_char2bytes(c, p);
11766 }
11767 else
Bram Moolenaarea424162005-06-16 21:51:00 +000011768#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011769 {
11770 c = p[2];
11771 p[2] = p[1];
11772 p[1] = *p;
11773 *p = c;
11774 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011775
Bram Moolenaar4770d092006-01-12 23:22:24 +000011776 /* Rotate three bytes right: "123" -> "312". We change "fword"
11777 * here, it's changed back afterwards at STATE_UNROT3R. */
11778 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11779 {
11780 go_deeper(stack, depth, SCORE_SWAP3);
11781#ifdef DEBUG_TRIEWALK
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011782 p = fword + sp->ts_fidx;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011783 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
11784 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11785 p[0], p[1], p[2]);
11786#endif
11787 sp->ts_state = STATE_UNROT3R;
11788 ++depth;
11789 p = fword + sp->ts_fidx;
11790#ifdef FEAT_MBYTE
11791 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000011792 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011793 n = mb_cptr2len(p);
11794 n += mb_cptr2len(p + n);
11795 c = mb_ptr2char(p + n);
11796 tl = mb_cptr2len(p + n);
11797 mch_memmove(p + tl, p, n);
11798 mb_char2bytes(c, p);
11799 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011800 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011801 else
11802#endif
11803 {
11804 c = p[2];
11805 p[2] = p[1];
11806 p[1] = *p;
11807 *p = c;
11808 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
11809 }
11810 }
11811 else
11812 sp->ts_state = STATE_REP_INI;
11813 break;
11814
11815 case STATE_UNROT3R:
11816 /* Undo ROT3R: "312" -> "123" */
11817 p = fword + sp->ts_fidx;
11818#ifdef FEAT_MBYTE
11819 if (has_mbyte)
11820 {
11821 c = mb_ptr2char(p);
11822 tl = MB_BYTE2LEN(*p);
11823 n = MB_BYTE2LEN(p[tl]);
11824 n += MB_BYTE2LEN(p[tl + n]);
11825 mch_memmove(p, p + tl, n);
11826 mb_char2bytes(c, p + n);
11827 }
11828 else
11829#endif
11830 {
11831 c = *p;
11832 *p = p[1];
11833 p[1] = p[2];
11834 p[2] = c;
11835 }
11836 /*FALLTHROUGH*/
11837
11838 case STATE_REP_INI:
11839 /* Check if matching with REP items from the .aff file would work.
11840 * Quickly skip if:
11841 * - there are no REP items and we are not in the soundfold trie
11842 * - the score is going to be too high anyway
11843 * - already applied a REP item or swapped here */
11844 if ((lp->lp_replang == NULL && !soundfold)
11845 || sp->ts_score + SCORE_REP >= su->su_maxscore
11846 || sp->ts_fidx < sp->ts_fidxtry)
11847 {
11848 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011849 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011850 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011851
Bram Moolenaar4770d092006-01-12 23:22:24 +000011852 /* Use the first byte to quickly find the first entry that may
11853 * match. If the index is -1 there is none. */
11854 if (soundfold)
11855 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
11856 else
11857 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011858
Bram Moolenaar4770d092006-01-12 23:22:24 +000011859 if (sp->ts_curi < 0)
11860 {
11861 sp->ts_state = STATE_FINAL;
11862 break;
11863 }
11864
11865 sp->ts_state = STATE_REP;
11866 /*FALLTHROUGH*/
11867
11868 case STATE_REP:
11869 /* Try matching with REP items from the .aff file. For each match
11870 * replace the characters and check if the resulting word is
11871 * valid. */
11872 p = fword + sp->ts_fidx;
11873
11874 if (soundfold)
11875 gap = &slang->sl_repsal;
11876 else
11877 gap = &lp->lp_replang->sl_rep;
11878 while (sp->ts_curi < gap->ga_len)
11879 {
11880 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
11881 if (*ftp->ft_from != *p)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011882 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011883 /* past possible matching entries */
11884 sp->ts_curi = gap->ga_len;
11885 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011886 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011887 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
11888 && TRY_DEEPER(su, stack, depth, SCORE_REP))
11889 {
11890 go_deeper(stack, depth, SCORE_REP);
11891#ifdef DEBUG_TRIEWALK
11892 sprintf(changename[depth], "%.*s-%s: replace %s with %s",
11893 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11894 ftp->ft_from, ftp->ft_to);
11895#endif
11896 /* Need to undo this afterwards. */
11897 sp->ts_state = STATE_REP_UNDO;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000011898
Bram Moolenaar4770d092006-01-12 23:22:24 +000011899 /* Change the "from" to the "to" string. */
11900 ++depth;
11901 fl = STRLEN(ftp->ft_from);
11902 tl = STRLEN(ftp->ft_to);
11903 if (fl != tl)
11904 {
11905 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
11906 repextra += tl - fl;
11907 }
11908 mch_memmove(p, ftp->ft_to, tl);
11909 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
11910#ifdef FEAT_MBYTE
11911 stack[depth].ts_tcharlen = 0;
11912#endif
11913 break;
11914 }
11915 }
11916
11917 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
11918 /* No (more) matches. */
11919 sp->ts_state = STATE_FINAL;
11920
11921 break;
11922
11923 case STATE_REP_UNDO:
11924 /* Undo a REP replacement and continue with the next one. */
11925 if (soundfold)
11926 gap = &slang->sl_repsal;
11927 else
11928 gap = &lp->lp_replang->sl_rep;
11929 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
11930 fl = STRLEN(ftp->ft_from);
11931 tl = STRLEN(ftp->ft_to);
11932 p = fword + sp->ts_fidx;
11933 if (fl != tl)
11934 {
11935 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
11936 repextra -= tl - fl;
11937 }
11938 mch_memmove(p, ftp->ft_from, fl);
11939 sp->ts_state = STATE_REP;
11940 break;
11941
11942 default:
11943 /* Did all possible states at this level, go up one level. */
11944 --depth;
11945
11946 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
11947 {
11948 /* Continue in or go back to the prefix tree. */
11949 byts = pbyts;
11950 idxs = pidxs;
11951 }
11952
11953 /* Don't check for CTRL-C too often, it takes time. */
11954 if (--breakcheckcount == 0)
11955 {
11956 ui_breakcheck();
11957 breakcheckcount = 1000;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000011958 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011959 }
11960 }
11961}
11962
Bram Moolenaar4770d092006-01-12 23:22:24 +000011963
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011964/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000011965 * Go one level deeper in the tree.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011966 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011967 static void
11968go_deeper(stack, depth, score_add)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011969 trystate_T *stack;
11970 int depth;
11971 int score_add;
11972{
Bram Moolenaarea424162005-06-16 21:51:00 +000011973 stack[depth + 1] = stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011974 stack[depth + 1].ts_state = STATE_START;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011975 stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011976 stack[depth + 1].ts_curi = 1; /* start just after length byte */
Bram Moolenaard12a1322005-08-21 22:08:24 +000011977 stack[depth + 1].ts_flags = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011978}
11979
#ifdef FEAT_MBYTE
/*
 * Case-folding may change the number of bytes of a character.
 * Count the characters in the first "flen" bytes of "fword" and return the
 * byte length of the same number of characters at the start of "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u	*fword;
    int		flen;
    char_u	*word;
{
    char_u	*fend = fword + flen;
    char_u	*s;
    int		nchars = 0;

    /* Count the characters in the folded word. */
    s = fword;
    while (s < fend)
    {
	++nchars;
	mb_ptr_adv(s);
    }

    /* Advance over the same number of characters in "word". */
    s = word;
    while (nchars > 0)
    {
	--nchars;
	mb_ptr_adv(s);
    }

    return (int)(s - word);
}
#endif
12001
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012002/*
12003 * "fword" is a good word with case folded. Find the matching keep-case
12004 * words and put it in "kword".
12005 * Theoretically there could be several keep-case words that result in the
12006 * same case-folded word, but we only find one...
12007 */
12008 static void
12009find_keepcap_word(slang, fword, kword)
12010 slang_T *slang;
12011 char_u *fword;
12012 char_u *kword;
12013{
12014 char_u uword[MAXWLEN]; /* "fword" in upper-case */
12015 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012016 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012017
12018 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012019 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012020 int round[MAXWLEN];
12021 int fwordidx[MAXWLEN];
12022 int uwordidx[MAXWLEN];
12023 int kwordlen[MAXWLEN];
12024
12025 int flen, ulen;
12026 int l;
12027 int len;
12028 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012029 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012030 char_u *p;
12031 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012032 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012033
12034 if (byts == NULL)
12035 {
12036 /* array is empty: "cannot happen" */
12037 *kword = NUL;
12038 return;
12039 }
12040
12041 /* Make an all-cap version of "fword". */
12042 allcap_copy(fword, uword);
12043
12044 /*
12045 * Each character needs to be tried both case-folded and upper-case.
12046 * All this gets very complicated if we keep in mind that changing case
12047 * may change the byte length of a multi-byte character...
12048 */
12049 depth = 0;
12050 arridx[0] = 0;
12051 round[0] = 0;
12052 fwordidx[0] = 0;
12053 uwordidx[0] = 0;
12054 kwordlen[0] = 0;
12055 while (depth >= 0)
12056 {
12057 if (fword[fwordidx[depth]] == NUL)
12058 {
12059 /* We are at the end of "fword". If the tree allows a word to end
12060 * here we have found a match. */
12061 if (byts[arridx[depth] + 1] == 0)
12062 {
12063 kword[kwordlen[depth]] = NUL;
12064 return;
12065 }
12066
12067 /* kword is getting too long, continue one level up */
12068 --depth;
12069 }
12070 else if (++round[depth] > 2)
12071 {
12072 /* tried both fold-case and upper-case character, continue one
12073 * level up */
12074 --depth;
12075 }
12076 else
12077 {
12078 /*
12079 * round[depth] == 1: Try using the folded-case character.
12080 * round[depth] == 2: Try using the upper-case character.
12081 */
12082#ifdef FEAT_MBYTE
12083 if (has_mbyte)
12084 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012085 flen = mb_cptr2len(fword + fwordidx[depth]);
12086 ulen = mb_cptr2len(uword + uwordidx[depth]);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012087 }
12088 else
12089#endif
12090 ulen = flen = 1;
12091 if (round[depth] == 1)
12092 {
12093 p = fword + fwordidx[depth];
12094 l = flen;
12095 }
12096 else
12097 {
12098 p = uword + uwordidx[depth];
12099 l = ulen;
12100 }
12101
12102 for (tryidx = arridx[depth]; l > 0; --l)
12103 {
12104 /* Perform a binary search in the list of accepted bytes. */
12105 len = byts[tryidx++];
12106 c = *p++;
12107 lo = tryidx;
12108 hi = tryidx + len - 1;
12109 while (lo < hi)
12110 {
12111 m = (lo + hi) / 2;
12112 if (byts[m] > c)
12113 hi = m - 1;
12114 else if (byts[m] < c)
12115 lo = m + 1;
12116 else
12117 {
12118 lo = hi = m;
12119 break;
12120 }
12121 }
12122
12123 /* Stop if there is no matching byte. */
12124 if (hi < lo || byts[lo] != c)
12125 break;
12126
12127 /* Continue at the child (if there is one). */
12128 tryidx = idxs[lo];
12129 }
12130
12131 if (l == 0)
12132 {
12133 /*
12134 * Found the matching char. Copy it to "kword" and go a
12135 * level deeper.
12136 */
12137 if (round[depth] == 1)
12138 {
12139 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
12140 flen);
12141 kwordlen[depth + 1] = kwordlen[depth] + flen;
12142 }
12143 else
12144 {
12145 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
12146 ulen);
12147 kwordlen[depth + 1] = kwordlen[depth] + ulen;
12148 }
12149 fwordidx[depth + 1] = fwordidx[depth] + flen;
12150 uwordidx[depth + 1] = uwordidx[depth] + ulen;
12151
12152 ++depth;
12153 arridx[depth] = tryidx;
12154 round[depth] = 0;
12155 }
12156 }
12157 }
12158
12159 /* Didn't find it: "cannot happen". */
12160 *kword = NUL;
12161}
12162
12163/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012164 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
12165 * su->su_sga.
12166 */
12167 static void
12168score_comp_sal(su)
12169 suginfo_T *su;
12170{
12171 langp_T *lp;
12172 char_u badsound[MAXWLEN];
12173 int i;
12174 suggest_T *stp;
12175 suggest_T *sstp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012176 int score;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012177 int lpi;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012178
12179 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
12180 return;
12181
12182 /* Use the sound-folding of the first language that supports it. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012183 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012184 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012185 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012186 if (lp->lp_slang->sl_sal.ga_len > 0)
12187 {
12188 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012189 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012190
12191 for (i = 0; i < su->su_ga.ga_len; ++i)
12192 {
12193 stp = &SUG(su->su_ga, i);
12194
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012195 /* Case-fold the suggested word, sound-fold it and compute the
12196 * sound-a-like score. */
12197 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012198 if (score < SCORE_MAXMAX)
12199 {
12200 /* Add the suggestion. */
12201 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
12202 sstp->st_word = vim_strsave(stp->st_word);
12203 if (sstp->st_word != NULL)
12204 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012205 sstp->st_wordlen = stp->st_wordlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012206 sstp->st_score = score;
12207 sstp->st_altscore = 0;
12208 sstp->st_orglen = stp->st_orglen;
12209 ++su->su_sga.ga_len;
12210 }
12211 }
12212 }
12213 break;
12214 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012215 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012216}
12217
/*
 * Combine the list of suggestions in su->su_ga and su->su_sga.
 * They are intertwined.
 */
12222 static void
12223score_combine(su)
12224 suginfo_T *su;
12225{
12226 int i;
12227 int j;
12228 garray_T ga;
12229 garray_T *gap;
12230 langp_T *lp;
12231 suggest_T *stp;
12232 char_u *p;
12233 char_u badsound[MAXWLEN];
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012234 int round;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012235 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012236 slang_T *slang = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012237
12238 /* Add the alternate score to su_ga. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012239 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012240 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012241 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012242 if (lp->lp_slang->sl_sal.ga_len > 0)
12243 {
12244 /* soundfold the bad word */
Bram Moolenaar4770d092006-01-12 23:22:24 +000012245 slang = lp->lp_slang;
12246 spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012247
12248 for (i = 0; i < su->su_ga.ga_len; ++i)
12249 {
12250 stp = &SUG(su->su_ga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012251 stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012252 if (stp->st_altscore == SCORE_MAXMAX)
12253 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
12254 else
12255 stp->st_score = (stp->st_score * 3
12256 + stp->st_altscore) / 4;
12257 stp->st_salscore = FALSE;
12258 }
12259 break;
12260 }
12261 }
12262
Bram Moolenaar4770d092006-01-12 23:22:24 +000012263 if (slang == NULL) /* just in case */
12264 return;
12265
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012266 /* Add the alternate score to su_sga. */
12267 for (i = 0; i < su->su_sga.ga_len; ++i)
12268 {
12269 stp = &SUG(su->su_sga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012270 stp->st_altscore = spell_edit_score(slang,
12271 su->su_badword, stp->st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012272 if (stp->st_score == SCORE_MAXMAX)
12273 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
12274 else
12275 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
12276 stp->st_salscore = TRUE;
12277 }
12278
Bram Moolenaar4770d092006-01-12 23:22:24 +000012279 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
12280 * for both lists. */
12281 check_suggestions(su, &su->su_ga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012282 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012283 check_suggestions(su, &su->su_sga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012284 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
12285
12286 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
12287 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
12288 return;
12289
12290 stp = &SUG(ga, 0);
12291 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
12292 {
12293 /* round 1: get a suggestion from su_ga
12294 * round 2: get a suggestion from su_sga */
12295 for (round = 1; round <= 2; ++round)
12296 {
12297 gap = round == 1 ? &su->su_ga : &su->su_sga;
12298 if (i < gap->ga_len)
12299 {
12300 /* Don't add a word if it's already there. */
12301 p = SUG(*gap, i).st_word;
12302 for (j = 0; j < ga.ga_len; ++j)
12303 if (STRCMP(stp[j].st_word, p) == 0)
12304 break;
12305 if (j == ga.ga_len)
12306 stp[ga.ga_len++] = SUG(*gap, i);
12307 else
12308 vim_free(p);
12309 }
12310 }
12311 }
12312
12313 ga_clear(&su->su_ga);
12314 ga_clear(&su->su_sga);
12315
12316 /* Truncate the list to the number of suggestions that will be displayed. */
12317 if (ga.ga_len > su->su_maxcount)
12318 {
12319 for (i = su->su_maxcount; i < ga.ga_len; ++i)
12320 vim_free(stp[i].st_word);
12321 ga.ga_len = su->su_maxcount;
12322 }
12323
12324 su->su_ga = ga;
12325}
12326
12327/*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012328 * For the goodword in "stp" compute the soundalike score compared to the
12329 * badword.
12330 */
12331 static int
12332stp_sal_score(stp, su, slang, badsound)
12333 suggest_T *stp;
12334 suginfo_T *su;
12335 slang_T *slang;
12336 char_u *badsound; /* sound-folded badword */
12337{
12338 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012339 char_u *pbad;
12340 char_u *pgood;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012341 char_u badsound2[MAXWLEN];
12342 char_u fword[MAXWLEN];
12343 char_u goodsound[MAXWLEN];
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012344 char_u goodword[MAXWLEN];
12345 int lendiff;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012346
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012347 lendiff = (int)(su->su_badlen - stp->st_orglen);
12348 if (lendiff >= 0)
12349 pbad = badsound;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012350 else
12351 {
12352 /* soundfold the bad word with more characters following */
12353 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
12354
12355 /* When joining two words the sound often changes a lot. E.g., "t he"
12356 * sounds like "t h" while "the" sounds like "@". Avoid that by
12357 * removing the space. Don't do it when the good word also contains a
12358 * space. */
12359 if (vim_iswhite(su->su_badptr[su->su_badlen])
12360 && *skiptowhite(stp->st_word) == NUL)
12361 for (p = fword; *(p = skiptowhite(p)) != NUL; )
12362 mch_memmove(p, p + 1, STRLEN(p));
12363
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012364 spell_soundfold(slang, fword, TRUE, badsound2);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012365 pbad = badsound2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012366 }
12367
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012368 if (lendiff > 0)
12369 {
12370 /* Add part of the bad word to the good word, so that we soundfold
12371 * what replaces the bad word. */
12372 STRCPY(goodword, stp->st_word);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012373 vim_strncpy(goodword + stp->st_wordlen,
12374 su->su_badptr + su->su_badlen - lendiff, lendiff);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012375 pgood = goodword;
12376 }
12377 else
12378 pgood = stp->st_word;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012379
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012380 /* Sound-fold the word and compute the score for the difference. */
12381 spell_soundfold(slang, pgood, FALSE, goodsound);
12382
12383 return soundalike_score(goodsound, pbad);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012384}
12385
Bram Moolenaar4770d092006-01-12 23:22:24 +000012386/* structure used to store soundfolded words that add_sound_suggest() has
12387 * handled already. */
12388typedef struct
12389{
12390 short sft_score; /* lowest score used */
12391 char_u sft_word[1]; /* soundfolded word, actually longer */
12392} sftword_T;
12393
12394static sftword_T dumsft;
12395#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
12396#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key)
12397
12398/*
12399 * Prepare for calling suggest_try_soundalike().
12400 */
12401 static void
12402suggest_try_soundalike_prep()
12403{
12404 langp_T *lp;
12405 int lpi;
12406 slang_T *slang;
12407
12408 /* Do this for all languages that support sound folding and for which a
12409 * .sug file has been loaded. */
12410 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12411 {
12412 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12413 slang = lp->lp_slang;
12414 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12415 /* prepare the hashtable used by add_sound_suggest() */
12416 hash_init(&slang->sl_sounddone);
12417 }
12418}
12419
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012420/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012421 * Find suggestions by comparing the word in a sound-a-like form.
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012422 * Note: This doesn't support postponed prefixes.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012423 */
12424 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000012425suggest_try_soundalike(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012426 suginfo_T *su;
12427{
12428 char_u salword[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012429 langp_T *lp;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012430 int lpi;
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012431 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012432
Bram Moolenaar4770d092006-01-12 23:22:24 +000012433 /* Do this for all languages that support sound folding and for which a
12434 * .sug file has been loaded. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012435 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012436 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012437 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12438 slang = lp->lp_slang;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012439 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012440 {
12441 /* soundfold the bad word */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012442 spell_soundfold(slang, su->su_fbadword, TRUE, salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012443
Bram Moolenaar4770d092006-01-12 23:22:24 +000012444 /* try all kinds of inserts/deletes/swaps/etc. */
12445 /* TODO: also soundfold the next words, so that we can try joining
12446 * and splitting */
12447 suggest_trie_walk(su, lp, salword, TRUE);
12448 }
12449 }
12450}
12451
12452/*
12453 * Finish up after calling suggest_try_soundalike().
12454 */
12455 static void
12456suggest_try_soundalike_finish()
12457{
12458 langp_T *lp;
12459 int lpi;
12460 slang_T *slang;
12461 int todo;
12462 hashitem_T *hi;
12463
12464 /* Do this for all languages that support sound folding and for which a
12465 * .sug file has been loaded. */
12466 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12467 {
12468 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12469 slang = lp->lp_slang;
12470 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12471 {
12472 /* Free the info about handled words. */
12473 todo = slang->sl_sounddone.ht_used;
12474 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
12475 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012476 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012477 vim_free(HI2SFT(hi));
12478 --todo;
12479 }
12480 hash_clear(&slang->sl_sounddone);
12481 }
12482 }
12483}
12484
12485/*
12486 * A match with a soundfolded word is found. Add the good word(s) that
12487 * produce this soundfolded word.
12488 */
12489 static void
12490add_sound_suggest(su, goodword, score, lp)
12491 suginfo_T *su;
12492 char_u *goodword;
12493 int score; /* soundfold score */
12494 langp_T *lp;
12495{
12496 slang_T *slang = lp->lp_slang; /* language for sound folding */
12497 int sfwordnr;
12498 char_u *nrline;
12499 int orgnr;
12500 char_u theword[MAXWLEN];
12501 int i;
12502 int wlen;
12503 char_u *byts;
12504 idx_T *idxs;
12505 int n;
12506 int wordcount;
12507 int wc;
12508 int goodscore;
12509 hash_T hash;
12510 hashitem_T *hi;
12511 sftword_T *sft;
12512 int bc, gc;
12513 int limit;
12514
12515 /*
12516 * It's very well possible that the same soundfold word is found several
12517 * times with different scores. Since the following is quite slow only do
12518 * the words that have a better score than before. Use a hashtable to
12519 * remember the words that have been done.
12520 */
12521 hash = hash_hash(goodword);
12522 hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
12523 if (HASHITEM_EMPTY(hi))
12524 {
12525 sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword));
12526 if (sft != NULL)
12527 {
12528 sft->sft_score = score;
12529 STRCPY(sft->sft_word, goodword);
12530 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
12531 }
12532 }
12533 else
12534 {
12535 sft = HI2SFT(hi);
12536 if (score >= sft->sft_score)
12537 return;
12538 sft->sft_score = score;
12539 }
12540
12541 /*
12542 * Find the word nr in the soundfold tree.
12543 */
12544 sfwordnr = soundfold_find(slang, goodword);
12545 if (sfwordnr < 0)
12546 {
12547 EMSG2(_(e_intern2), "add_sound_suggest()");
12548 return;
12549 }
12550
12551 /*
12552 * go over the list of good words that produce this soundfold word
12553 */
12554 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
12555 orgnr = 0;
12556 while (*nrline != NUL)
12557 {
12558 /* The wordnr was stored in a minimal nr of bytes as an offset to the
12559 * previous wordnr. */
12560 orgnr += bytes2offset(&nrline);
12561
12562 byts = slang->sl_fbyts;
12563 idxs = slang->sl_fidxs;
12564
12565 /* Lookup the word "orgnr" one of the two tries. */
12566 n = 0;
12567 wlen = 0;
12568 wordcount = 0;
12569 for (;;)
12570 {
12571 i = 1;
12572 if (wordcount == orgnr && byts[n + 1] == NUL)
12573 break; /* found end of word */
12574
12575 if (byts[n + 1] == NUL)
12576 ++wordcount;
12577
12578 /* skip over the NUL bytes */
12579 for ( ; byts[n + i] == NUL; ++i)
12580 if (i > byts[n]) /* safety check */
12581 {
12582 STRCPY(theword + wlen, "BAD");
12583 goto badword;
12584 }
12585
12586 /* One of the siblings must have the word. */
12587 for ( ; i < byts[n]; ++i)
12588 {
12589 wc = idxs[idxs[n + i]]; /* nr of words under this byte */
12590 if (wordcount + wc > orgnr)
12591 break;
12592 wordcount += wc;
12593 }
12594
12595 theword[wlen++] = byts[n + i];
12596 n = idxs[n + i];
12597 }
12598badword:
12599 theword[wlen] = NUL;
12600
12601 /* Go over the possible flags and regions. */
12602 for (; i <= byts[n] && byts[n + i] == NUL; ++i)
12603 {
12604 char_u cword[MAXWLEN];
12605 char_u *p;
12606 int flags = (int)idxs[n + i];
12607
12608 if (flags & WF_KEEPCAP)
12609 {
12610 /* Must find the word in the keep-case tree. */
12611 find_keepcap_word(slang, theword, cword);
12612 p = cword;
12613 }
12614 else
12615 {
12616 flags |= su->su_badflags;
12617 if ((flags & WF_CAPMASK) != 0)
12618 {
12619 /* Need to fix case according to "flags". */
12620 make_case_word(theword, cword, flags);
12621 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012622 }
12623 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000012624 p = theword;
12625 }
12626
12627 /* Add the suggestion. */
12628 if (sps_flags & SPS_DOUBLE)
12629 {
12630 /* Add the suggestion if the score isn't too bad. */
12631 if (score <= su->su_maxscore)
12632 add_suggestion(su, &su->su_sga, p, su->su_badlen,
12633 score, 0, FALSE, slang, FALSE);
12634 }
12635 else
12636 {
12637 /* Add a penalty for words in another region. */
12638 if ((flags & WF_REGION)
12639 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
12640 goodscore = SCORE_REGION;
12641 else
12642 goodscore = 0;
12643
12644 /* Add a small penalty for changing the first letter from
12645 * lower to upper case. Helps for "tath" -> "Kath", which is
12646 * less common thatn "tath" -> "path". Don't do it when the
12647 * letter is the same, that has already been counted. */
12648 gc = PTR2CHAR(p);
12649 if (SPELL_ISUPPER(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012650 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012651 bc = PTR2CHAR(su->su_badword);
12652 if (!SPELL_ISUPPER(bc)
12653 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
12654 goodscore += SCORE_ICASE / 2;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012655 }
12656
Bram Moolenaar4770d092006-01-12 23:22:24 +000012657 /* Compute the score for the good word. This only does letter
12658 * insert/delete/swap/replace. REP items are not considered,
12659 * which may make the score a bit higher.
12660 * Use a limit for the score to make it work faster. Use
12661 * MAXSCORE(), because RESCORE() will change the score.
12662 * If the limit is very high then the iterative method is
12663 * inefficient, using an array is quicker. */
12664 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
12665 if (limit > SCORE_LIMITMAX)
12666 goodscore += spell_edit_score(slang, su->su_badword, p);
12667 else
12668 goodscore += spell_edit_score_limit(slang, su->su_badword,
12669 p, limit);
12670
12671 /* When going over the limit don't bother to do the rest. */
12672 if (goodscore < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012673 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012674 /* Give a bonus to words seen before. */
12675 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012676
Bram Moolenaar4770d092006-01-12 23:22:24 +000012677 /* Add the suggestion if the score isn't too bad. */
12678 goodscore = RESCORE(goodscore, score);
12679 if (goodscore <= su->su_sfmaxscore)
12680 add_suggestion(su, &su->su_ga, p, su->su_badlen,
12681 goodscore, score, TRUE, slang, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012682 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012683 }
12684 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012685 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012686 }
12687}
12688
12689/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000012690 * Find word "word" in fold-case tree for "slang" and return the word number.
12691 */
12692 static int
12693soundfold_find(slang, word)
12694 slang_T *slang;
12695 char_u *word;
12696{
12697 idx_T arridx = 0;
12698 int len;
12699 int wlen = 0;
12700 int c;
12701 char_u *ptr = word;
12702 char_u *byts;
12703 idx_T *idxs;
12704 int wordnr = 0;
12705
12706 byts = slang->sl_sbyts;
12707 idxs = slang->sl_sidxs;
12708
12709 for (;;)
12710 {
12711 /* First byte is the number of possible bytes. */
12712 len = byts[arridx++];
12713
12714 /* If the first possible byte is a zero the word could end here.
12715 * If the word ends we found the word. If not skip the NUL bytes. */
12716 c = ptr[wlen];
12717 if (byts[arridx] == NUL)
12718 {
12719 if (c == NUL)
12720 break;
12721
12722 /* Skip over the zeros, there can be several. */
12723 while (len > 0 && byts[arridx] == NUL)
12724 {
12725 ++arridx;
12726 --len;
12727 }
12728 if (len == 0)
12729 return -1; /* no children, word should have ended here */
12730 ++wordnr;
12731 }
12732
12733 /* If the word ends we didn't find it. */
12734 if (c == NUL)
12735 return -1;
12736
12737 /* Perform a binary search in the list of accepted bytes. */
12738 if (c == TAB) /* <Tab> is handled like <Space> */
12739 c = ' ';
12740 while (byts[arridx] < c)
12741 {
12742 /* The word count is in the first idxs[] entry of the child. */
12743 wordnr += idxs[idxs[arridx]];
12744 ++arridx;
12745 if (--len == 0) /* end of the bytes, didn't find it */
12746 return -1;
12747 }
12748 if (byts[arridx] != c) /* didn't find the byte */
12749 return -1;
12750
12751 /* Continue at the child (if there is one). */
12752 arridx = idxs[arridx];
12753 ++wlen;
12754
12755 /* One space in the good word may stand for several spaces in the
12756 * checked word. */
12757 if (c == ' ')
12758 while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
12759 ++wlen;
12760 }
12761
12762 return wordnr;
12763}
12764
12765/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012766 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012767 */
12768 static void
12769make_case_word(fword, cword, flags)
12770 char_u *fword;
12771 char_u *cword;
12772 int flags;
12773{
12774 if (flags & WF_ALLCAP)
12775 /* Make it all upper-case */
12776 allcap_copy(fword, cword);
12777 else if (flags & WF_ONECAP)
12778 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012779 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012780 else
12781 /* Use goodword as-is. */
12782 STRCPY(cword, fword);
12783}
12784
Bram Moolenaarea424162005-06-16 21:51:00 +000012785/*
12786 * Use map string "map" for languages "lp".
12787 */
12788 static void
12789set_map_str(lp, map)
12790 slang_T *lp;
12791 char_u *map;
12792{
12793 char_u *p;
12794 int headc = 0;
12795 int c;
12796 int i;
12797
12798 if (*map == NUL)
12799 {
12800 lp->sl_has_map = FALSE;
12801 return;
12802 }
12803 lp->sl_has_map = TRUE;
12804
Bram Moolenaar4770d092006-01-12 23:22:24 +000012805 /* Init the array and hash tables empty. */
Bram Moolenaarea424162005-06-16 21:51:00 +000012806 for (i = 0; i < 256; ++i)
12807 lp->sl_map_array[i] = 0;
12808#ifdef FEAT_MBYTE
12809 hash_init(&lp->sl_map_hash);
12810#endif
12811
12812 /*
12813 * The similar characters are stored separated with slashes:
12814 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
12815 * before the same slash. For characters above 255 sl_map_hash is used.
12816 */
12817 for (p = map; *p != NUL; )
12818 {
12819#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012820 c = mb_cptr2char_adv(&p);
Bram Moolenaarea424162005-06-16 21:51:00 +000012821#else
12822 c = *p++;
12823#endif
12824 if (c == '/')
12825 headc = 0;
12826 else
12827 {
12828 if (headc == 0)
12829 headc = c;
12830
12831#ifdef FEAT_MBYTE
12832 /* Characters above 255 don't fit in sl_map_array[], put them in
12833 * the hash table. Each entry is the char, a NUL the headchar and
12834 * a NUL. */
12835 if (c >= 256)
12836 {
12837 int cl = mb_char2len(c);
12838 int headcl = mb_char2len(headc);
12839 char_u *b;
12840 hash_T hash;
12841 hashitem_T *hi;
12842
12843 b = alloc((unsigned)(cl + headcl + 2));
12844 if (b == NULL)
12845 return;
12846 mb_char2bytes(c, b);
12847 b[cl] = NUL;
12848 mb_char2bytes(headc, b + cl + 1);
12849 b[cl + 1 + headcl] = NUL;
12850 hash = hash_hash(b);
12851 hi = hash_lookup(&lp->sl_map_hash, b, hash);
12852 if (HASHITEM_EMPTY(hi))
12853 hash_add_item(&lp->sl_map_hash, hi, b, hash);
12854 else
12855 {
12856 /* This should have been checked when generating the .spl
12857 * file. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000012858 EMSG(_("E783: duplicate char in MAP entry"));
Bram Moolenaarea424162005-06-16 21:51:00 +000012859 vim_free(b);
12860 }
12861 }
12862 else
12863#endif
12864 lp->sl_map_array[c] = headc;
12865 }
12866 }
12867}
12868
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012869/*
12870 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
12871 * lines in the .aff file.
12872 */
12873 static int
12874similar_chars(slang, c1, c2)
12875 slang_T *slang;
12876 int c1;
12877 int c2;
12878{
Bram Moolenaarea424162005-06-16 21:51:00 +000012879 int m1, m2;
12880#ifdef FEAT_MBYTE
12881 char_u buf[MB_MAXBYTES];
12882 hashitem_T *hi;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012883
Bram Moolenaarea424162005-06-16 21:51:00 +000012884 if (c1 >= 256)
12885 {
12886 buf[mb_char2bytes(c1, buf)] = 0;
12887 hi = hash_find(&slang->sl_map_hash, buf);
12888 if (HASHITEM_EMPTY(hi))
12889 m1 = 0;
12890 else
12891 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
12892 }
12893 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012894#endif
Bram Moolenaarea424162005-06-16 21:51:00 +000012895 m1 = slang->sl_map_array[c1];
12896 if (m1 == 0)
12897 return FALSE;
12898
12899
12900#ifdef FEAT_MBYTE
12901 if (c2 >= 256)
12902 {
12903 buf[mb_char2bytes(c2, buf)] = 0;
12904 hi = hash_find(&slang->sl_map_hash, buf);
12905 if (HASHITEM_EMPTY(hi))
12906 m2 = 0;
12907 else
12908 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
12909 }
12910 else
12911#endif
12912 m2 = slang->sl_map_array[c2];
12913
12914 return m1 == m2;
12915}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012916
12917/*
12918 * Add a suggestion to the list of suggestions.
Bram Moolenaar4770d092006-01-12 23:22:24 +000012919 * For a suggestion that is already in the list the lowest score is remembered.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012920 */
12921 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000012922add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus,
12923 slang, maxsf)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012924 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012925 garray_T *gap; /* either su_ga or su_sga */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012926 char_u *goodword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012927 int badlenarg; /* len of bad word replaced with "goodword" */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012928 int score;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012929 int altscore;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012930 int had_bonus; /* value for st_had_bonus */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012931 slang_T *slang; /* language for sound folding */
Bram Moolenaar4770d092006-01-12 23:22:24 +000012932 int maxsf; /* su_maxscore applies to soundfold score,
12933 su_sfmaxscore to the total score. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012934{
Bram Moolenaar4770d092006-01-12 23:22:24 +000012935 int goodlen; /* len of goodword changed */
12936 int badlen; /* len of bad word changed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012937 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012938 suggest_T new_sug;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012939 int i;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012940 char_u *pgood, *pbad;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012941
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012942 /* Minimize "badlen" for consistency. Avoids that changing "the the" to
12943 * "thee the" is added next to changing the first "the" the "thee". */
12944 pgood = goodword + STRLEN(goodword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012945 pbad = su->su_badptr + badlenarg;
12946 for (;;)
Bram Moolenaar0c405862005-06-22 22:26:26 +000012947 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012948 goodlen = pgood - goodword;
12949 badlen = pbad - su->su_badptr;
12950 if (goodlen <= 0 || badlen <= 0)
12951 break;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012952 mb_ptr_back(goodword, pgood);
12953 mb_ptr_back(su->su_badptr, pbad);
12954#ifdef FEAT_MBYTE
12955 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000012956 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012957 if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
12958 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000012959 }
12960 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012961#endif
12962 if (*pgood != *pbad)
12963 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000012964 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012965
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012966 if (badlen == 0 && goodlen == 0)
12967 /* goodword doesn't change anything; may happen for "the the" changing
12968 * the first "the" to itself. */
12969 return;
Bram Moolenaar0c405862005-06-22 22:26:26 +000012970
Bram Moolenaar4770d092006-01-12 23:22:24 +000012971 /* Check if the word is already there. Also check the length that is
12972 * being replaced "thes," -> "these" is a different suggestion from
12973 * "thes" -> "these". */
12974 stp = &SUG(*gap, 0);
12975 for (i = gap->ga_len; --i >= 0; ++stp)
12976 if (stp->st_wordlen == goodlen
12977 && stp->st_orglen == badlen
12978 && STRNCMP(stp->st_word, goodword, goodlen) == 0)
12979 {
12980 /*
12981 * Found it. Remember the word with the lowest score.
12982 */
12983 if (stp->st_slang == NULL)
12984 stp->st_slang = slang;
12985
12986 new_sug.st_score = score;
12987 new_sug.st_altscore = altscore;
12988 new_sug.st_had_bonus = had_bonus;
12989
12990 if (stp->st_had_bonus != had_bonus)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012991 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012992 /* Only one of the two had the soundalike score computed.
12993 * Need to do that for the other one now, otherwise the
12994 * scores can't be compared. This happens because
12995 * suggest_try_change() doesn't compute the soundalike
12996 * word to keep it fast, while some special methods set
12997 * the soundalike score to zero. */
12998 if (had_bonus)
12999 rescore_one(su, stp);
13000 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013001 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013002 new_sug.st_word = stp->st_word;
13003 new_sug.st_wordlen = stp->st_wordlen;
13004 new_sug.st_slang = stp->st_slang;
13005 new_sug.st_orglen = badlen;
13006 rescore_one(su, &new_sug);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013007 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013008 }
13009
Bram Moolenaar4770d092006-01-12 23:22:24 +000013010 if (stp->st_score > new_sug.st_score)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013011 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013012 stp->st_score = new_sug.st_score;
13013 stp->st_altscore = new_sug.st_altscore;
13014 stp->st_had_bonus = new_sug.st_had_bonus;
13015 }
13016 break;
13017 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013018
Bram Moolenaar4770d092006-01-12 23:22:24 +000013019 if (i < 0 && ga_grow(gap, 1) == OK)
13020 {
13021 /* Add a suggestion. */
13022 stp = &SUG(*gap, gap->ga_len);
13023 stp->st_word = vim_strnsave(goodword, goodlen);
13024 if (stp->st_word != NULL)
13025 {
13026 stp->st_wordlen = goodlen;
13027 stp->st_score = score;
13028 stp->st_altscore = altscore;
13029 stp->st_had_bonus = had_bonus;
13030 stp->st_orglen = badlen;
13031 stp->st_slang = slang;
13032 ++gap->ga_len;
13033
13034 /* If we have too many suggestions now, sort the list and keep
13035 * the best suggestions. */
13036 if (gap->ga_len > SUG_MAX_COUNT(su))
13037 {
13038 if (maxsf)
13039 su->su_sfmaxscore = cleanup_suggestions(gap,
13040 su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
13041 else
13042 {
13043 i = su->su_maxscore;
13044 su->su_maxscore = cleanup_suggestions(gap,
13045 su->su_maxscore, SUG_CLEAN_COUNT(su));
13046 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013047 }
13048 }
13049 }
13050}
13051
13052/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000013053 * Suggestions may in fact be flagged as errors. Esp. for banned words and
13054 * for split words, such as "the the". Remove these from the list here.
13055 */
13056 static void
13057check_suggestions(su, gap)
13058 suginfo_T *su;
13059 garray_T *gap; /* either su_ga or su_sga */
13060{
13061 suggest_T *stp;
13062 int i;
13063 char_u longword[MAXWLEN + 1];
13064 int len;
13065 hlf_T attr;
13066
13067 stp = &SUG(*gap, 0);
13068 for (i = gap->ga_len - 1; i >= 0; --i)
13069 {
13070 /* Need to append what follows to check for "the the". */
13071 STRCPY(longword, stp[i].st_word);
13072 len = stp[i].st_wordlen;
13073 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
13074 MAXWLEN - len);
13075 attr = HLF_COUNT;
13076 (void)spell_check(curwin, longword, &attr, NULL, FALSE);
13077 if (attr != HLF_COUNT)
13078 {
13079 /* Remove this entry. */
13080 vim_free(stp[i].st_word);
13081 --gap->ga_len;
13082 if (i < gap->ga_len)
13083 mch_memmove(stp + i, stp + i + 1,
13084 sizeof(suggest_T) * (gap->ga_len - i));
13085 }
13086 }
13087}
13088
13089
13090/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013091 * Add a word to be banned.
13092 */
13093 static void
13094add_banned(su, word)
13095 suginfo_T *su;
13096 char_u *word;
13097{
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000013098 char_u *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013099 hash_T hash;
13100 hashitem_T *hi;
13101
Bram Moolenaar4770d092006-01-12 23:22:24 +000013102 hash = hash_hash(word);
13103 hi = hash_lookup(&su->su_banned, word, hash);
13104 if (HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013105 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013106 s = vim_strsave(word);
13107 if (s != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013108 hash_add_item(&su->su_banned, hi, s, hash);
13109 }
13110}
13111
13112/*
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013113 * Recompute the score for all suggestions if sound-folding is possible. This
13114 * is slow, thus only done for the final results.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013115 */
13116 static void
13117rescore_suggestions(su)
13118 suginfo_T *su;
13119{
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013120 int i;
13121
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013122 if (su->su_sallang != NULL)
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013123 for (i = 0; i < su->su_ga.ga_len; ++i)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013124 rescore_one(su, &SUG(su->su_ga, i));
13125}
13126
13127/*
13128 * Recompute the score for one suggestion if sound-folding is possible.
13129 */
13130 static void
13131rescore_one(su, stp)
Bram Moolenaar4effc802005-09-30 21:12:02 +000013132 suginfo_T *su;
13133 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013134{
13135 slang_T *slang = stp->st_slang;
13136 char_u sal_badword[MAXWLEN];
Bram Moolenaar4effc802005-09-30 21:12:02 +000013137 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013138
13139 /* Only rescore suggestions that have no sal score yet and do have a
13140 * language. */
13141 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
13142 {
13143 if (slang == su->su_sallang)
Bram Moolenaar4effc802005-09-30 21:12:02 +000013144 p = su->su_sal_badword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013145 else
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013146 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013147 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
Bram Moolenaar4effc802005-09-30 21:12:02 +000013148 p = sal_badword;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013149 }
Bram Moolenaar4effc802005-09-30 21:12:02 +000013150
13151 stp->st_altscore = stp_sal_score(stp, su, slang, p);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013152 if (stp->st_altscore == SCORE_MAXMAX)
13153 stp->st_altscore = SCORE_BIG;
13154 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
13155 stp->st_had_bonus = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013156 }
13157}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013158
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013159static int
13160#ifdef __BORLANDC__
13161_RTLENTRYF
13162#endif
13163sug_compare __ARGS((const void *s1, const void *s2));
13164
13165/*
13166 * Function given to qsort() to sort the suggestions on st_score.
Bram Moolenaar6b730e12005-09-16 21:47:57 +000013167 * First on "st_score", then "st_altscore" then alphabetically.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013168 */
13169 static int
13170#ifdef __BORLANDC__
13171_RTLENTRYF
13172#endif
13173sug_compare(s1, s2)
13174 const void *s1;
13175 const void *s2;
13176{
13177 suggest_T *p1 = (suggest_T *)s1;
13178 suggest_T *p2 = (suggest_T *)s2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013179 int n = p1->st_score - p2->st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013180
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013181 if (n == 0)
Bram Moolenaar6b730e12005-09-16 21:47:57 +000013182 {
13183 n = p1->st_altscore - p2->st_altscore;
13184 if (n == 0)
13185 n = STRICMP(p1->st_word, p2->st_word);
13186 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013187 return n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013188}
13189
13190/*
13191 * Cleanup the suggestions:
13192 * - Sort on score.
13193 * - Remove words that won't be displayed.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013194 * Returns the maximum score in the list or "maxscore" unmodified.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013195 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013196 static int
13197cleanup_suggestions(gap, maxscore, keep)
13198 garray_T *gap;
13199 int maxscore;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013200 int keep; /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013201{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013202 suggest_T *stp = &SUG(*gap, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013203 int i;
13204
13205 /* Sort the list. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013206 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013207
13208 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013209 if (gap->ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013210 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013211 for (i = keep; i < gap->ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013212 vim_free(stp[i].st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013213 gap->ga_len = keep;
13214 return stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013215 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013216 return maxscore;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013217}
13218
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Sound-fold "word" for the soundfold() function.
 * The first language of the current buffer that has SAL items is used.  When
 * spell checking is off or no such language exists a copy of "word" itself
 * is returned.
 * The result is what vim_strsave() returns: allocated memory, NULL for an
 * error.
 */
    char_u *
eval_soundfold(word)
    char_u      *word;
{
    char_u      folded[MAXWLEN];
    langp_T     *lang;
    int         idx;

    /* Spell checking not active: return the word as-is. */
    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
        return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (idx = 0; idx < curbuf->b_langp.ga_len; ++idx)
    {
        lang = LANGP_ENTRY(curbuf->b_langp, idx);
        if (lang->lp_slang->sl_sal.ga_len <= 0)
            continue;

        spell_soundfold(lang->lp_slang, word, FALSE, folded);
        return vim_strsave(folded);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
13249
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013250/*
13251 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
Bram Moolenaard12a1322005-08-21 22:08:24 +000013252 *
13253 * There are many ways to turn a word into a sound-a-like representation. The
13254 * oldest is Soundex (1918!). A nice overview can be found in "Approximate
13255 * swedish name matching - survey and test of different algorithms" by Klas
13256 * Erikson.
13257 *
13258 * We support two methods:
13259 * 1. SOFOFROM/SOFOTO do a simple character mapping.
13260 * 2. SAL items define a more advanced sound-folding (and much slower).
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013261 */
13262 static void
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013263spell_soundfold(slang, inword, folded, res)
13264 slang_T *slang;
13265 char_u *inword;
13266 int folded; /* "inword" is already case-folded */
13267 char_u *res;
13268{
13269 char_u fword[MAXWLEN];
13270 char_u *word;
13271
13272 if (slang->sl_sofo)
13273 /* SOFOFROM and SOFOTO used */
13274 spell_soundfold_sofo(slang, inword, res);
13275 else
13276 {
13277 /* SAL items used. Requires the word to be case-folded. */
13278 if (folded)
13279 word = inword;
13280 else
13281 {
13282 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN);
13283 word = fword;
13284 }
13285
13286#ifdef FEAT_MBYTE
13287 if (has_mbyte)
13288 spell_soundfold_wsal(slang, word, res);
13289 else
13290#endif
13291 spell_soundfold_sal(slang, word, res);
13292 }
13293}
13294
13295/*
13296 * Perform sound folding of "inword" into "res" according to SOFOFROM and
13297 * SOFOTO lines.
13298 */
13299 static void
13300spell_soundfold_sofo(slang, inword, res)
13301 slang_T *slang;
13302 char_u *inword;
13303 char_u *res;
13304{
13305 char_u *s;
13306 int ri = 0;
13307 int c;
13308
13309#ifdef FEAT_MBYTE
13310 if (has_mbyte)
13311 {
13312 int prevc = 0;
13313 int *ip;
13314
13315 /* The sl_sal_first[] table contains the translation for chars up to
13316 * 255, sl_sal the rest. */
13317 for (s = inword; *s != NUL; )
13318 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000013319 c = mb_cptr2char_adv(&s);
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013320 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
13321 c = ' ';
13322 else if (c < 256)
13323 c = slang->sl_sal_first[c];
13324 else
13325 {
13326 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
13327 if (ip == NULL) /* empty list, can't match */
13328 c = NUL;
13329 else
13330 for (;;) /* find "c" in the list */
13331 {
13332 if (*ip == 0) /* not found */
13333 {
13334 c = NUL;
13335 break;
13336 }
13337 if (*ip == c) /* match! */
13338 {
13339 c = ip[1];
13340 break;
13341 }
13342 ip += 2;
13343 }
13344 }
13345
13346 if (c != NUL && c != prevc)
13347 {
13348 ri += mb_char2bytes(c, res + ri);
13349 if (ri + MB_MAXBYTES > MAXWLEN)
13350 break;
13351 prevc = c;
13352 }
13353 }
13354 }
13355 else
13356#endif
13357 {
13358 /* The sl_sal_first[] table contains the translation. */
13359 for (s = inword; (c = *s) != NUL; ++s)
13360 {
13361 if (vim_iswhite(c))
13362 c = ' ';
13363 else
13364 c = slang->sl_sal_first[c];
13365 if (c != NUL && (ri == 0 || res[ri - 1] != c))
13366 res[ri++] = c;
13367 }
13368 }
13369
13370 res[ri] = NUL;
13371}
13372
13373 static void
13374spell_soundfold_sal(slang, inword, res)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013375 slang_T *slang;
13376 char_u *inword;
13377 char_u *res;
13378{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013379 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013380 char_u word[MAXWLEN];
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013381 char_u *s = inword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013382 char_u *t;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013383 char_u *pf;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013384 int i, j, z;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013385 int reslen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013386 int n, k = 0;
13387 int z0;
13388 int k0;
13389 int n0;
13390 int c;
13391 int pri;
13392 int p0 = -333;
13393 int c0;
13394
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013395 /* Remove accents, if wanted. We actually remove all non-word characters.
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013396 * But keep white space. We need a copy, the word may be changed here. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013397 if (slang->sl_rem_accents)
13398 {
13399 t = word;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013400 while (*s != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013401 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013402 if (vim_iswhite(*s))
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013403 {
13404 *t++ = ' ';
13405 s = skipwhite(s);
13406 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013407 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013408 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013409 if (spell_iswordp_nmw(s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013410 *t++ = *s;
13411 ++s;
13412 }
13413 }
13414 *t = NUL;
13415 }
13416 else
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013417 STRCPY(word, s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013418
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013419 smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013420
13421 /*
13422 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013423 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013424 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013425 i = reslen = z = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013426 while ((c = word[i]) != NUL)
13427 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013428 /* Start with the first rule that has the character in the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013429 n = slang->sl_sal_first[c];
13430 z0 = 0;
13431
13432 if (n >= 0)
13433 {
13434 /* check all rules for the same letter */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013435 for (; (s = smp[n].sm_lead)[0] == c; ++n)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013436 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013437 /* Quickly skip entries that don't match the word. Most
13438 * entries are less then three chars, optimize for that. */
13439 k = smp[n].sm_leadlen;
13440 if (k > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013441 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013442 if (word[i + 1] != s[1])
13443 continue;
13444 if (k > 2)
13445 {
13446 for (j = 2; j < k; ++j)
13447 if (word[i + j] != s[j])
13448 break;
13449 if (j < k)
13450 continue;
13451 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013452 }
13453
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013454 if ((pf = smp[n].sm_oneof) != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013455 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013456 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013457 while (*pf != NUL && *pf != word[i + k])
13458 ++pf;
13459 if (*pf == NUL)
13460 continue;
13461 ++k;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013462 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013463 s = smp[n].sm_rules;
13464 pri = 5; /* default priority */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013465
13466 p0 = *s;
13467 k0 = k;
13468 while (*s == '-' && k > 1)
13469 {
13470 k--;
13471 s++;
13472 }
13473 if (*s == '<')
13474 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013475 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013476 {
13477 /* determine priority */
13478 pri = *s - '0';
13479 s++;
13480 }
13481 if (*s == '^' && *(s + 1) == '^')
13482 s++;
13483
13484 if (*s == NUL
13485 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013486 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013487 || spell_iswordp(word + i - 1, curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013488 && (*(s + 1) != '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013489 || (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013490 || (*s == '$' && i > 0
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013491 && spell_iswordp(word + i - 1, curbuf)
13492 && (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013493 {
13494 /* search for followup rules, if: */
13495 /* followup and k > 1 and NO '-' in searchstring */
13496 c0 = word[i + k - 1];
13497 n0 = slang->sl_sal_first[c0];
13498
13499 if (slang->sl_followup && k > 1 && n0 >= 0
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013500 && p0 != '-' && word[i + k] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013501 {
13502 /* test follow-up rule for "word[i + k]" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013503 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013504 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013505 /* Quickly skip entries that don't match the word.
13506 * */
13507 k0 = smp[n0].sm_leadlen;
13508 if (k0 > 1)
13509 {
13510 if (word[i + k] != s[1])
13511 continue;
13512 if (k0 > 2)
13513 {
13514 pf = word + i + k + 1;
13515 for (j = 2; j < k0; ++j)
13516 if (*pf++ != s[j])
13517 break;
13518 if (j < k0)
13519 continue;
13520 }
13521 }
13522 k0 += k - 1;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013523
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013524 if ((pf = smp[n0].sm_oneof) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013525 {
13526 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013527 * "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013528 while (*pf != NUL && *pf != word[i + k0])
13529 ++pf;
13530 if (*pf == NUL)
13531 continue;
13532 ++k0;
13533 }
13534
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013535 p0 = 5;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013536 s = smp[n0].sm_rules;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013537 while (*s == '-')
13538 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013539 /* "k0" gets NOT reduced because
13540 * "if (k0 == k)" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013541 s++;
13542 }
13543 if (*s == '<')
13544 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013545 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013546 {
13547 p0 = *s - '0';
13548 s++;
13549 }
13550
13551 if (*s == NUL
13552 /* *s == '^' cuts */
13553 || (*s == '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013554 && !spell_iswordp(word + i + k0,
13555 curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013556 {
13557 if (k0 == k)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013558 /* this is just a piece of the string */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013559 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013560
13561 if (p0 < pri)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013562 /* priority too low */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013563 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013564 /* rule fits; stop search */
13565 break;
13566 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013567 }
13568
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013569 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013570 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013571 }
13572
13573 /* replace string */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013574 s = smp[n].sm_to;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000013575 if (s == NULL)
13576 s = (char_u *)"";
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013577 pf = smp[n].sm_rules;
13578 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013579 if (p0 == 1 && z == 0)
13580 {
13581 /* rule with '<' is used */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013582 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
13583 || res[reslen - 1] == *s))
13584 reslen--;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013585 z0 = 1;
13586 z = 1;
13587 k0 = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013588 while (*s != NUL && word[i + k0] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013589 {
13590 word[i + k0] = *s;
13591 k0++;
13592 s++;
13593 }
13594 if (k > k0)
13595 mch_memmove(word + i + k0, word + i + k,
13596 STRLEN(word + i + k) + 1);
13597
13598 /* new "actual letter" */
13599 c = word[i];
13600 }
13601 else
13602 {
13603 /* no '<' rule used */
13604 i += k - 1;
13605 z = 0;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013606 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013607 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013608 if (reslen == 0 || res[reslen - 1] != *s)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013609 res[reslen++] = *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013610 s++;
13611 }
13612 /* new "actual letter" */
13613 c = *s;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013614 if (strstr((char *)pf, "^^") != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013615 {
13616 if (c != NUL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013617 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013618 mch_memmove(word, word + i + 1,
13619 STRLEN(word + i + 1) + 1);
13620 i = 0;
13621 z0 = 1;
13622 }
13623 }
13624 break;
13625 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013626 }
13627 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013628 else if (vim_iswhite(c))
13629 {
13630 c = ' ';
13631 k = 1;
13632 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013633
13634 if (z0 == 0)
13635 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013636 if (k && !p0 && reslen < MAXWLEN && c != NUL
13637 && (!slang->sl_collapse || reslen == 0
13638 || res[reslen - 1] != c))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013639 /* condense only double letters */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013640 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013641
13642 i++;
13643 z = 0;
13644 k = 0;
13645 }
13646 }
13647
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013648 res[reslen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013649}
13650
#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]", using the
 * SAL items of "slang".
 * Multi-byte version of spell_soundfold_sal(): the word is converted to an
 * array of wide characters, the rules are applied to that array and the
 * outcome is converted back to a multi-byte string.
 * "inword" must be case-folded already and must fit in MAXWLEN bytes.
 */
    static void
spell_soundfold_wsal(slang, inword, res)
    slang_T     *slang;
    char_u      *inword;
    char_u      *res;
{
    salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
    int         word[MAXWLEN];
    int         wres[MAXWLEN];
    int         l;
    char_u      *s;
    int         *ws;
    char_u      *t;
    int         *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;
    int         did_white = FALSE;
    int         wlen;       /* nr of wide chars in the tail being moved */

    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     */
    n = 0;
    for (s = inword; *s != NUL; )
    {
        t = s;
        c = mb_cptr2char_adv(&s);
        if (slang->sl_rem_accents)
        {
            if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
            {
                /* A run of white space becomes one space. */
                if (did_white)
                    continue;
                c = ' ';
                did_white = TRUE;
            }
            else
            {
                did_white = FALSE;
                if (!spell_iswordp_nmw(t))
                    continue;
            }
        }
        word[n++] = c;
    }
    word[n] = NUL;

    /*
     * This comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     *
     * "i" is the position in "word", "reslen" the number of characters
     * stored in "wres".
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c & 0xff];
        z0 = 0;

        if (n >= 0)
        {
            /* check all rules for the same index byte */
            for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are fewer than three chars, optimize for that. */
                if (c != ws[0])
                    continue;
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != ws[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != ws[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof_w) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                p0 = *s;
                k0 = k;
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                    || spell_iswordp_w(word + i - 1, curbuf)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp_w(word + i + k0, curbuf))))
                        || (*s == '$' && i > 0
                            && spell_iswordp_w(word + i - 1, curbuf)
                            && (!spell_iswordp_w(word + i + k0, curbuf))))
                {
                    /* search for followup rules, if: */
                    /* followup and k > 1 and NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0 & 0xff];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                            && p0 != '-' && word[i + k] != NUL)
                    {
                        /* Test follow-up rule for "word[i + k]"; loop over
                         * all entries with the same index byte. */
                        for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
                                                    == (c0 & 0xff); ++n0)
                        {
                            /* Quickly skip entries that don't match the
                             * word. */
                            if (c0 != ws[0])
                                continue;
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != ws[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != ws[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof_w) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                        && !spell_iswordp_w(word + i + k0,
                                                                    curbuf)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* A fitting follow-up rule with at least the same
                         * priority was found: try the next rule. */
                        if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
                                                            == (c0 & 0xff))
                            continue;
                    }

                    /* replace string */
                    ws = smp[n].sm_to_w;
                    s = smp[n].sm_rules;
                    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used */
                        if (reslen > 0 && ws != NULL && *ws != NUL
                                && (wres[reslen - 1] == c
                                                || wres[reslen - 1] == *ws))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        /* Put the replacement in "word"; never write past
                         * the end of the word. */
                        if (ws != NULL)
                            while (*ws != NUL && word[i + k0] != NUL)
                            {
                                word[i + k0] = *ws;
                                k0++;
                                ws++;
                            }
                        if (k > k0)
                        {
                            /* "word" is an array of int, STRLEN() cannot be
                             * used: count the wide characters of the tail. */
                            for (wlen = 0; word[i + k + wlen] != NUL; ++wlen)
                                ;
                            mch_memmove(word + i + k0, word + i + k,
                                                sizeof(int) * (wlen + 1));
                        }

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        /* Store all but the last char of the replacement. */
                        if (ws != NULL)
                            while (*ws != NUL && ws[1] != NUL
                                                        && reslen < MAXWLEN)
                            {
                                if (reslen == 0 || wres[reslen - 1] != *ws)
                                    wres[reslen++] = *ws;
                                ws++;
                            }
                        /* new "actual letter" */
                        if (ws == NULL)
                            c = NUL;
                        else
                            c = *ws;
                        if (strstr((char *)s, "^^") != NULL)
                        {
                            /* Don't write past the end of "wres". */
                            if (c != NUL && reslen < MAXWLEN)
                                wres[reslen++] = c;
                            /* Drop the handled part, continue at the start
                             * of what remains.  Count wide characters,
                             * STRLEN() does not work on an int array. */
                            for (wlen = 0; word[i + 1 + wlen] != NUL; ++wlen)
                                ;
                            mch_memmove(word, word + i + 1,
                                                sizeof(int) * (wlen + 1));
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (vim_iswhite(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                                                || wres[reslen - 1] != c))
                /* condense only double letters */
                wres[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res".
     * Stop when the next character might not fit. */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
        l += mb_char2bytes(wres[n], res + l);
        if (l + MB_MAXBYTES > MAXWLEN)
            break;
    }
    res[l] = NUL;
}
#endif
13963
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013964/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013965 * Compute a score for two sound-a-like words.
13966 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
13967 * Instead of a generic loop we write out the code. That keeps it fast by
13968 * avoiding checks that will not be possible.
13969 */
13970 static int
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013971soundalike_score(goodstart, badstart)
13972 char_u *goodstart; /* sound-folded good word */
13973 char_u *badstart; /* sound-folded bad word */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013974{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013975 char_u *goodsound = goodstart;
13976 char_u *badsound = badstart;
13977 int goodlen;
13978 int badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013979 int n;
13980 char_u *pl, *ps;
13981 char_u *pl2, *ps2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013982 int score = 0;
13983
13984 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
13985 * counted so much, vowels halfway the word aren't counted at all. */
13986 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
13987 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013988 if (badsound[1] == goodsound[1]
13989 || (badsound[1] != NUL
13990 && goodsound[1] != NUL
13991 && badsound[2] == goodsound[2]))
13992 {
13993 /* handle like a substitute */
13994 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013995 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000013996 {
13997 score = 2 * SCORE_DEL / 3;
13998 if (*badsound == '*')
13999 ++badsound;
14000 else
14001 ++goodsound;
14002 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014003 }
14004
14005 goodlen = STRLEN(goodsound);
14006 badlen = STRLEN(badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014007
14008 /* Return quickly if the lenghts are too different to be fixed by two
14009 * changes. */
14010 n = goodlen - badlen;
14011 if (n < -2 || n > 2)
14012 return SCORE_MAXMAX;
14013
14014 if (n > 0)
14015 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014016 pl = goodsound; /* goodsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014017 ps = badsound;
14018 }
14019 else
14020 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014021 pl = badsound; /* badsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014022 ps = goodsound;
14023 }
14024
14025 /* Skip over the identical part. */
14026 while (*pl == *ps && *pl != NUL)
14027 {
14028 ++pl;
14029 ++ps;
14030 }
14031
14032 switch (n)
14033 {
14034 case -2:
14035 case 2:
14036 /*
14037 * Must delete two characters from "pl".
14038 */
14039 ++pl; /* first delete */
14040 while (*pl == *ps)
14041 {
14042 ++pl;
14043 ++ps;
14044 }
14045 /* strings must be equal after second delete */
14046 if (STRCMP(pl + 1, ps) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014047 return score + SCORE_DEL * 2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014048
14049 /* Failed to compare. */
14050 break;
14051
14052 case -1:
14053 case 1:
14054 /*
14055 * Minimal one delete from "pl" required.
14056 */
14057
14058 /* 1: delete */
14059 pl2 = pl + 1;
14060 ps2 = ps;
14061 while (*pl2 == *ps2)
14062 {
14063 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014064 return score + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014065 ++pl2;
14066 ++ps2;
14067 }
14068
14069 /* 2: delete then swap, then rest must be equal */
14070 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14071 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014072 return score + SCORE_DEL + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014073
14074 /* 3: delete then substitute, then the rest must be equal */
14075 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014076 return score + SCORE_DEL + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014077
14078 /* 4: first swap then delete */
14079 if (pl[0] == ps[1] && pl[1] == ps[0])
14080 {
14081 pl2 = pl + 2; /* swap, skip two chars */
14082 ps2 = ps + 2;
14083 while (*pl2 == *ps2)
14084 {
14085 ++pl2;
14086 ++ps2;
14087 }
14088 /* delete a char and then strings must be equal */
14089 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014090 return score + SCORE_SWAP + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014091 }
14092
14093 /* 5: first substitute then delete */
14094 pl2 = pl + 1; /* substitute, skip one char */
14095 ps2 = ps + 1;
14096 while (*pl2 == *ps2)
14097 {
14098 ++pl2;
14099 ++ps2;
14100 }
14101 /* delete a char and then strings must be equal */
14102 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014103 return score + SCORE_SUBST + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014104
14105 /* Failed to compare. */
14106 break;
14107
14108 case 0:
14109 /*
14110 * Lenghts are equal, thus changes must result in same length: An
14111 * insert is only possible in combination with a delete.
14112 * 1: check if for identical strings
14113 */
14114 if (*pl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014115 return score;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014116
14117 /* 2: swap */
14118 if (pl[0] == ps[1] && pl[1] == ps[0])
14119 {
14120 pl2 = pl + 2; /* swap, skip two chars */
14121 ps2 = ps + 2;
14122 while (*pl2 == *ps2)
14123 {
14124 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014125 return score + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014126 ++pl2;
14127 ++ps2;
14128 }
14129 /* 3: swap and swap again */
14130 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14131 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014132 return score + SCORE_SWAP + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014133
14134 /* 4: swap and substitute */
14135 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014136 return score + SCORE_SWAP + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014137 }
14138
14139 /* 5: substitute */
14140 pl2 = pl + 1;
14141 ps2 = ps + 1;
14142 while (*pl2 == *ps2)
14143 {
14144 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014145 return score + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014146 ++pl2;
14147 ++ps2;
14148 }
14149
14150 /* 6: substitute and swap */
14151 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14152 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014153 return score + SCORE_SUBST + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014154
14155 /* 7: substitute and substitute */
14156 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014157 return score + SCORE_SUBST + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014158
14159 /* 8: insert then delete */
14160 pl2 = pl;
14161 ps2 = ps + 1;
14162 while (*pl2 == *ps2)
14163 {
14164 ++pl2;
14165 ++ps2;
14166 }
14167 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014168 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014169
14170 /* 9: delete then insert */
14171 pl2 = pl + 1;
14172 ps2 = ps;
14173 while (*pl2 == *ps2)
14174 {
14175 ++pl2;
14176 ++ps2;
14177 }
14178 if (STRCMP(pl2, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014179 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014180
14181 /* Failed to compare. */
14182 break;
14183 }
14184
14185 return SCORE_MAXMAX;
14186}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014187
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014188/*
14189 * Compute the "edit distance" to turn "badword" into "goodword". The less
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014190 * deletes/inserts/substitutes/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014191 *
Bram Moolenaard12a1322005-08-21 22:08:24 +000014192 * The algorithm is described by Du and Chang, 1992.
14193 * The implementation of the algorithm comes from Aspell editdist.cpp,
14194 * edit_distance(). It has been converted from C++ to C and modified to
14195 * support multi-byte characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014196 */
14197 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +000014198spell_edit_score(slang, badword, goodword)
14199 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014200 char_u *badword;
14201 char_u *goodword;
14202{
14203 int *cnt;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014204 int badlen, goodlen; /* lenghts including NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014205 int j, i;
14206 int t;
14207 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014208 int pbc, pgc;
14209#ifdef FEAT_MBYTE
14210 char_u *p;
14211 int wbadword[MAXWLEN];
14212 int wgoodword[MAXWLEN];
14213
14214 if (has_mbyte)
14215 {
14216 /* Get the characters from the multi-byte strings and put them in an
14217 * int array for easy access. */
14218 for (p = badword, badlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000014219 wbadword[badlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000014220 wbadword[badlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014221 for (p = goodword, goodlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000014222 wgoodword[goodlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000014223 wgoodword[goodlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014224 }
14225 else
14226#endif
14227 {
14228 badlen = STRLEN(badword) + 1;
14229 goodlen = STRLEN(goodword) + 1;
14230 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014231
14232 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
14233#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014234 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
14235 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014236 if (cnt == NULL)
14237 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014238
14239 CNT(0, 0) = 0;
14240 for (j = 1; j <= goodlen; ++j)
Bram Moolenaar4770d092006-01-12 23:22:24 +000014241 CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014242
14243 for (i = 1; i <= badlen; ++i)
14244 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014245 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014246 for (j = 1; j <= goodlen; ++j)
14247 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014248#ifdef FEAT_MBYTE
14249 if (has_mbyte)
14250 {
14251 bc = wbadword[i - 1];
14252 gc = wgoodword[j - 1];
14253 }
14254 else
14255#endif
14256 {
14257 bc = badword[i - 1];
14258 gc = goodword[j - 1];
14259 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014260 if (bc == gc)
14261 CNT(i, j) = CNT(i - 1, j - 1);
14262 else
14263 {
14264 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014265 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014266 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
14267 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000014268 {
14269 /* For a similar character use SCORE_SIMILAR. */
14270 if (slang != NULL
14271 && slang->sl_has_map
14272 && similar_chars(slang, gc, bc))
14273 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
14274 else
14275 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
14276 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014277
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014278 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014279 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014280#ifdef FEAT_MBYTE
14281 if (has_mbyte)
14282 {
14283 pbc = wbadword[i - 2];
14284 pgc = wgoodword[j - 2];
14285 }
14286 else
14287#endif
14288 {
14289 pbc = badword[i - 2];
14290 pgc = goodword[j - 2];
14291 }
14292 if (bc == pgc && pbc == gc)
14293 {
14294 t = SCORE_SWAP + CNT(i - 2, j - 2);
14295 if (t < CNT(i, j))
14296 CNT(i, j) = t;
14297 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014298 }
14299 t = SCORE_DEL + CNT(i - 1, j);
14300 if (t < CNT(i, j))
14301 CNT(i, j) = t;
14302 t = SCORE_INS + CNT(i, j - 1);
14303 if (t < CNT(i, j))
14304 CNT(i, j) = t;
14305 }
14306 }
14307 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014308
14309 i = CNT(badlen - 1, goodlen - 1);
14310 vim_free(cnt);
14311 return i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014312}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000014313
/*
 * One postponed alternative on the stack used by spell_edit_score_limit():
 * where to continue in both words and the score reached so far.
 */
typedef struct
{
    int         badi;       /* index in the bad word to continue at */
    int         goodi;      /* index in the good word to continue at */
    int         score;      /* score of the edits done up to this point */
} limitscore_T;
14320
14321/*
14322 * Like spell_edit_score(), but with a limit on the score to make it faster.
14323 * May return SCORE_MAXMAX when the score is higher than "limit".
14324 *
14325 * This uses a stack for the edits still to be tried.
14326 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support
14327 * for multi-byte characters.
14328 */
14329 static int
14330spell_edit_score_limit(slang, badword, goodword, limit)
14331 slang_T *slang;
14332 char_u *badword;
14333 char_u *goodword;
14334 int limit;
14335{
14336 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
14337 int stackidx;
14338 int bi, gi;
14339 int bi2, gi2;
14340 int bc, gc;
14341 int score;
14342 int score_off;
14343 int minscore;
14344 int round;
14345
14346#ifdef FEAT_MBYTE
14347 /* Multi-byte characters require a bit more work, use a different function
14348 * to avoid testing "has_mbyte" quite often. */
14349 if (has_mbyte)
14350 return spell_edit_score_limit_w(slang, badword, goodword, limit);
14351#endif
14352
14353 /*
14354 * The idea is to go from start to end over the words. So long as
14355 * characters are equal just continue, this always gives the lowest score.
14356 * When there is a difference try several alternatives. Each alternative
14357 * increases "score" for the edit distance. Some of the alternatives are
14358 * pushed unto a stack and tried later, some are tried right away. At the
14359 * end of the word the score for one alternative is known. The lowest
14360 * possible score is stored in "minscore".
14361 */
14362 stackidx = 0;
14363 bi = 0;
14364 gi = 0;
14365 score = 0;
14366 minscore = limit + 1;
14367
14368 for (;;)
14369 {
14370 /* Skip over an equal part, score remains the same. */
14371 for (;;)
14372 {
14373 bc = badword[bi];
14374 gc = goodword[gi];
14375 if (bc != gc) /* stop at a char that's different */
14376 break;
14377 if (bc == NUL) /* both words end */
14378 {
14379 if (score < minscore)
14380 minscore = score;
14381 goto pop; /* do next alternative */
14382 }
14383 ++bi;
14384 ++gi;
14385 }
14386
14387 if (gc == NUL) /* goodword ends, delete badword chars */
14388 {
14389 do
14390 {
14391 if ((score += SCORE_DEL) >= minscore)
14392 goto pop; /* do next alternative */
14393 } while (badword[++bi] != NUL);
14394 minscore = score;
14395 }
14396 else if (bc == NUL) /* badword ends, insert badword chars */
14397 {
14398 do
14399 {
14400 if ((score += SCORE_INS) >= minscore)
14401 goto pop; /* do next alternative */
14402 } while (goodword[++gi] != NUL);
14403 minscore = score;
14404 }
14405 else /* both words continue */
14406 {
14407 /* If not close to the limit, perform a change. Only try changes
14408 * that may lead to a lower score than "minscore".
14409 * round 0: try deleting a char from badword
14410 * round 1: try inserting a char in badword */
14411 for (round = 0; round <= 1; ++round)
14412 {
14413 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
14414 if (score_off < minscore)
14415 {
14416 if (score_off + SCORE_EDIT_MIN >= minscore)
14417 {
14418 /* Near the limit, rest of the words must match. We
14419 * can check that right now, no need to push an item
14420 * onto the stack. */
14421 bi2 = bi + 1 - round;
14422 gi2 = gi + round;
14423 while (goodword[gi2] == badword[bi2])
14424 {
14425 if (goodword[gi2] == NUL)
14426 {
14427 minscore = score_off;
14428 break;
14429 }
14430 ++bi2;
14431 ++gi2;
14432 }
14433 }
14434 else
14435 {
14436 /* try deleting/inserting a character later */
14437 stack[stackidx].badi = bi + 1 - round;
14438 stack[stackidx].goodi = gi + round;
14439 stack[stackidx].score = score_off;
14440 ++stackidx;
14441 }
14442 }
14443 }
14444
14445 if (score + SCORE_SWAP < minscore)
14446 {
14447 /* If swapping two characters makes a match then the
14448 * substitution is more expensive, thus there is no need to
14449 * try both. */
14450 if (gc == badword[bi + 1] && bc == goodword[gi + 1])
14451 {
14452 /* Swap two characters, that is: skip them. */
14453 gi += 2;
14454 bi += 2;
14455 score += SCORE_SWAP;
14456 continue;
14457 }
14458 }
14459
14460 /* Substitute one character for another which is the same
14461 * thing as deleting a character from both goodword and badword.
14462 * Use a better score when there is only a case difference. */
14463 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
14464 score += SCORE_ICASE;
14465 else
14466 {
14467 /* For a similar character use SCORE_SIMILAR. */
14468 if (slang != NULL
14469 && slang->sl_has_map
14470 && similar_chars(slang, gc, bc))
14471 score += SCORE_SIMILAR;
14472 else
14473 score += SCORE_SUBST;
14474 }
14475
14476 if (score < minscore)
14477 {
14478 /* Do the substitution. */
14479 ++gi;
14480 ++bi;
14481 continue;
14482 }
14483 }
14484pop:
14485 /*
14486 * Get here to try the next alternative, pop it from the stack.
14487 */
14488 if (stackidx == 0) /* stack is empty, finished */
14489 break;
14490
14491 /* pop an item from the stack */
14492 --stackidx;
14493 gi = stack[stackidx].goodi;
14494 bi = stack[stackidx].badi;
14495 score = stack[stackidx].score;
14496 }
14497
14498 /* When the score goes over "limit" it may actually be much higher.
14499 * Return a very large number to avoid going below the limit when giving a
14500 * bonus. */
14501 if (minscore > limit)
14502 return SCORE_MAXMAX;
14503 return minscore;
14504}
14505
#ifdef FEAT_MBYTE
/*
 * Multi-byte version of spell_edit_score_limit().
 * Keep it in sync with the above!
 *
 * Both words are first unpacked into arrays with one int per character, the
 * search then works on characters instead of bytes.
 * Returns SCORE_MAXMAX when no way was found to turn "badword" into
 * "goodword" with a score of at most "limit".
 * When more alternatives are pending than fit in the stack the extra ones
 * are not tried; the result may then be higher than the lowest possible
 * score.
 */
    static int
spell_edit_score_limit_w(slang, badword, goodword, limit)
    slang_T     *slang;
    char_u      *badword;
    char_u      *goodword;
    int         limit;
{
    limitscore_T    stack[10];      /* allow for over 3 * 2 edits */
    int             stackidx;
    int             bi, gi;
    int             bi2, gi2;
    int             bc, gc;
    int             score;
    int             score_off;
    int             minscore;
    int             round;
    char_u          *p;
    int             wbadword[MAXWLEN];
    int             wgoodword[MAXWLEN];

    /* Get the characters from the multi-byte strings and put them in an
     * int array for easy access. */
    bi = 0;
    for (p = badword; *p != NUL; )
        wbadword[bi++] = mb_cptr2char_adv(&p);
    wbadword[bi++] = 0;
    gi = 0;
    for (p = goodword; *p != NUL; )
        wgoodword[gi++] = mb_cptr2char_adv(&p);
    wgoodword[gi++] = 0;

    /*
     * The idea is to go from start to end over the words.  So long as
     * characters are equal just continue, this always gives the lowest score.
     * When there is a difference try several alternatives.  Each alternative
     * increases "score" for the edit distance.  Some of the alternatives are
     * pushed onto a stack and tried later, some are tried right away.  At the
     * end of the word the score for one alternative is known.  The lowest
     * possible score is stored in "minscore".
     */
    stackidx = 0;
    bi = 0;
    gi = 0;
    score = 0;
    minscore = limit + 1;

    for (;;)
    {
        /* Skip over an equal part, score remains the same. */
        for (;;)
        {
            bc = wbadword[bi];
            gc = wgoodword[gi];

            if (bc != gc)       /* stop at a char that's different */
                break;
            if (bc == NUL)      /* both words end */
            {
                if (score < minscore)
                    minscore = score;
                goto pop;       /* do next alternative */
            }
            ++bi;
            ++gi;
        }

        if (gc == NUL)    /* goodword ends, delete badword chars */
        {
            do
            {
                if ((score += SCORE_DEL) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wbadword[++bi] != NUL);
            minscore = score;
        }
        else if (bc == NUL) /* badword ends, insert the rest of goodword */
        {
            do
            {
                if ((score += SCORE_INS) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wgoodword[++gi] != NUL);
            minscore = score;
        }
        else                    /* both words continue */
        {
            /* If not close to the limit, perform a change.  Only try changes
             * that may lead to a lower score than "minscore".
             * round 0: try deleting a char from badword
             * round 1: try inserting a char in badword */
            for (round = 0; round <= 1; ++round)
            {
                score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
                if (score_off < minscore)
                {
                    if (score_off + SCORE_EDIT_MIN >= minscore)
                    {
                        /* Near the limit, rest of the words must match.  We
                         * can check that right now, no need to push an item
                         * onto the stack. */
                        bi2 = bi + 1 - round;
                        gi2 = gi + round;
                        while (wgoodword[gi2] == wbadword[bi2])
                        {
                            if (wgoodword[gi2] == NUL)
                            {
                                minscore = score_off;
                                break;
                            }
                            ++bi2;
                            ++gi2;
                        }
                    }
                    else if (stackidx
                                < (int)(sizeof(stack) / sizeof(stack[0])))
                    {
                        /* try deleting/inserting a character later */
                        stack[stackidx].badi = bi + 1 - round;
                        stack[stackidx].goodi = gi + round;
                        stack[stackidx].score = score_off;
                        ++stackidx;
                    }
                    /* else: the stack is full.  Nothing here guarantees
                     * that the pending alternatives always fit, thus drop
                     * this one instead of writing past the end of
                     * "stack". */
                }
            }

            if (score + SCORE_SWAP < minscore)
            {
                /* If swapping two characters makes a match then the
                 * substitution is more expensive, thus there is no need to
                 * try both. */
                if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
                {
                    /* Swap two characters, that is: skip them. */
                    gi += 2;
                    bi += 2;
                    score += SCORE_SWAP;
                    continue;
                }
            }

            /* Substitute one character for another which is the same
             * thing as deleting a character from both goodword and badword.
             * Use a better score when there is only a case difference. */
            if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                score += SCORE_ICASE;
            else
            {
                /* For a similar character use SCORE_SIMILAR. */
                if (slang != NULL
                        && slang->sl_has_map
                        && similar_chars(slang, gc, bc))
                    score += SCORE_SIMILAR;
                else
                    score += SCORE_SUBST;
            }

            if (score < minscore)
            {
                /* Do the substitution. */
                ++gi;
                ++bi;
                continue;
            }
        }
pop:
        /*
         * Get here to try the next alternative, pop it from the stack.
         */
        if (stackidx == 0)              /* stack is empty, finished */
            break;

        /* pop an item from the stack */
        --stackidx;
        gi = stack[stackidx].goodi;
        bi = stack[stackidx].badi;
        score = stack[stackidx].score;
    }

    /* When the score goes over "limit" it may actually be much higher.
     * Return a very large number to avoid going below the limit when giving a
     * bonus. */
    if (minscore > limit)
        return SCORE_MAXMAX;
    return minscore;
}
#endif
14696
14697#define DUMPFLAG_KEEPCASE 1 /* round 2: keep-case tree */
14698#define DUMPFLAG_COUNT 2 /* include word count */
14699
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014700/*
14701 * ":spelldump"
14702 */
14703/*ARGSUSED*/
14704 void
14705ex_spelldump(eap)
14706 exarg_T *eap;
14707{
14708 buf_T *buf = curbuf;
14709 langp_T *lp;
14710 slang_T *slang;
14711 idx_T arridx[MAXWLEN];
14712 int curi[MAXWLEN];
14713 char_u word[MAXWLEN];
14714 int c;
14715 char_u *byts;
14716 idx_T *idxs;
14717 linenr_T lnum = 0;
14718 int round;
14719 int depth;
14720 int n;
14721 int flags;
Bram Moolenaar7887d882005-07-01 22:33:52 +000014722 char_u *region_names = NULL; /* region names being used */
14723 int do_region = TRUE; /* dump region names and numbers */
14724 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014725 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000014726 int dumpflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014727
Bram Moolenaar95529562005-08-25 21:21:38 +000014728 if (no_spell_checking(curwin))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014729 return;
14730
14731 /* Create a new empty buffer by splitting the window. */
14732 do_cmdline_cmd((char_u *)"new");
14733 if (!bufempty() || !buf_valid(buf))
14734 return;
14735
Bram Moolenaar7887d882005-07-01 22:33:52 +000014736 /* Find out if we can support regions: All languages must support the same
14737 * regions or none at all. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014738 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaar7887d882005-07-01 22:33:52 +000014739 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014740 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaar7887d882005-07-01 22:33:52 +000014741 p = lp->lp_slang->sl_regions;
14742 if (p[0] != 0)
14743 {
14744 if (region_names == NULL) /* first language with regions */
14745 region_names = p;
14746 else if (STRCMP(region_names, p) != 0)
14747 {
14748 do_region = FALSE; /* region names are different */
14749 break;
14750 }
14751 }
14752 }
14753
14754 if (do_region && region_names != NULL)
14755 {
14756 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
14757 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
14758 }
14759 else
14760 do_region = FALSE;
14761
14762 /*
14763 * Loop over all files loaded for the entries in 'spelllang'.
14764 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014765 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014766 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014767 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014768 slang = lp->lp_slang;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014769 if (slang->sl_fbyts == NULL) /* reloading failed */
14770 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014771
14772 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
14773 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
14774
14775 /* round 1: case-folded tree
14776 * round 2: keep-case tree */
14777 for (round = 1; round <= 2; ++round)
14778 {
14779 if (round == 1)
14780 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014781 dumpflags = 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014782 byts = slang->sl_fbyts;
14783 idxs = slang->sl_fidxs;
14784 }
14785 else
14786 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014787 dumpflags = DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014788 byts = slang->sl_kbyts;
14789 idxs = slang->sl_kidxs;
14790 }
14791 if (byts == NULL)
14792 continue; /* array is empty */
14793
Bram Moolenaar4770d092006-01-12 23:22:24 +000014794 if (eap->forceit)
14795 dumpflags |= DUMPFLAG_COUNT;
14796
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014797 depth = 0;
14798 arridx[0] = 0;
14799 curi[0] = 1;
14800 while (depth >= 0 && !got_int)
14801 {
14802 if (curi[depth] > byts[arridx[depth]])
14803 {
14804 /* Done all bytes at this node, go up one level. */
14805 --depth;
14806 line_breakcheck();
14807 }
14808 else
14809 {
14810 /* Do one more byte at this node. */
14811 n = arridx[depth] + curi[depth];
14812 ++curi[depth];
14813 c = byts[n];
14814 if (c == 0)
14815 {
14816 /* End of word, deal with the word.
14817 * Don't use keep-case words in the fold-case tree,
14818 * they will appear in the keep-case tree.
14819 * Only use the word when the region matches. */
14820 flags = (int)idxs[n];
14821 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000014822 && (flags & WF_NEEDCOMP) == 0
Bram Moolenaar7887d882005-07-01 22:33:52 +000014823 && (do_region
14824 || (flags & WF_REGION) == 0
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014825 || (((unsigned)flags >> 16)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014826 & lp->lp_region) != 0))
14827 {
14828 word[depth] = NUL;
Bram Moolenaar7887d882005-07-01 22:33:52 +000014829 if (!do_region)
14830 flags &= ~WF_REGION;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000014831
14832 /* Dump the basic word if there is no prefix or
14833 * when it's the first one. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014834 c = (unsigned)flags >> 24;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000014835 if (c == 0 || curi[depth] == 2)
Bram Moolenaar4770d092006-01-12 23:22:24 +000014836 dump_word(slang, word, dumpflags,
14837 flags, lnum++);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014838
14839 /* Apply the prefix, if there is one. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000014840 if (c != 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000014841 lnum = dump_prefixes(slang, word, dumpflags,
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014842 flags, lnum);
14843 }
14844 }
14845 else
14846 {
14847 /* Normal char, go one level deeper. */
14848 word[depth++] = c;
14849 arridx[depth] = idxs[n];
14850 curi[depth] = 1;
14851 }
14852 }
14853 }
14854 }
14855 }
14856
14857 /* Delete the empty line that we started with. */
14858 if (curbuf->b_ml.ml_line_count > 1)
14859 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
14860
14861 redraw_later(NOT_VALID);
14862}
14863
14864/*
14865 * Dump one word: apply case modifications and append a line to the buffer.
14866 */
14867 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000014868dump_word(slang, word, dumpflags, flags, lnum)
14869 slang_T *slang;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014870 char_u *word;
Bram Moolenaar4770d092006-01-12 23:22:24 +000014871 int dumpflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014872 int flags;
14873 linenr_T lnum;
14874{
14875 int keepcap = FALSE;
14876 char_u *p;
Bram Moolenaar4770d092006-01-12 23:22:24 +000014877 char_u *tw;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014878 char_u cword[MAXWLEN];
Bram Moolenaar7887d882005-07-01 22:33:52 +000014879 char_u badword[MAXWLEN + 10];
14880 int i;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014881
Bram Moolenaar4770d092006-01-12 23:22:24 +000014882 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014883 {
14884 /* Need to fix case according to "flags". */
14885 make_case_word(word, cword, flags);
14886 p = cword;
14887 }
14888 else
14889 {
14890 p = word;
Bram Moolenaar4770d092006-01-12 23:22:24 +000014891 if ((dumpflags & DUMPFLAG_KEEPCASE)
14892 && ((captype(word, NULL) & WF_KEEPCAP) == 0
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014893 || (flags & WF_FIXCAP) != 0))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014894 keepcap = TRUE;
14895 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000014896 tw = p;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014897
Bram Moolenaar7887d882005-07-01 22:33:52 +000014898 /* Add flags and regions after a slash. */
14899 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014900 {
Bram Moolenaar7887d882005-07-01 22:33:52 +000014901 STRCPY(badword, p);
14902 STRCAT(badword, "/");
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014903 if (keepcap)
14904 STRCAT(badword, "=");
14905 if (flags & WF_BANNED)
14906 STRCAT(badword, "!");
14907 else if (flags & WF_RARE)
14908 STRCAT(badword, "?");
Bram Moolenaar7887d882005-07-01 22:33:52 +000014909 if (flags & WF_REGION)
14910 for (i = 0; i < 7; ++i)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014911 if (flags & (0x10000 << i))
Bram Moolenaar7887d882005-07-01 22:33:52 +000014912 sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014913 p = badword;
14914 }
14915
Bram Moolenaar4770d092006-01-12 23:22:24 +000014916 if (dumpflags & DUMPFLAG_COUNT)
14917 {
14918 hashitem_T *hi;
14919
14920 /* Include the word count for ":spelldump!". */
14921 hi = hash_find(&slang->sl_wordcount, tw);
14922 if (!HASHITEM_EMPTY(hi))
14923 {
14924 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
14925 tw, HI2WC(hi)->wc_count);
14926 p = IObuff;
14927 }
14928 }
14929
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014930 ml_append(lnum, p, (colnr_T)0, FALSE);
14931}
14932
14933/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014934 * For ":spelldump": Find matching prefixes for "word". Prepend each to
14935 * "word" and append a line to the buffer.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014936 * Return the updated line number.
14937 */
14938 static linenr_T
Bram Moolenaar4770d092006-01-12 23:22:24 +000014939dump_prefixes(slang, word, dumpflags, flags, startlnum)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014940 slang_T *slang;
14941 char_u *word; /* case-folded word */
Bram Moolenaar4770d092006-01-12 23:22:24 +000014942 int dumpflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014943 int flags; /* flags with prefix ID */
14944 linenr_T startlnum;
14945{
14946 idx_T arridx[MAXWLEN];
14947 int curi[MAXWLEN];
14948 char_u prefix[MAXWLEN];
Bram Moolenaar53805d12005-08-01 07:08:33 +000014949 char_u word_up[MAXWLEN];
14950 int has_word_up = FALSE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014951 int c;
14952 char_u *byts;
14953 idx_T *idxs;
14954 linenr_T lnum = startlnum;
14955 int depth;
14956 int n;
14957 int len;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014958 int i;
14959
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000014960 /* If the word starts with a lower-case letter make the word with an
Bram Moolenaar53805d12005-08-01 07:08:33 +000014961 * upper-case letter in word_up[]. */
14962 c = PTR2CHAR(word);
14963 if (SPELL_TOUPPER(c) != c)
14964 {
14965 onecap_copy(word, word_up, TRUE);
14966 has_word_up = TRUE;
14967 }
14968
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014969 byts = slang->sl_pbyts;
14970 idxs = slang->sl_pidxs;
14971 if (byts != NULL) /* array not is empty */
14972 {
14973 /*
14974 * Loop over all prefixes, building them byte-by-byte in prefix[].
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014975 * When at the end of a prefix check that it supports "flags".
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014976 */
14977 depth = 0;
14978 arridx[0] = 0;
14979 curi[0] = 1;
14980 while (depth >= 0 && !got_int)
14981 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014982 n = arridx[depth];
14983 len = byts[n];
14984 if (curi[depth] > len)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014985 {
14986 /* Done all bytes at this node, go up one level. */
14987 --depth;
14988 line_breakcheck();
14989 }
14990 else
14991 {
14992 /* Do one more byte at this node. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000014993 n += curi[depth];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014994 ++curi[depth];
14995 c = byts[n];
14996 if (c == 0)
14997 {
14998 /* End of prefix, find out how many IDs there are. */
14999 for (i = 1; i < len; ++i)
15000 if (byts[n + i] != 0)
15001 break;
15002 curi[depth] += i - 1;
15003
Bram Moolenaar53805d12005-08-01 07:08:33 +000015004 c = valid_word_prefix(i, n, flags, word, slang, FALSE);
15005 if (c != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015006 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +000015007 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
Bram Moolenaar4770d092006-01-12 23:22:24 +000015008 dump_word(slang, prefix, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000015009 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000015010 : flags, lnum++);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015011 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000015012
15013 /* Check for prefix that matches the word when the
15014 * first letter is upper-case, but only if the prefix has
15015 * a condition. */
15016 if (has_word_up)
15017 {
15018 c = valid_word_prefix(i, n, flags, word_up, slang,
15019 TRUE);
15020 if (c != 0)
15021 {
15022 vim_strncpy(prefix + depth, word_up,
15023 MAXWLEN - depth - 1);
Bram Moolenaar4770d092006-01-12 23:22:24 +000015024 dump_word(slang, prefix, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000015025 (c & WF_RAREPFX) ? (flags | WF_RARE)
15026 : flags, lnum++);
15027 }
15028 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015029 }
15030 else
15031 {
15032 /* Normal char, go one level deeper. */
15033 prefix[depth++] = c;
15034 arridx[depth] = idxs[n];
15035 curi[depth] = 1;
15036 }
15037 }
15038 }
15039 }
15040
15041 return lnum;
15042}
15043
Bram Moolenaar95529562005-08-25 21:21:38 +000015044/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000015045 * Move "p" to the end of word "start".
15046 * Uses the spell-checking word characters.
Bram Moolenaar95529562005-08-25 21:21:38 +000015047 */
15048 char_u *
15049spell_to_word_end(start, buf)
15050 char_u *start;
15051 buf_T *buf;
15052{
15053 char_u *p = start;
15054
15055 while (*p != NUL && spell_iswordp(p, buf))
15056 mb_ptr_adv(p);
15057 return p;
15058}
15059
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015060#if defined(FEAT_INS_EXPAND) || defined(PROTO)
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015061/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000015062 * For Insert mode completion CTRL-X s:
15063 * Find start of the word in front of column "startcol".
15064 * We don't check if it is badly spelled, with completion we can only change
15065 * the word in front of the cursor.
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015066 * Returns the column number of the word.
15067 */
15068 int
15069spell_word_start(startcol)
15070 int startcol;
15071{
15072 char_u *line;
15073 char_u *p;
15074 int col = 0;
15075
Bram Moolenaar95529562005-08-25 21:21:38 +000015076 if (no_spell_checking(curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015077 return startcol;
15078
15079 /* Find a word character before "startcol". */
15080 line = ml_get_curline();
15081 for (p = line + startcol; p > line; )
15082 {
15083 mb_ptr_back(line, p);
15084 if (spell_iswordp_nmw(p))
15085 break;
15086 }
15087
15088 /* Go back to start of the word. */
15089 while (p > line)
15090 {
15091 col = p - line;
15092 mb_ptr_back(line, p);
15093 if (!spell_iswordp(p, curbuf))
15094 break;
15095 col = 0;
15096 }
15097
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015098 return col;
15099}
15100
15101/*
Bram Moolenaar4effc802005-09-30 21:12:02 +000015102 * Need to check for 'spellcapcheck' now, the word is removed before
15103 * expand_spelling() is called. Therefore the ugly global variable.
15104 */
15105static int spell_expand_need_cap;
15106
15107 void
15108spell_expand_check_cap(col)
15109 colnr_T col;
15110{
15111 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
15112}
15113
15114/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015115 * Get list of spelling suggestions.
15116 * Used for Insert mode completion CTRL-X ?.
15117 * Returns the number of matches. The matches are in "matchp[]", array of
15118 * allocated strings.
15119 */
15120/*ARGSUSED*/
15121 int
15122expand_spelling(lnum, col, pat, matchp)
15123 linenr_T lnum;
15124 int col;
15125 char_u *pat;
15126 char_u ***matchp;
15127{
15128 garray_T ga;
15129
Bram Moolenaar4770d092006-01-12 23:22:24 +000015130 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015131 *matchp = ga.ga_data;
15132 return ga.ga_len;
15133}
15134#endif
15135
Bram Moolenaar402d2fe2005-04-15 21:00:38 +000015136#endif /* FEAT_SYN_HL */