blob: 4a65aeeaaaa0fb1e628e726657cad8490a421b30 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * spell.c: code for spell checking
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 *
 * A NUL byte is used where the word may end.  The bytes are sorted, so that
 * binary searching can be used and the NUL bytes are at the start.  The
 * number of possible bytes is stored before the list of bytes.
 *
 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
 * either the next index or flags.  The tree starts at index 0.  For example,
 * to lookup "vi" this sequence is followed:
 *	i = 0
 *	len = byts[i]
 *	n = where "v" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	n = where "i" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
 *
 * There are two word trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * There is one additional tree for when not all prefixes are applied when
 * generating the .spl file.  This tree stores all the possible prefixes, as
 * if they were words.  At each word (prefix) end the prefix nr is stored, the
 * following word must support this prefix nr.  And the condition nr is
 * stored, used to lookup the condition that the word must match with.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 * LZ trie ideas:
 *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */
55
/* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
 * Only use it for small word lists!
 * Change "#if 0" to "#if 1" to enable it. */
#if 0
# define SPELL_PRINTTREE
#endif

/* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
 * specific word.
 * Change "#if 0" to "#if 1" to enable it. */
#if 0
# define DEBUG_TRIEWALK
#endif
67
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the integer weighted average (3 * word_score + sound_score) / 4.
 * Both arguments are parenthesized in the expansion, so arbitrary expressions
 * (e.g. "a + b" or "x ? y : z") may be passed; each is evaluated once.
 */
#define RESCORE(word_score, sound_score) \
	((3 * (word_score) + (sound_score)) / 4)
77
/*
 * Do the opposite of RESCORE(): based on a maximum end score (passed as
 * "word_score") and a known sound score, compute the maximum word score that
 * can be used: (4 * word_score - sound_score) / 3, using integer division.
 * Both arguments are parenthesized in the expansion, so arbitrary expressions
 * may be passed; each is evaluated once.
 */
#define MAXSCORE(word_score, sound_score) \
	((4 * (word_score) - (sound_score)) / 3)
83
84/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +000085 * Vim spell file format: <HEADER>
Bram Moolenaar5195e452005-08-19 20:32:47 +000086 * <SECTIONS>
Bram Moolenaar1d73c882005-06-19 22:48:47 +000087 * <LWORDTREE>
88 * <KWORDTREE>
89 * <PREFIXTREE>
Bram Moolenaar51485f02005-06-04 21:55:20 +000090 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000091 * <HEADER>: <fileID> <versionnr>
Bram Moolenaar51485f02005-06-04 21:55:20 +000092 *
Bram Moolenaar5195e452005-08-19 20:32:47 +000093 * <fileID> 8 bytes "VIMspell"
94 * <versionnr> 1 byte VIMSPELLVERSION
95 *
96 *
97 * Sections make it possible to add information to the .spl file without
98 * making it incompatible with previous versions. There are two kinds of
99 * sections:
100 * 1. Not essential for correct spell checking. E.g. for making suggestions.
101 * These are skipped when not supported.
102 * 2. Optional information, but essential for spell checking when present.
103 * E.g. conditions for affixes. When this section is present but not
104 * supported an error message is given.
105 *
106 * <SECTIONS>: <section> ... <sectionend>
107 *
108 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
109 *
110 * <sectionID> 1 byte number from 0 to 254 identifying the section
111 *
112 * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
113 * spell checking
114 *
115 * <sectionlen> 4 bytes length of section contents, MSB first
116 *
117 * <sectionend> 1 byte SN_END
118 *
119 *
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000120 * sectionID == SN_INFO: <infotext>
121 * <infotext> N bytes free format text with spell file info (version,
122 * website, etc)
123 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000124 * sectionID == SN_REGION: <regionname> ...
125 * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000126 * First <regionname> is region 1.
127 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000128 * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
129 * <folcharslen> <folchars>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000130 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
131 * <charflags> N bytes List of flags (first one is for character 128):
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000132 * 0x01 word character CF_WORD
133 * 0x02 upper-case character CF_UPPER
Bram Moolenaar5195e452005-08-19 20:32:47 +0000134 * <folcharslen> 2 bytes Number of bytes in <folchars>.
135 * <folchars> N bytes Folded characters, first one is for character 128.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000136 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000137 * sectionID == SN_MIDWORD: <midword>
138 * <midword> N bytes Characters that are word characters only when used
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000139 * in the middle of a word.
140 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000141 * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000142 * <prefcondcnt> 2 bytes Number of <prefcond> items following.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000143 * <prefcond> : <condlen> <condstr>
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000144 * <condlen> 1 byte Length of <condstr>.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000145 * <condstr> N bytes Condition for the prefix.
146 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000147 * sectionID == SN_REP: <repcount> <rep> ...
148 * <repcount> 2 bytes number of <rep> items, MSB first.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000149 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000150 * <repfromlen> 1 byte length of <repfrom>
151 * <repfrom> N bytes "from" part of replacement
152 * <reptolen> 1 byte length of <repto>
153 * <repto> N bytes "to" part of replacement
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000154 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000155 * sectionID == SN_REPSAL: <repcount> <rep> ...
156 * just like SN_REP but for soundfolded words
157 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000158 * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
159 * <salflags> 1 byte flags for soundsalike conversion:
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000160 * SAL_F0LLOWUP
161 * SAL_COLLAPSE
162 * SAL_REM_ACCENTS
Bram Moolenaar5195e452005-08-19 20:32:47 +0000163 * <salcount> 2 bytes number of <sal> items following
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000164 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
Bram Moolenaar5195e452005-08-19 20:32:47 +0000165 * <salfromlen> 1 byte length of <salfrom>
166 * <salfrom> N bytes "from" part of soundsalike
167 * <saltolen> 1 byte length of <salto>
168 * <salto> N bytes "to" part of soundsalike
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000169 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000170 * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
171 * <sofofromlen> 2 bytes length of <sofofrom>
172 * <sofofrom> N bytes "from" part of soundfold
173 * <sofotolen> 2 bytes length of <sofoto>
174 * <sofoto> N bytes "to" part of soundfold
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000175 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000176 * sectionID == SN_SUGFILE: <timestamp>
177 * <timestamp> 8 bytes time in seconds that must match with .sug file
178 *
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000179 * sectionID == SN_NOSPLITSUGS: nothing
180 *
Bram Moolenaar4770d092006-01-12 23:22:24 +0000181 * sectionID == SN_WORDS: <word> ...
182 * <word> N bytes NUL terminated common word
183 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000184 * sectionID == SN_MAP: <mapstr>
185 * <mapstr> N bytes String with sequences of similar characters,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000186 * separated by slashes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000187 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000188 * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compflags>
189 * <compmax> 1 byte Maximum nr of words in compound word.
190 * <compminlen> 1 byte Minimal word length for compounding.
191 * <compsylmax> 1 byte Maximum nr of syllables in compound word.
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000192 * <compflags> N bytes Flags from COMPOUNDRULE items, separated by
Bram Moolenaar5195e452005-08-19 20:32:47 +0000193 * slashes.
194 *
Bram Moolenaar78622822005-08-23 21:00:13 +0000195 * sectionID == SN_NOBREAK: (empty, its presence is enough)
196 *
Bram Moolenaar5195e452005-08-19 20:32:47 +0000197 * sectionID == SN_SYLLABLE: <syllable>
198 * <syllable> N bytes String from SYLLABLE item.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000199 *
200 * <LWORDTREE>: <wordtree>
201 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000202 * <KWORDTREE>: <wordtree>
203 *
204 * <PREFIXTREE>: <wordtree>
205 *
206 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000207 * <wordtree>: <nodecount> <nodedata> ...
208 *
209 * <nodecount> 4 bytes Number of nodes following. MSB first.
210 *
211 * <nodedata>: <siblingcount> <sibling> ...
212 *
213 * <siblingcount> 1 byte Number of siblings in this node. The siblings
214 * follow in sorted order.
215 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000216 * <sibling>: <byte> [ <nodeidx> <xbyte>
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000217 * | <flags> [<flags2>] [<region>] [<affixID>]
218 * | [<pflags>] <affixID> <prefcondnr> ]
Bram Moolenaar51485f02005-06-04 21:55:20 +0000219 *
220 * <byte> 1 byte Byte value of the sibling. Special cases:
221 * BY_NOFLAGS: End of word without flags and for all
222 * regions.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000223 * For PREFIXTREE <affixID> and
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000224 * <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000225 * BY_FLAGS: End of word, <flags> follow.
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000226 * For PREFIXTREE <pflags>, <affixID>
Bram Moolenaar53805d12005-08-01 07:08:33 +0000227 * and <prefcondnr> follow.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000228 * BY_FLAGS2: End of word, <flags> and <flags2>
229 * follow. Not used in PREFIXTREE.
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000230 * BY_INDEX: Child of sibling is shared, <nodeidx>
Bram Moolenaar51485f02005-06-04 21:55:20 +0000231 * and <xbyte> follow.
232 *
233 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
234 *
235 * <xbyte> 1 byte byte value of the sibling.
236 *
237 * <flags> 1 byte bitmask of:
238 * WF_ALLCAP word must have only capitals
239 * WF_ONECAP first char of word must be capital
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000240 * WF_KEEPCAP keep-case word
241 * WF_FIXCAP keep-case word, all caps not allowed
Bram Moolenaar51485f02005-06-04 21:55:20 +0000242 * WF_RARE rare word
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000243 * WF_BANNED bad word
Bram Moolenaar51485f02005-06-04 21:55:20 +0000244 * WF_REGION <region> follows
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000245 * WF_AFX <affixID> follows
Bram Moolenaar51485f02005-06-04 21:55:20 +0000246 *
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000247 * <flags2> 1 byte Bitmask of:
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000248 * WF_HAS_AFF >> 8 word includes affix
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000249 * WF_NEEDCOMP >> 8 word only valid in compound
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000250 * WF_NOSUGGEST >> 8 word not used for suggestions
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000251 *
Bram Moolenaar53805d12005-08-01 07:08:33 +0000252 * <pflags> 1 byte bitmask of:
253 * WFP_RARE rare prefix
254 * WFP_NC non-combining prefix
255 * WFP_UP letter after prefix made upper case
256 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000257 * <region> 1 byte Bitmask for regions in which word is valid. When
258 * omitted it's valid in all regions.
259 * Lowest bit is for region 1.
260 *
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000261 * <affixID> 1 byte ID of affix that can be used with this word. In
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000262 * PREFIXTREE used for the required prefix ID.
263 *
 * <prefcondnr> 2 bytes	Prefix condition number, index in <prefcond> list
 *			from the SN_PREFCOND section.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000266 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000267 * All text characters are in 'encoding', but stored as single bytes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000268 */
269
/*
 * Vim .sug file format:  <SUGHEADER>
 *			  <SUGWORDTREE>
 *			  <SUGTABLE>
 *
 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
 *
 * <fileID>     6 bytes     "VIMsug"
 * <versionnr>  1 byte      VIMSUGVERSION
 * <timestamp>  8 bytes     timestamp that must match with .spl file
 *
 *
 * <SUGWORDTREE>: <wordtree>  (see above, no flags or region used)
 *
 *
 * <SUGTABLE>: <sugwcount> <sugline> ...
 *
 * <sugwcount>	4 bytes	    number of <sugline> following
 *
 * <sugline>: <sugnr> ... NUL
 *
 * <sugnr>:     X bytes     word number that results in this soundfolded word,
 *			    stored as an offset to the previous number in as
 *			    few bytes as possible (see offset2bytes())
 */
295
Bram Moolenaare19defe2005-03-21 08:23:33 +0000296#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000297# include "vimio.h" /* for lseek(), must be before vim.h */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000298#endif
299
300#include "vim.h"
301
Bram Moolenaarf71a3db2006-03-12 21:50:18 +0000302#if defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000303
304#ifdef HAVE_FCNTL_H
305# include <fcntl.h>
306#endif
307
Bram Moolenaar4770d092006-01-12 23:22:24 +0000308#ifndef UNIX /* it's in os_unix.h for Unix */
309# include <time.h> /* for time_t */
310#endif
311
/* Assume the maximum word length is this many bytes.
 * Some places assume a word length fits in a byte, thus it can't be above
 * 255. */
#define MAXWLEN 250
Bram Moolenaarfc735152005-03-22 22:54:12 +0000315
/* Type used for indexes in the word tree; it needs to be at least 4 bytes.
 * "int" is used when SIZEOF_INT says it is wide enough, otherwise "long".
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT > 3
typedef int idx_T;
#else
typedef long idx_T;
#endif
323
/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it. */
#define WF_REGION   0x01	/* region byte follows */
#define WF_ONECAP   0x02	/* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04	/* word must be all capitals */
#define WF_RARE	    0x08	/* rare word */
#define WF_BANNED   0x10	/* bad word */
#define WF_AFX	    0x20	/* affix ID follows */
#define WF_FIXCAP   0x40	/* keep-case word, allcap not allowed */
#define WF_KEEPCAP  0x80	/* keep-case word */

/* for <flags2>, shifted up one byte to be used in wn_flags */
#define WF_HAS_AFF  0x0100	/* word includes affix */
#define WF_NEEDCOMP 0x0200	/* word only valid in compound */
#define WF_NOSUGGEST 0x0400	/* word not to be suggested */

/* Only used for su_badflags.  Note: has the same value as WF_AFX. */
#define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */

/* All the flags that describe the capitalisation of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000344
/* flags for <pflags> */
#define WFP_RARE    0x01	/* rare prefix */
#define WFP_NC	    0x02	/* prefix is not combining */
#define WFP_UP	    0x04	/* to-upper prefix */

/* Flags for postponed prefixes.  Must be above affixID (one byte)
 * and prefcondnr (two bytes), thus the <pflags> values are shifted up by 24
 * bits. */
#define WF_RAREPFX  (WFP_RARE << 24)	/* in sl_pidxs: flag for rare
					 * postponed prefix */
#define WF_PFX_NC   (WFP_NC << 24)	/* in sl_pidxs: flag for non-combining
					 * postponed prefix */
#define WF_PFX_UP   (WFP_UP << 24)	/* in sl_pidxs: flag for to-upper
					 * postponed prefix */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000358
/* Special byte values for <byte>.  Some are only used in the tree for
 * postponed prefixes, some only in the other trees.  This is a bit messy... */
#define BY_NOFLAGS	0	/* end of word without flags or region; for
				 * postponed prefix: no <pflags> */
#define BY_INDEX	1	/* child is shared, index follows */
#define BY_FLAGS	2	/* end of word, <flags> byte follows; for
				 * postponed prefix: <pflags> follows */
#define BY_FLAGS2	3	/* end of word, <flags> and <flags2> bytes
				 * follow; never used in prefix tree */
#define BY_SPECIAL  BY_FLAGS2	/* highest special byte value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000369
Bram Moolenaar4770d092006-01-12 23:22:24 +0000370/* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
371 * si_repsal, sl_rep, and si_sal. Not for sl_sal!
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000372 * One replacement: from "ft_from" to "ft_to". */
373typedef struct fromto_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000374{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000375 char_u *ft_from;
376 char_u *ft_to;
377} fromto_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000378
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000379/* Info from "SAL" entries in ".aff" file used in sl_sal.
380 * The info is split for quick processing by spell_soundfold().
381 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
382typedef struct salitem_S
383{
384 char_u *sm_lead; /* leading letters */
385 int sm_leadlen; /* length of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000386 char_u *sm_oneof; /* letters from () or NULL */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000387 char_u *sm_rules; /* rules like ^, $, priority */
388 char_u *sm_to; /* replacement. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000389#ifdef FEAT_MBYTE
390 int *sm_lead_w; /* wide character copy of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000391 int *sm_oneof_w; /* wide character copy of "sm_oneof" */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000392 int *sm_to_w; /* wide character copy of "sm_to" */
393#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000394} salitem_T;
395
/* Element type of the "sl_sal_first" table: "int" when multi-byte support is
 * compiled in, "short" otherwise.  Must be signed, -1 is stored when there is
 * no entry. */
#ifdef FEAT_MBYTE
typedef int salfirst_T;
#else
typedef short salfirst_T;
#endif
401
/* Values for SP_*ERROR are negative, positive values are used by
 * read_cnt_string().  The values are parenthesized so that they are safe to
 * use inside any expression. */
#define	SP_TRUNCERROR	(-1)	/* spell file truncated error */
#define	SP_FORMERROR	(-2)	/* format error in spell file */
#define SP_OTHERERROR	(-3)	/* other error while reading spell file */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000407
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000408/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000409 * Structure used to store words and other info for one language, loaded from
410 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000411 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
412 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
413 *
414 * The "byts" array stores the possible bytes in each tree node, preceded by
415 * the number of possible bytes, sorted on byte value:
416 * <len> <byte1> <byte2> ...
417 * The "idxs" array stores the index of the child node corresponding to the
418 * byte in "byts".
419 * Exception: when the byte is zero, the word may end here and "idxs" holds
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000420 * the flags, region mask and affixID for the word. There may be several
421 * zeros in sequence for alternative flag/region/affixID combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000422 */
423typedef struct slang_S slang_T;
424struct slang_S
425{
426 slang_T *sl_next; /* next language */
427 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000428 char_u *sl_fname; /* name of .spl file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000429 int sl_add; /* TRUE if it's a .add file. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000430
Bram Moolenaar51485f02005-06-04 21:55:20 +0000431 char_u *sl_fbyts; /* case-folded word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000432 idx_T *sl_fidxs; /* case-folded word indexes */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000433 char_u *sl_kbyts; /* keep-case word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000434 idx_T *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000435 char_u *sl_pbyts; /* prefix tree word bytes */
436 idx_T *sl_pidxs; /* prefix tree word indexes */
437
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000438 char_u *sl_info; /* infotext string or NULL */
439
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000440 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000441
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000442 char_u *sl_midword; /* MIDWORD string or NULL */
443
Bram Moolenaar4770d092006-01-12 23:22:24 +0000444 hashtab_T sl_wordcount; /* hashtable with word count, wordcount_T */
445
Bram Moolenaar5195e452005-08-19 20:32:47 +0000446 int sl_compmax; /* COMPOUNDMAX (default: MAXWLEN) */
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000447 int sl_compminlen; /* COMPOUNDMIN (default: 0) */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000448 int sl_compsylmax; /* COMPOUNDSYLMAX (default: MAXWLEN) */
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000449 regprog_T *sl_compprog; /* COMPOUNDRULE turned into a regexp progrm
Bram Moolenaar5195e452005-08-19 20:32:47 +0000450 * (NULL when no compounding) */
451 char_u *sl_compstartflags; /* flags for first compound word */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000452 char_u *sl_compallflags; /* all flags for compound words */
Bram Moolenaar78622822005-08-23 21:00:13 +0000453 char_u sl_nobreak; /* When TRUE: no spaces between words */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000454 char_u *sl_syllable; /* SYLLABLE repeatable chars or NULL */
455 garray_T sl_syl_items; /* syllable items */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000456
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000457 int sl_prefixcnt; /* number of items in "sl_prefprog" */
458 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
459
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000460 garray_T sl_rep; /* list of fromto_T entries from REP lines */
461 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
462 there is none */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000463 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000464 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000465 there is none */
466 int sl_followup; /* SAL followup */
467 int sl_collapse; /* SAL collapse_result */
468 int sl_rem_accents; /* SAL remove_accents */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000469 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items:
470 * "sl_sal_first" maps chars, when has_mbyte
471 * "sl_sal" is a list of wide char lists. */
472 garray_T sl_repsal; /* list of fromto_T entries from REPSAL lines */
473 short sl_repsal_first[256]; /* sl_rep_first for REPSAL lines */
Bram Moolenaare1438bb2006-03-01 22:01:55 +0000474 int sl_nosplitsugs; /* don't suggest splitting a word */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000475
476 /* Info from the .sug file. Loaded on demand. */
477 time_t sl_sugtime; /* timestamp for .sug file */
478 char_u *sl_sbyts; /* soundfolded word bytes */
479 idx_T *sl_sidxs; /* soundfolded word indexes */
480 buf_T *sl_sugbuf; /* buffer with word number table */
481 int sl_sugloaded; /* TRUE when .sug file was loaded or failed to
482 load */
483
Bram Moolenaarea424162005-06-16 21:51:00 +0000484 int sl_has_map; /* TRUE if there is a MAP line */
485#ifdef FEAT_MBYTE
486 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
487 int sl_map_array[256]; /* MAP for first 256 chars */
488#else
489 char_u sl_map_array[256]; /* MAP for first 256 chars */
490#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000491 hashtab_T sl_sounddone; /* table with soundfolded words that have
492 handled, see add_sound_suggest() */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000493};
494
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000495/* First language that is loaded, start of the linked list of loaded
496 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000497static slang_T *first_lang = NULL;
498
/* Flags used in .spl file for soundsalike flags (the <salflags> byte).
 * Note that the name SAL_F0LLOWUP is spelled with a zero. */
#define SAL_F0LLOWUP		1
#define SAL_COLLAPSE		2
#define SAL_REM_ACCENTS		4
503
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000504/*
505 * Structure used in "b_langp", filled from 'spelllang'.
506 */
507typedef struct langp_S
508{
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000509 slang_T *lp_slang; /* info for this language */
510 slang_T *lp_sallang; /* language used for sound folding or NULL */
511 slang_T *lp_replang; /* language used for REP items or NULL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000512 int lp_region; /* bitmask for region or REGION_ALL */
513} langp_T;
514
/* Pointer to entry "i" of growarray "ga", whose data is an array of
 * langp_T. */
#define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))

#define REGION_ALL 0xff		/* word valid in all regions */

#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 8	  /* length of VIMSPELLMAGIC */
#define VIMSPELLVERSION 50

#define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
#define VIMSUGMAGICL 6		/* length of VIMSUGMAGIC */
#define VIMSUGVERSION 1
526
/* Section IDs (the <sectionID> byte).  Only renumber them when
 * VIMSPELLVERSION changes! */
#define SN_REGION	0	/* <regionname> section */
#define SN_CHARFLAGS	1	/* charflags section */
#define SN_MIDWORD	2	/* <midword> section */
#define SN_PREFCOND	3	/* <prefcond> section */
#define SN_REP		4	/* REP items section */
#define SN_SAL		5	/* SAL items section */
#define SN_SOFO		6	/* soundfolding section */
#define SN_MAP		7	/* MAP items section */
#define SN_COMPOUND	8	/* compound words section */
#define SN_SYLLABLE	9	/* syllable section */
#define SN_NOBREAK	10	/* NOBREAK section */
#define SN_SUGFILE	11	/* timestamp for .sug file */
#define SN_REPSAL	12	/* REPSAL items section */
#define SN_WORDS	13	/* common words */
#define SN_NOSPLITSUGS	14	/* don't split word for suggestions */
#define SN_INFO		15	/* info section */
#define SN_END		255	/* end of sections */

#define SNF_REQUIRED	1	/* <sectionflags>: required section */
547
/* Result values.  Lower number is accepted over higher one.
 * SP_BANNED is parenthesized so that it is safe inside any expression. */
#define SP_BANNED	(-1)
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000554
Bram Moolenaar7887d882005-07-01 22:33:52 +0000555/* file used for "zG" and "zW" */
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000556static char_u *int_wordlist = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +0000557
Bram Moolenaar4770d092006-01-12 23:22:24 +0000558typedef struct wordcount_S
559{
560 short_u wc_count; /* nr of times word was seen */
561 char_u wc_word[1]; /* word, actually longer */
562} wordcount_T;
563
564static wordcount_T dumwc;
565#define WC_KEY_OFF (dumwc.wc_word - (char_u *)&dumwc)
566#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
567#define MAXWORDCOUNT 0xffff
568
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000569/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000570 * Information used when looking for suggestions.
571 */
572typedef struct suginfo_S
573{
574 garray_T su_ga; /* suggestions, contains "suggest_T" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000575 int su_maxcount; /* max. number of suggestions displayed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000576 int su_maxscore; /* maximum score for adding to su_ga */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000577 int su_sfmaxscore; /* like su_maxscore, for when doing soundfold
 words */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000578 garray_T su_sga; /* like su_ga, sound-folded scoring */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000579 char_u *su_badptr; /* start of bad word in line */
580 int su_badlen; /* length of detected bad word in line */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000581 int su_badflags; /* caps flags for bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000582 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
583 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000584 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000585 hashtab_T su_banned; /* table with banned words, looked up with
 WAS_BANNED() */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000586 slang_T *su_sallang; /* default language for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000587} suginfo_T;
588
589/* One word suggestion. Used in "su_ga" and "su_sga" of suginfo_T. */
590typedef struct suggest_S
591{
592 char_u *st_word; /* suggested word, allocated string */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000593 int st_wordlen; /* STRLEN(st_word) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000594 int st_orglen; /* length of replaced text */
595 int st_score; /* lower is better */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000596 int st_altscore; /* used when st_score compares equal */
597 int st_salscore; /* st_score is for soundalike */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000598 int st_had_bonus; /* bonus already included in score */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000599 slang_T *st_slang; /* language used for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000600} suggest_T;
601
/* Item number "i" of growarray "ga", accessed as a "suggest_T". */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* TRUE if a word appears in the list of banned words.
 * "su" is parenthesized in the expansion, so that any expression that yields
 * a pointer to a suginfo_T can be passed, not only a plain variable name. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later.
 * Note: evaluates "su" more than once. */
#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su) (SUG_CLEAN_COUNT(su) + 50)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000615
/* score for various changes */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_SPLIT_NO  249     /* split bad word with NOSPLITSUGS */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    200     /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      75      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       65      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_SUBCOMP   33      /* substitute a composing character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    66      /* delete a duplicated character */
#define SCORE_DELCOMP   28      /* delete a composing character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    67      /* insert a duplicate character */
#define SCORE_INSCOMP   30      /* insert a composing character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_FILE      30      /* suggestion from a file */
#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */

#define SCORE_COMMON1   30      /* subtracted for words seen before */
#define SCORE_COMMON2   40      /* subtracted for words often seen */
#define SCORE_COMMON3   50      /* subtracted for words very often seen */
#define SCORE_THRES2    10      /* word count threshold for COMMON2 */
#define SCORE_THRES3    100     /* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With less than two changes it's slightly faster but we miss a
 * few good suggestions.  In rare cases we need to try three or four changes.
 */
#define SCORE_SFMAX1    200     /* maximum score for first try */
#define SCORE_SFMAX2    300     /* maximum score for second try */
#define SCORE_SFMAX3    400     /* maximum score for third try */

/* The expansion is parenthesized so that SCORE_BIG keeps its value when it is
 * used inside a larger expression, e.g. as a divisor. */
#define SCORE_BIG       (SCORE_INS * 3) /* big difference */
#define SCORE_MAXMAX    999999  /* accept any score */
#define SCORE_LIMITMAX  350     /* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN  SCORE_SIMILAR
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000661
662/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000663 * Structure to store info for word matching.
664 */
/* spell_check() clears the whole struct with vim_memset() before filling in
 * the fields it needs. */
665typedef struct matchinf_S
666{
667 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000668
669 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000670 char_u *mi_word; /* start of word being checked */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000671 char_u *mi_end; /* end of matching word so far */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000672 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000673 char_u *mi_cend; /* char after what was used for
674 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000675
676 /* case-folded text */
677 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000678 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000679
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000680 /* for when checking word after a prefix */
681 int mi_prefarridx; /* index in sl_pidxs with list of
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000682 affixID/condition */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000683 int mi_prefcnt; /* number of entries at mi_prefarridx */
684 int mi_prefixlen; /* byte length of prefix */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000685#ifdef FEAT_MBYTE
686 int mi_cprefixlen; /* byte length of prefix in original
687 case */
688#else
689# define mi_cprefixlen mi_prefixlen /* it's the same value */
690#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000691
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000692 /* for when checking a compound word */
693 int mi_compoff; /* start of following word offset */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000694 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */
695 int mi_complen; /* nr of compound words used */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000696
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000697 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000698 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000699 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000700 buf_T *mi_buf; /* buffer being checked */
Bram Moolenaar78622822005-08-23 21:00:13 +0000701
702 /* for NOBREAK */
703 int mi_result2; /* "mi_result" without following word */
704 char_u *mi_end2; /* "mi_end" without following word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000705} matchinf_T;
706
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000707/*
708 * The tables used for recognizing word characters according to spelling.
709 * These are only used for the first 256 characters of 'encoding'.
710 */
/* Each table has one entry per character value 0..255. */
711typedef struct spelltab_S
712{
713 char_u st_isw[256]; /* flags: is word char */
714 char_u st_isu[256]; /* flags: is uppercase char */
715 char_u st_fold[256]; /* chars: folded case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000716 char_u st_upper[256]; /* chars: upper case */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000717} spelltab_T;
718
/* The tables read by SPELL_TOFOLD(), SPELL_TOUPPER() and SPELL_ISUPPER() for
 * characters below 256. */
719static spelltab_T spelltab;
/* NOTE(review): presumably non-zero once "spelltab" has been filled in;
 * confirm where it is set. */
720static int did_set_spelltab;
721
/* NOTE(review): presumably per-character flag bits (word character and
 * upper-case character); confirm at the places where they are used. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000722#define CF_WORD 0x01
723#define CF_UPPER 0x02
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000724
725static void clear_spell_chartab __ARGS((spelltab_T *sp));
726static int set_spell_finish __ARGS((spelltab_T *new_st));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000727static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
728static int spell_iswordp_nmw __ARGS((char_u *p));
729#ifdef FEAT_MBYTE
730static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
731#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000732static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000733
734/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000735 * For finding suggestions: At each node in the tree these states are tried:
Bram Moolenaarea424162005-06-16 21:51:00 +0000736 */
/* The current state is kept in "ts_state" of trystate_T, one per level. */
737typedef enum
738{
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000739 STATE_START = 0, /* At start of node check for NUL bytes (goodword
740 * ends); if badword ends there is a match, otherwise
741 * try splitting word. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000742 STATE_NOPREFIX, /* try without prefix */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000743 STATE_SPLITUNDO, /* Undo splitting. */
Bram Moolenaarea424162005-06-16 21:51:00 +0000744 STATE_ENDNUL, /* Past NUL bytes at start of the node. */
745 STATE_PLAIN, /* Use each byte of the node. */
746 STATE_DEL, /* Delete a byte from the bad word. */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000747 STATE_INS_PREP, /* Prepare for inserting bytes. */
Bram Moolenaarea424162005-06-16 21:51:00 +0000748 STATE_INS, /* Insert a byte in the bad word. */
749 STATE_SWAP, /* Swap two bytes. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000750 STATE_UNSWAP, /* Undo swap two characters. */
751 STATE_SWAP3, /* Swap two characters over three. */
752 STATE_UNSWAP3, /* Undo swap two characters over three. */
 /* NOTE(review): there is no separate state for doing the rotate itself,
 * only for undoing it; presumably the rotate is done from another state --
 * confirm in the code that walks these states. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000753 STATE_UNROT3L, /* Undo rotate three characters left */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000754 STATE_UNROT3R, /* Undo rotate three characters right */
Bram Moolenaarea424162005-06-16 21:51:00 +0000755 STATE_REP_INI, /* Prepare for using REP items. */
756 STATE_REP, /* Use matching REP items from the .aff file. */
757 STATE_REP_UNDO, /* Undo a REP item replacement. */
758 STATE_FINAL /* End of this node. */
759} state_T;
760
761/*
Bram Moolenaar0c405862005-06-22 22:26:26 +0000762 * Struct to keep the state at each level in suggest_try_change().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000763 */
/* Most fields are a char_u, so the indexes and lengths kept here are limited
 * to what fits in that type. */
764typedef struct trystate_S
765{
Bram Moolenaarea424162005-06-16 21:51:00 +0000766 state_T ts_state; /* state at this level, STATE_ */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000767 int ts_score; /* score */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000768 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaarea424162005-06-16 21:51:00 +0000769 short ts_curi; /* index in list of child nodes */
770 char_u ts_fidx; /* index in fword[], case-folded bad word */
771 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
772 char_u ts_twordlen; /* valid length of tword[] */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000773 char_u ts_prefixdepth; /* stack depth for end of prefix or
Bram Moolenaard12a1322005-08-21 22:08:24 +0000774 * PFD_PREFIXTREE or PFD_NOPREFIX */
775 char_u ts_flags; /* TSF_ flags */
Bram Moolenaarea424162005-06-16 21:51:00 +0000776#ifdef FEAT_MBYTE
777 char_u ts_tcharlen; /* number of bytes in tword character */
778 char_u ts_tcharidx; /* current byte index in tword character */
779 char_u ts_isdiff; /* DIFF_ values */
780 char_u ts_fcharstart; /* index in fword where badword char started */
781#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000782 char_u ts_prewordlen; /* length of word in "preword[]" */
783 char_u ts_splitoff; /* index in "tword" after last split */
Bram Moolenaar78622822005-08-23 21:00:13 +0000784 char_u ts_splitfidx; /* "ts_fidx" at word split */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000785 char_u ts_complen; /* nr of compound words used */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000786 char_u ts_compsplit; /* index for "compflags" where word was split */
 /* NOTE(review): "su_badflags" in suginfo_T is an int while this field is a
 * char_u; confirm that all flag bits that get saved fit in a char_u. */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000787 char_u ts_save_badflags; /* su_badflags saved here */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000788 char_u ts_delidx; /* index in fword for char that was deleted,
789 valid when "ts_flags" has TSF_DIDDEL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000790} trystate_T;
791
Bram Moolenaarea424162005-06-16 21:51:00 +0000792/* values for ts_isdiff in trystate_T */
793#define DIFF_NONE 0 /* no different byte (yet) */
794#define DIFF_YES 1 /* different byte found */
795#define DIFF_INSERT 2 /* inserting character */
796
Bram Moolenaard12a1322005-08-21 22:08:24 +0000797/* values for ts_flags in trystate_T; each one is a separate bit */
798#define TSF_PREFIXOK 1 /* already checked that prefix is OK */
799#define TSF_DIDSPLIT 2 /* tried split at this point */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000800#define TSF_DIDDEL 4 /* did a delete, "ts_delidx" has index */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000801
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000802/* special values for ts_prefixdepth in trystate_T, which is a char_u; other
 * values up to PFD_NOTSPECIAL are a stack depth */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000803#define PFD_NOPREFIX 0xff /* not using prefixes */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000804#define PFD_PREFIXTREE 0xfe /* walking through the prefix tree */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000805#define PFD_NOTSPECIAL 0xfd /* highest value that's not special */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000806
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000807/* values for the "mode" argument of find_word() */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000808#define FIND_FOLDWORD 0 /* find word case-folded */
809#define FIND_KEEPWORD 1 /* find keep-case word */
810#define FIND_PREFIX 2 /* find word after prefix */
811#define FIND_COMPOUND 3 /* find case-folded compound word */
812#define FIND_KEEPCOMPOUND 4 /* find keep-case compound word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000813
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000814static slang_T *slang_alloc __ARGS((char_u *lang));
815static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000816static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000817static void slang_clear_sug __ARGS((slang_T *lp));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000818static void find_word __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000819static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000820static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
Bram Moolenaard12a1322005-08-21 22:08:24 +0000821static void find_prefix __ARGS((matchinf_T *mip, int mode));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000822static int fold_more __ARGS((matchinf_T *mip));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000823static int spell_valid_case __ARGS((int wordflags, int treeflags));
Bram Moolenaar95529562005-08-25 21:21:38 +0000824static int no_spell_checking __ARGS((win_T *wp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000825static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000826static char_u *spell_enc __ARGS((void));
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000827static void int_wordlist_spl __ARGS((char_u *fname));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000828static void spell_load_cb __ARGS((char_u *fname, void *cookie));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000829static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
Bram Moolenaarb388adb2006-02-28 23:50:17 +0000830static int get2c __ARGS((FILE *fd));
831static int get3c __ARGS((FILE *fd));
832static int get4c __ARGS((FILE *fd));
833static time_t get8c __ARGS((FILE *fd));
Bram Moolenaar0dc065e2005-07-04 22:49:24 +0000834static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000835static char_u *read_string __ARGS((FILE *fd, int cnt));
836static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
837static int read_charflags_section __ARGS((FILE *fd));
838static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000839static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000840static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000841static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len));
842static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count));
843static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000844static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
845static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
Bram Moolenaar6de68532005-08-24 22:08:48 +0000846static int byte_in_str __ARGS((char_u *str, int byte));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000847static int init_syl_tab __ARGS((slang_T *slang));
848static int count_syllables __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar7887d882005-07-01 22:33:52 +0000849static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
850static void set_sal_first __ARGS((slang_T *lp));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000851#ifdef FEAT_MBYTE
852static int *mb_str2wide __ARGS((char_u *s));
853#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +0000854static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt));
855static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000856static void clear_midword __ARGS((buf_T *buf));
857static void use_midword __ARGS((slang_T *lp, buf_T *buf));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000858static int find_region __ARGS((char_u *rp, char_u *region));
859static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000860static int badword_captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000861static void spell_reload_one __ARGS((char_u *fname, int added_word));
Bram Moolenaar5195e452005-08-19 20:32:47 +0000862static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000863static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000864static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
Bram Moolenaar8b59de92005-08-11 19:59:29 +0000865static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
Bram Moolenaar66fa2712006-01-22 23:22:22 +0000866static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000867#ifdef FEAT_EVAL
868static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
869#endif
870static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000871static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive));
872static void suggest_load_files __ARGS((void));
873static void tree_count_words __ARGS((char_u *byts, idx_T *idxs));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000874static void spell_find_cleanup __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000875static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000876static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000877static void suggest_try_special __ARGS((suginfo_T *su));
878static void suggest_try_change __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000879static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold));
880static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add));
Bram Moolenaar53805d12005-08-01 07:08:33 +0000881#ifdef FEAT_MBYTE
882static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
883#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000884static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000885static void score_comp_sal __ARGS((suginfo_T *su));
886static void score_combine __ARGS((suginfo_T *su));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000887static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000888static void suggest_try_soundalike_prep __ARGS((void));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000889static void suggest_try_soundalike __ARGS((suginfo_T *su));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000890static void suggest_try_soundalike_finish __ARGS((void));
891static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp));
892static int soundfold_find __ARGS((slang_T *slang, char_u *word));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000893static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
Bram Moolenaarea424162005-06-16 21:51:00 +0000894static void set_map_str __ARGS((slang_T *lp, char_u *map));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000895static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000896static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf));
897static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000898static void add_banned __ARGS((suginfo_T *su, char_u *word));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000899static void rescore_suggestions __ARGS((suginfo_T *su));
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000900static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000901static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000902static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
903static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
904static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000905#ifdef FEAT_MBYTE
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000906static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000907#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000908static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000909static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword));
910static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
911#ifdef FEAT_MBYTE
912static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
913#endif
Bram Moolenaarb475fb92006-03-02 22:40:52 +0000914static void dump_word __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum));
915static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum));
Bram Moolenaar4770d092006-01-12 23:22:24 +0000916static buf_T *open_spellbuf __ARGS((void));
917static void close_spellbuf __ARGS((buf_T *buf));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000918
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000919/*
920 * Use our own character-case definitions, because the current locale may
921 * differ from what the .spl file uses.
922 * These must not be called with negative number!
923 */
/*
 * SPELL_TOFOLD(c), SPELL_TOUPPER(c) and SPELL_ISUPPER(c): fold case, make
 * upper case and test for upper case.
 * Every variant uses "c" more than once, thus the argument must not have side
 * effects.  Only "c < 256" is checked before indexing the "spelltab" tables,
 * so a negative "c" would index below the start of the table.
 */
924#ifndef FEAT_MBYTE
925/* Non-multi-byte implementation: table lookup below 256, otherwise the
 * character is returned as-is (or FALSE for SPELL_ISUPPER). */
926# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
927# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
928# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
929#else
Bram Moolenaarcfc7d632005-07-28 22:28:16 +0000930# if defined(HAVE_WCHAR_H)
931# include <wchar.h> /* for towupper() and towlower() */
932# endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000933/* Multi-byte implementation. For Unicode we can call utf_*(), but don't do
934 * that for ASCII, because we don't want to use 'casemap' here. Otherwise use
935 * the "w" library function for characters above 255 if available.
 * Order of the checks: "enc_utf8" set and c >= 128 uses utf_*(), then
 * c < 256 uses the "spelltab" tables, what remains goes to the "w" library
 * function or is left unchanged. */
936# ifdef HAVE_TOWLOWER
937# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
938 : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
939# else
940# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
941 : (c) < 256 ? spelltab.st_fold[c] : (c))
942# endif
943
944# ifdef HAVE_TOWUPPER
945# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
946 : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
947# else
948# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
949 : (c) < 256 ? spelltab.st_upper[c] : (c))
950# endif
951
952# ifdef HAVE_ISWUPPER
953# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
954 : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
955# else
956# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000957 : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000958# endif
959#endif
960
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000961
/* Message texts.  "e_afftrailing" and "e_affname" are printf-style formats
 * that take a string, a number and a string ("%s", "%d", "%s"). */
962static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar7887d882005-07-01 22:33:52 +0000963static char *e_spell_trunc = N_("E758: Truncated spell file");
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000964static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
Bram Moolenaar6de68532005-08-24 22:08:48 +0000965static char *e_affname = N_("Affix name too long in %s line %d: %s");
966static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
967static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
Bram Moolenaar329cc7e2005-08-10 07:51:35 +0000968static char *msg_compressing = N_("Compressing word tree...");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000969
Bram Moolenaara40ceaf2006-01-13 22:35:40 +0000970/* Remember what "z?" replaced: the text before and after the replacement.
 * Both start out as NULL. */
971static char_u *repl_from = NULL;
972static char_u *repl_to = NULL;
973
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000974/*
975 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000976 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000977 * "*attrp" is set to the highlight index for a badly spelled word. For a
978 * non-word or when it's OK it remains unchanged.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000979 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000980 *
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000981 * "capcol" is used to check for a Capitalised word after the end of a
982 * sentence. If it's zero then perform the check. Return the column where to
983 * check next, or -1 when no sentence end was found. If it's NULL then don't
984 * worry.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000985 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000986 * Returns the length of the word in bytes, also when it's OK, so that the
987 * caller can skip over the word.
988 */
989 int
Bram Moolenaar4770d092006-01-12 23:22:24 +0000990spell_check(wp, ptr, attrp, capcol, docount)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000991 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000992 char_u *ptr;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000993 hlf_T *attrp;
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000994 int *capcol; /* column to check for Capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000995 int docount; /* count good words */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000996{
997 matchinf_T mi; /* Most things are put in "mi" so that it can
998 be passed to functions quickly. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000999 int nrlen = 0; /* found a number first */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001000 int c;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001001 int wrongcaplen = 0;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001002 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +00001003 int count_word = docount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001004
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001005 /* A word never starts at a space or a control character. Return quickly
1006 * then, skipping over the character. */
1007 if (*ptr <= ' ')
1008 return 1;
Bram Moolenaara226a6d2006-02-26 23:59:20 +00001009
1010 /* Return here when loading language files failed. */
1011 if (wp->w_buffer->b_langp.ga_len == 0)
1012 return 1;
1013
Bram Moolenaar5195e452005-08-19 20:32:47 +00001014 vim_memset(&mi, 0, sizeof(matchinf_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001015
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001016 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
Bram Moolenaar43abc522005-12-10 20:15:02 +00001017 * 0X99FF. But always do check spelling to find "3GPP" and "11
1018 * julifeest". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001019 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +00001020 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00001021 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1022 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001023 else
1024 mi.mi_end = skipdigits(ptr);
Bram Moolenaar43abc522005-12-10 20:15:02 +00001025 nrlen = mi.mi_end - ptr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001026 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001027
Bram Moolenaar0c405862005-06-22 22:26:26 +00001028 /* Find the normal end of the word (until the next non-word character). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001029 mi.mi_word = ptr;
Bram Moolenaar43abc522005-12-10 20:15:02 +00001030 mi.mi_fend = ptr;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001031 if (spell_iswordp(mi.mi_fend, wp->w_buffer))
Bram Moolenaar51485f02005-06-04 21:55:20 +00001032 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001033 do
Bram Moolenaar51485f02005-06-04 21:55:20 +00001034 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001035 mb_ptr_adv(mi.mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001036 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001037
1038 if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
1039 {
1040 /* Check word starting with capital letter. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00001041 c = PTR2CHAR(ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001042 if (!SPELL_ISUPPER(c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001043 wrongcaplen = (int)(mi.mi_fend - ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001044 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001045 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001046 if (capcol != NULL)
1047 *capcol = -1;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001048
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001049 /* We always use the characters up to the next non-word character,
1050 * also for bad words. */
1051 mi.mi_end = mi.mi_fend;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001052
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001053 /* Check caps type later. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001054 mi.mi_buf = wp->w_buffer;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001055
Bram Moolenaar5195e452005-08-19 20:32:47 +00001056 /* case-fold the word with one non-word character, so that we can check
1057 * for the word end. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001058 if (*mi.mi_fend != NUL)
1059 mb_ptr_adv(mi.mi_fend);
1060
1061 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
1062 MAXWLEN + 1);
1063 mi.mi_fwordlen = STRLEN(mi.mi_fword);
1064
1065 /* The word is bad unless we recognize it. */
1066 mi.mi_result = SP_BAD;
Bram Moolenaar78622822005-08-23 21:00:13 +00001067 mi.mi_result2 = SP_BAD;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001068
1069 /*
1070 * Loop over the languages specified in 'spelllang'.
Bram Moolenaar4770d092006-01-12 23:22:24 +00001071 * We check them all, because a word may be matched longer in another
1072 * language.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001073 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001074 for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001075 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001076 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
1077
1078 /* If reloading fails the language is still in the list but everything
1079 * has been cleared. */
1080 if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
1081 continue;
1082
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001083 /* Check for a matching word in case-folded words. */
1084 find_word(&mi, FIND_FOLDWORD);
1085
1086 /* Check for a matching word in keep-case words. */
1087 find_word(&mi, FIND_KEEPWORD);
1088
1089 /* Check for matching prefixes. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001090 find_prefix(&mi, FIND_FOLDWORD);
Bram Moolenaar78622822005-08-23 21:00:13 +00001091
1092 /* For a NOBREAK language, may want to use a word without a following
1093 * word as a backup. */
1094 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
1095 && mi.mi_result2 != SP_BAD)
1096 {
1097 mi.mi_result = mi.mi_result2;
1098 mi.mi_end = mi.mi_end2;
1099 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00001100
1101 /* Count the word in the first language where it's found to be OK. */
1102 if (count_word && mi.mi_result == SP_OK)
1103 {
1104 count_common_word(mi.mi_lp->lp_slang, ptr,
1105 (int)(mi.mi_end - ptr), 1);
1106 count_word = FALSE;
1107 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001108 }
1109
1110 if (mi.mi_result != SP_OK)
1111 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001112 /* If we found a number skip over it. Allows for "42nd". Do flag
1113 * rare and local words, e.g., "3GPP". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001114 if (nrlen > 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001115 {
1116 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1117 return nrlen;
1118 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001119
1120 /* When we are at a non-word character there is no error, just
1121 * skip over the character (try looking for a word after it). */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001122 else if (!spell_iswordp_nmw(ptr))
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00001123 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001124 if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
1125 {
1126 regmatch_T regmatch;
1127
1128 /* Check for end of sentence. */
1129 regmatch.regprog = wp->w_buffer->b_cap_prog;
1130 regmatch.rm_ic = FALSE;
1131 if (vim_regexec(&regmatch, ptr, 0))
1132 *capcol = (int)(regmatch.endp[0] - ptr);
1133 }
1134
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001135#ifdef FEAT_MBYTE
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001136 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001137 return (*mb_ptr2len)(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001138#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001139 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001140 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001141 else if (mi.mi_end == ptr)
1142 /* Always include at least one character. Required for when there
1143 * is a mixup in "midword". */
1144 mb_ptr_adv(mi.mi_end);
Bram Moolenaar78622822005-08-23 21:00:13 +00001145 else if (mi.mi_result == SP_BAD
1146 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
1147 {
1148 char_u *p, *fp;
1149 int save_result = mi.mi_result;
1150
1151 /* First language in 'spelllang' is NOBREAK. Find first position
1152 * at which any word would be valid. */
1153 mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001154 if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
Bram Moolenaar78622822005-08-23 21:00:13 +00001155 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001156 p = mi.mi_word;
1157 fp = mi.mi_fword;
1158 for (;;)
Bram Moolenaar78622822005-08-23 21:00:13 +00001159 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001160 mb_ptr_adv(p);
1161 mb_ptr_adv(fp);
1162 if (p >= mi.mi_end)
1163 break;
1164 mi.mi_compoff = fp - mi.mi_fword;
1165 find_word(&mi, FIND_COMPOUND);
1166 if (mi.mi_result != SP_BAD)
1167 {
1168 mi.mi_end = p;
1169 break;
1170 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001171 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001172 mi.mi_result = save_result;
Bram Moolenaar78622822005-08-23 21:00:13 +00001173 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001174 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001175
1176 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001177 *attrp = HLF_SPB;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001178 else if (mi.mi_result == SP_RARE)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001179 *attrp = HLF_SPR;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001180 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001181 *attrp = HLF_SPL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001182 }
1183
Bram Moolenaar5195e452005-08-19 20:32:47 +00001184 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1185 {
1186 /* Report SpellCap only when the word isn't badly spelled. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001187 *attrp = HLF_SPC;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001188 return wrongcaplen;
1189 }
1190
Bram Moolenaar51485f02005-06-04 21:55:20 +00001191 return (int)(mi.mi_end - ptr);
1192}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001193
Bram Moolenaar51485f02005-06-04 21:55:20 +00001194/*
1195 * Check if the word at "mip->mi_word" is in the tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001196 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1197 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1198 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1199 * tree.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001200 *
1201 * For a match mip->mi_result is updated.
1202 */
1203 static void
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001204find_word(mip, mode)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001205 matchinf_T *mip;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001206 int mode;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001207{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001208 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001209 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001210 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001211 int endidxcnt = 0;
1212 int len;
1213 int wlen = 0;
1214 int flen;
1215 int c;
1216 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001217 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001218#ifdef FEAT_MBYTE
1219 char_u *s;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001220#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +00001221 char_u *p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001222 int res = SP_BAD;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001223 slang_T *slang = mip->mi_lp->lp_slang;
1224 unsigned flags;
1225 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001226 idx_T *idxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001227 int word_ends;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001228 int prefix_found;
Bram Moolenaar78622822005-08-23 21:00:13 +00001229 int nobreak_result;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001230
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001231 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001232 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001233 /* Check for word with matching case in keep-case tree. */
1234 ptr = mip->mi_word;
1235 flen = 9999; /* no case folding, always enough bytes */
1236 byts = slang->sl_kbyts;
1237 idxs = slang->sl_kidxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001238
1239 if (mode == FIND_KEEPCOMPOUND)
1240 /* Skip over the previously found word(s). */
1241 wlen += mip->mi_compoff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001242 }
1243 else
1244 {
1245 /* Check for case-folded in case-folded tree. */
1246 ptr = mip->mi_fword;
1247 flen = mip->mi_fwordlen; /* available case-folded bytes */
1248 byts = slang->sl_fbyts;
1249 idxs = slang->sl_fidxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001250
1251 if (mode == FIND_PREFIX)
1252 {
1253 /* Skip over the prefix. */
1254 wlen = mip->mi_prefixlen;
1255 flen -= mip->mi_prefixlen;
1256 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001257 else if (mode == FIND_COMPOUND)
1258 {
1259 /* Skip over the previously found word(s). */
1260 wlen = mip->mi_compoff;
1261 flen -= mip->mi_compoff;
1262 }
1263
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001264 }
1265
Bram Moolenaar51485f02005-06-04 21:55:20 +00001266 if (byts == NULL)
1267 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001268
Bram Moolenaar51485f02005-06-04 21:55:20 +00001269 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001270 * Repeat advancing in the tree until:
1271 * - there is a byte that doesn't match,
1272 * - we reach the end of the tree,
1273 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +00001274 */
1275 for (;;)
1276 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001277 if (flen <= 0 && *mip->mi_fend != NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001278 flen = fold_more(mip);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001279
1280 len = byts[arridx++];
1281
1282 /* If the first possible byte is a zero the word could end here.
1283 * Remember this index, we first check for the longest word. */
1284 if (byts[arridx] == 0)
1285 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001286 if (endidxcnt == MAXWLEN)
1287 {
1288 /* Must be a corrupted spell file. */
1289 EMSG(_(e_format));
1290 return;
1291 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001292 endlen[endidxcnt] = wlen;
1293 endidx[endidxcnt++] = arridx++;
1294 --len;
1295
1296 /* Skip over the zeros, there can be several flag/region
1297 * combinations. */
1298 while (len > 0 && byts[arridx] == 0)
1299 {
1300 ++arridx;
1301 --len;
1302 }
1303 if (len == 0)
1304 break; /* no children, word must end here */
1305 }
1306
1307 /* Stop looking at end of the line. */
1308 if (ptr[wlen] == NUL)
1309 break;
1310
1311 /* Perform a binary search in the list of accepted bytes. */
1312 c = ptr[wlen];
Bram Moolenaar0c405862005-06-22 22:26:26 +00001313 if (c == TAB) /* <Tab> is handled like <Space> */
1314 c = ' ';
Bram Moolenaar51485f02005-06-04 21:55:20 +00001315 lo = arridx;
1316 hi = arridx + len - 1;
1317 while (lo < hi)
1318 {
1319 m = (lo + hi) / 2;
1320 if (byts[m] > c)
1321 hi = m - 1;
1322 else if (byts[m] < c)
1323 lo = m + 1;
1324 else
1325 {
1326 lo = hi = m;
1327 break;
1328 }
1329 }
1330
1331 /* Stop if there is no matching byte. */
1332 if (hi < lo || byts[lo] != c)
1333 break;
1334
1335 /* Continue at the child (if there is one). */
1336 arridx = idxs[lo];
1337 ++wlen;
1338 --flen;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001339
1340 /* One space in the good word may stand for several spaces in the
1341 * checked word. */
1342 if (c == ' ')
1343 {
1344 for (;;)
1345 {
1346 if (flen <= 0 && *mip->mi_fend != NUL)
1347 flen = fold_more(mip);
1348 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1349 break;
1350 ++wlen;
1351 --flen;
1352 }
1353 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001354 }
1355
1356 /*
1357 * Verify that one of the possible endings is valid. Try the longest
1358 * first.
1359 */
1360 while (endidxcnt > 0)
1361 {
1362 --endidxcnt;
1363 arridx = endidx[endidxcnt];
1364 wlen = endlen[endidxcnt];
1365
1366#ifdef FEAT_MBYTE
1367 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1368 continue; /* not at first byte of character */
1369#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001370 if (spell_iswordp(ptr + wlen, mip->mi_buf))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001371 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001372 if (slang->sl_compprog == NULL && !slang->sl_nobreak)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001373 continue; /* next char is a word character */
1374 word_ends = FALSE;
1375 }
1376 else
1377 word_ends = TRUE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001378 /* The prefix flag is before compound flags. Once a valid prefix flag
1379 * has been found we try compound flags. */
1380 prefix_found = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001381
1382#ifdef FEAT_MBYTE
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001383 if (mode != FIND_KEEPWORD && has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001384 {
1385 /* Compute byte length in original word, length may change
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001386 * when folding case. This can be slow, take a shortcut when the
1387 * case-folded word is equal to the keep-case word. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001388 p = mip->mi_word;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001389 if (STRNCMP(ptr, p, wlen) != 0)
1390 {
1391 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1392 mb_ptr_adv(p);
1393 wlen = p - mip->mi_word;
1394 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001395 }
1396#endif
1397
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001398 /* Check flags and region. For FIND_PREFIX check the condition and
1399 * prefix ID.
1400 * Repeat this if there are more flags/region alternatives until there
1401 * is a match. */
1402 res = SP_BAD;
1403 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1404 --len, ++arridx)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001405 {
1406 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001407
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001408 /* For the fold-case tree check that the case of the checked word
1409 * matches with what the word in the tree requires.
1410 * For keep-case tree the case is always right. For prefixes we
1411 * don't bother to check. */
1412 if (mode == FIND_FOLDWORD)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001413 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001414 if (mip->mi_cend != mip->mi_word + wlen)
1415 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001416 /* mi_capflags was set for a different word length, need
1417 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001418 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001419 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001420 }
1421
Bram Moolenaar0c405862005-06-22 22:26:26 +00001422 if (mip->mi_capflags == WF_KEEPCAP
1423 || !spell_valid_case(mip->mi_capflags, flags))
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001424 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001425 }
1426
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001427 /* When mode is FIND_PREFIX the word must support the prefix:
1428 * check the prefix ID and the condition. Do that for the list at
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001429 * mip->mi_prefarridx that find_prefix() filled. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001430 else if (mode == FIND_PREFIX && !prefix_found)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001431 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001432 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001433 flags,
Bram Moolenaar53805d12005-08-01 07:08:33 +00001434 mip->mi_word + mip->mi_cprefixlen, slang,
1435 FALSE);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001436 if (c == 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001437 continue;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001438
1439 /* Use the WF_RARE flag for a rare prefix. */
1440 if (c & WF_RAREPFX)
1441 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001442 prefix_found = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001443 }
1444
Bram Moolenaar78622822005-08-23 21:00:13 +00001445 if (slang->sl_nobreak)
1446 {
1447 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1448 && (flags & WF_BANNED) == 0)
1449 {
1450 /* NOBREAK: found a valid following word. That's all we
1451 * need to know, so return. */
1452 mip->mi_result = SP_OK;
1453 break;
1454 }
1455 }
1456
1457 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1458 || !word_ends))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001459 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00001460 /* If there is no flag or the word is shorter than
1461 * COMPOUNDMIN reject it quickly.
1462 * Makes you wonder why someone puts a compound flag on a word
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001463 * that's too short... Myspell compatibility requires this
1464 * anyway. */
Bram Moolenaare52325c2005-08-22 22:54:29 +00001465 if (((unsigned)flags >> 24) == 0
1466 || wlen - mip->mi_compoff < slang->sl_compminlen)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001467 continue;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001468#ifdef FEAT_MBYTE
1469 /* For multi-byte chars check character length against
1470 * COMPOUNDMIN. */
1471 if (has_mbyte
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001472 && slang->sl_compminlen > 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001473 && mb_charlen_len(mip->mi_word + mip->mi_compoff,
1474 wlen - mip->mi_compoff) < slang->sl_compminlen)
1475 continue;
1476#endif
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001477
Bram Moolenaare52325c2005-08-22 22:54:29 +00001478 /* Limit the number of compound words to COMPOUNDMAX if no
1479 * maximum for syllables is specified. */
1480 if (!word_ends && mip->mi_complen + 2 > slang->sl_compmax
1481 && slang->sl_compsylmax == MAXWLEN)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001482 continue;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001483
Bram Moolenaard12a1322005-08-21 22:08:24 +00001484 /* Quickly check if compounding is possible with this flag. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00001485 if (!byte_in_str(mip->mi_complen == 0
Bram Moolenaard12a1322005-08-21 22:08:24 +00001486 ? slang->sl_compstartflags
1487 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +00001488 ((unsigned)flags >> 24)))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001489 continue;
1490
Bram Moolenaare52325c2005-08-22 22:54:29 +00001491 if (mode == FIND_COMPOUND)
1492 {
1493 int capflags;
1494
1495 /* Need to check the caps type of the appended compound
1496 * word. */
1497#ifdef FEAT_MBYTE
1498 if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1499 mip->mi_compoff) != 0)
1500 {
1501 /* case folding may have changed the length */
1502 p = mip->mi_word;
1503 for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1504 mb_ptr_adv(p);
1505 }
1506 else
1507#endif
1508 p = mip->mi_word + mip->mi_compoff;
1509 capflags = captype(p, mip->mi_word + wlen);
1510 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1511 && (flags & WF_FIXCAP) != 0))
1512 continue;
1513
1514 if (capflags != WF_ALLCAP)
1515 {
1516 /* When the character before the word is a word
1517 * character we do not accept a Onecap word. We do
1518 * accept a no-caps word, even when the dictionary
1519 * word specifies ONECAP. */
1520 mb_ptr_back(mip->mi_word, p);
1521 if (spell_iswordp_nmw(p)
1522 ? capflags == WF_ONECAP
1523 : (flags & WF_ONECAP) != 0
1524 && capflags != WF_ONECAP)
1525 continue;
1526 }
1527 }
1528
Bram Moolenaar5195e452005-08-19 20:32:47 +00001529 /* If the word ends the sequence of compound flags of the
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001530 * words must match with one of the COMPOUNDRULE items and
Bram Moolenaar5195e452005-08-19 20:32:47 +00001531 * the number of syllables must not be too large. */
1532 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1533 mip->mi_compflags[mip->mi_complen + 1] = NUL;
1534 if (word_ends)
1535 {
1536 char_u fword[MAXWLEN];
1537
1538 if (slang->sl_compsylmax < MAXWLEN)
1539 {
1540 /* "fword" is only needed for checking syllables. */
1541 if (ptr == mip->mi_word)
1542 (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1543 else
1544 vim_strncpy(fword, ptr, endlen[endidxcnt]);
1545 }
1546 if (!can_compound(slang, fword, mip->mi_compflags))
1547 continue;
1548 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001549 }
1550
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001551 /* Check NEEDCOMPOUND: can't use word without compounding. */
1552 else if (flags & WF_NEEDCOMP)
1553 continue;
1554
Bram Moolenaar78622822005-08-23 21:00:13 +00001555 nobreak_result = SP_OK;
1556
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001557 if (!word_ends)
1558 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001559 int save_result = mip->mi_result;
1560 char_u *save_end = mip->mi_end;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001561 langp_T *save_lp = mip->mi_lp;
1562 int lpi;
Bram Moolenaar78622822005-08-23 21:00:13 +00001563
1564 /* Check that a valid word follows. If there is one and we
1565 * are compounding, it will set "mi_result", thus we are
1566 * always finished here. For NOBREAK we only check that a
1567 * valid word follows.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001568 * Recursive! */
Bram Moolenaar78622822005-08-23 21:00:13 +00001569 if (slang->sl_nobreak)
1570 mip->mi_result = SP_BAD;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001571
1572 /* Find following word in case-folded tree. */
1573 mip->mi_compoff = endlen[endidxcnt];
1574#ifdef FEAT_MBYTE
1575 if (has_mbyte && mode == FIND_KEEPWORD)
1576 {
1577 /* Compute byte length in case-folded word from "wlen":
1578 * byte length in keep-case word. Length may change when
1579 * folding case. This can be slow, take a shortcut when
1580 * the case-folded word is equal to the keep-case word. */
1581 p = mip->mi_fword;
1582 if (STRNCMP(ptr, p, wlen) != 0)
1583 {
1584 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1585 mb_ptr_adv(p);
1586 mip->mi_compoff = p - mip->mi_fword;
1587 }
1588 }
1589#endif
Bram Moolenaard12a1322005-08-21 22:08:24 +00001590 c = mip->mi_compoff;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001591 ++mip->mi_complen;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001592
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001593 /* For NOBREAK we need to try all NOBREAK languages, at least
1594 * to find the ".add" file(s). */
1595 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
Bram Moolenaar78622822005-08-23 21:00:13 +00001596 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001597 if (slang->sl_nobreak)
1598 {
1599 mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1600 if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1601 || !mip->mi_lp->lp_slang->sl_nobreak)
1602 continue;
1603 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00001604
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001605 find_word(mip, FIND_COMPOUND);
1606
1607 /* When NOBREAK any word that matches is OK. Otherwise we
1608 * need to find the longest match, thus try with keep-case
1609 * and prefix too. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001610 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1611 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001612 /* Find following word in keep-case tree. */
1613 mip->mi_compoff = wlen;
1614 find_word(mip, FIND_KEEPCOMPOUND);
1615
1616 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1617 {
1618 /* Check for following word with prefix. */
1619 mip->mi_compoff = c;
1620 find_prefix(mip, FIND_COMPOUND);
1621 }
Bram Moolenaar78622822005-08-23 21:00:13 +00001622 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001623
1624 if (!slang->sl_nobreak)
1625 break;
Bram Moolenaar78622822005-08-23 21:00:13 +00001626 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001627 --mip->mi_complen;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001628 mip->mi_lp = save_lp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001629
Bram Moolenaar78622822005-08-23 21:00:13 +00001630 if (slang->sl_nobreak)
1631 {
1632 nobreak_result = mip->mi_result;
1633 mip->mi_result = save_result;
1634 mip->mi_end = save_end;
1635 }
1636 else
1637 {
1638 if (mip->mi_result == SP_OK)
1639 break;
1640 continue;
1641 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001642 }
1643
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001644 if (flags & WF_BANNED)
1645 res = SP_BANNED;
1646 else if (flags & WF_REGION)
1647 {
1648 /* Check region. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001649 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001650 res = SP_OK;
1651 else
1652 res = SP_LOCAL;
1653 }
1654 else if (flags & WF_RARE)
1655 res = SP_RARE;
1656 else
1657 res = SP_OK;
1658
Bram Moolenaar78622822005-08-23 21:00:13 +00001659 /* Always use the longest match and the best result. For NOBREAK
1660 * we separately keep the longest match without a following good
1661 * word as a fall-back. */
1662 if (nobreak_result == SP_BAD)
1663 {
1664 if (mip->mi_result2 > res)
1665 {
1666 mip->mi_result2 = res;
1667 mip->mi_end2 = mip->mi_word + wlen;
1668 }
1669 else if (mip->mi_result2 == res
1670 && mip->mi_end2 < mip->mi_word + wlen)
1671 mip->mi_end2 = mip->mi_word + wlen;
1672 }
1673 else if (mip->mi_result > res)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001674 {
1675 mip->mi_result = res;
1676 mip->mi_end = mip->mi_word + wlen;
1677 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001678 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001679 mip->mi_end = mip->mi_word + wlen;
1680
Bram Moolenaar78622822005-08-23 21:00:13 +00001681 if (mip->mi_result == SP_OK)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001682 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001683 }
1684
Bram Moolenaar78622822005-08-23 21:00:13 +00001685 if (mip->mi_result == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001686 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001687 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001688}
1689
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001690/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001691 * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1692 * does not have too many syllables.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001693 */
1694 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00001695can_compound(slang, word, flags)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001696 slang_T *slang;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001697 char_u *word;
1698 char_u *flags;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001699{
Bram Moolenaar5195e452005-08-19 20:32:47 +00001700 regmatch_T regmatch;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001701#ifdef FEAT_MBYTE
1702 char_u uflags[MAXWLEN * 2];
1703 int i;
1704#endif
1705 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001706
1707 if (slang->sl_compprog == NULL)
1708 return FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001709#ifdef FEAT_MBYTE
1710 if (enc_utf8)
1711 {
1712 /* Need to convert the single byte flags to utf8 characters. */
1713 p = uflags;
1714 for (i = 0; flags[i] != NUL; ++i)
1715 p += mb_char2bytes(flags[i], p);
1716 *p = NUL;
1717 p = uflags;
1718 }
1719 else
1720#endif
1721 p = flags;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001722 regmatch.regprog = slang->sl_compprog;
1723 regmatch.rm_ic = FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001724 if (!vim_regexec(&regmatch, p, 0))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001725 return FALSE;
1726
Bram Moolenaare52325c2005-08-22 22:54:29 +00001727 /* Count the number of syllables. This may be slow, do it last. If there
1728 * are too many syllables AND the number of compound words is above
1729 * COMPOUNDMAX then compounding is not allowed. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001730 if (slang->sl_compsylmax < MAXWLEN
1731 && count_syllables(slang, word) > slang->sl_compsylmax)
Bram Moolenaar6de68532005-08-24 22:08:48 +00001732 return (int)STRLEN(flags) < slang->sl_compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001733 return TRUE;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001734}
1735
1736/*
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001737 * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1738 * ID in "flags" for the word "word".
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001739 * The WF_RAREPFX flag is included in the return value for a rare prefix.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001740 */
1741 static int
Bram Moolenaar53805d12005-08-01 07:08:33 +00001742valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001743 int totprefcnt; /* nr of prefix IDs */
1744 int arridx; /* idx in sl_pidxs[] */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001745 int flags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001746 char_u *word;
1747 slang_T *slang;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001748 int cond_req; /* only use prefixes with a condition */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001749{
1750 int prefcnt;
1751 int pidx;
1752 regprog_T *rp;
1753 regmatch_T regmatch;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001754 int prefid;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001755
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001756 prefid = (unsigned)flags >> 24;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001757 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1758 {
1759 pidx = slang->sl_pidxs[arridx + prefcnt];
1760
1761 /* Check the prefix ID. */
1762 if (prefid != (pidx & 0xff))
1763 continue;
1764
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001765 /* Check if the prefix doesn't combine and the word already has a
1766 * suffix. */
1767 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1768 continue;
1769
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001770 /* Check the condition, if there is one. The condition index is
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001771 * stored in the two bytes above the prefix ID byte. */
1772 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001773 if (rp != NULL)
1774 {
1775 regmatch.regprog = rp;
1776 regmatch.rm_ic = FALSE;
1777 if (!vim_regexec(&regmatch, word, 0))
1778 continue;
1779 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00001780 else if (cond_req)
1781 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001782
Bram Moolenaar53805d12005-08-01 07:08:33 +00001783 /* It's a match! Return the WF_ flags. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001784 return pidx;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001785 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001786 return 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001787}
1788
1789/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001790 * Check if the word at "mip->mi_word" has a matching prefix.
1791 * If it does, then check the following word.
1792 *
Bram Moolenaard12a1322005-08-21 22:08:24 +00001793 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1794 * prefix in a compound word.
1795 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001796 * For a match mip->mi_result is updated.
1797 */
1798 static void
Bram Moolenaard12a1322005-08-21 22:08:24 +00001799find_prefix(mip, mode)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001800 matchinf_T *mip;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001801 int mode;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001802{
1803 idx_T arridx = 0;
1804 int len;
1805 int wlen = 0;
1806 int flen;
1807 int c;
1808 char_u *ptr;
1809 idx_T lo, hi, m;
1810 slang_T *slang = mip->mi_lp->lp_slang;
1811 char_u *byts;
1812 idx_T *idxs;
1813
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001814 byts = slang->sl_pbyts;
1815 if (byts == NULL)
1816 return; /* array is empty */
1817
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001818 /* We use the case-folded word here, since prefixes are always
1819 * case-folded. */
1820 ptr = mip->mi_fword;
1821 flen = mip->mi_fwordlen; /* available case-folded bytes */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001822 if (mode == FIND_COMPOUND)
1823 {
1824 /* Skip over the previously found word(s). */
1825 ptr += mip->mi_compoff;
1826 flen -= mip->mi_compoff;
1827 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001828 idxs = slang->sl_pidxs;
1829
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001830 /*
1831 * Repeat advancing in the tree until:
1832 * - there is a byte that doesn't match,
1833 * - we reach the end of the tree,
1834 * - or we reach the end of the line.
1835 */
1836 for (;;)
1837 {
1838 if (flen == 0 && *mip->mi_fend != NUL)
1839 flen = fold_more(mip);
1840
1841 len = byts[arridx++];
1842
1843 /* If the first possible byte is a zero the prefix could end here.
1844 * Check if the following word matches and supports the prefix. */
1845 if (byts[arridx] == 0)
1846 {
1847 /* There can be several prefixes with different conditions. We
1848 * try them all, since we don't know which one will give the
1849 * longest match. The word is the same each time, pass the list
1850 * of possible prefixes to find_word(). */
1851 mip->mi_prefarridx = arridx;
1852 mip->mi_prefcnt = len;
1853 while (len > 0 && byts[arridx] == 0)
1854 {
1855 ++arridx;
1856 --len;
1857 }
1858 mip->mi_prefcnt -= len;
1859
1860 /* Find the word that comes after the prefix. */
1861 mip->mi_prefixlen = wlen;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001862 if (mode == FIND_COMPOUND)
1863 /* Skip over the previously found word(s). */
1864 mip->mi_prefixlen += mip->mi_compoff;
1865
Bram Moolenaar53805d12005-08-01 07:08:33 +00001866#ifdef FEAT_MBYTE
1867 if (has_mbyte)
1868 {
1869 /* Case-folded length may differ from original length. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001870 mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1871 mip->mi_prefixlen, mip->mi_word);
Bram Moolenaar53805d12005-08-01 07:08:33 +00001872 }
1873 else
Bram Moolenaard12a1322005-08-21 22:08:24 +00001874 mip->mi_cprefixlen = mip->mi_prefixlen;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001875#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001876 find_word(mip, FIND_PREFIX);
1877
1878
1879 if (len == 0)
1880 break; /* no children, word must end here */
1881 }
1882
1883 /* Stop looking at end of the line. */
1884 if (ptr[wlen] == NUL)
1885 break;
1886
1887 /* Perform a binary search in the list of accepted bytes. */
1888 c = ptr[wlen];
1889 lo = arridx;
1890 hi = arridx + len - 1;
1891 while (lo < hi)
1892 {
1893 m = (lo + hi) / 2;
1894 if (byts[m] > c)
1895 hi = m - 1;
1896 else if (byts[m] < c)
1897 lo = m + 1;
1898 else
1899 {
1900 lo = hi = m;
1901 break;
1902 }
1903 }
1904
1905 /* Stop if there is no matching byte. */
1906 if (hi < lo || byts[lo] != c)
1907 break;
1908
1909 /* Continue at the child (if there is one). */
1910 arridx = idxs[lo];
1911 ++wlen;
1912 --flen;
1913 }
1914}
1915
1916/*
1917 * Need to fold at least one more character. Do until next non-word character
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001918 * for efficiency. Include the non-word character too.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001919 * Return the length of the folded chars in bytes.
1920 */
1921 static int
1922fold_more(mip)
1923 matchinf_T *mip;
1924{
1925 int flen;
1926 char_u *p;
1927
1928 p = mip->mi_fend;
1929 do
1930 {
1931 mb_ptr_adv(mip->mi_fend);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00001932 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001933
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001934 /* Include the non-word character so that we can check for the word end. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001935 if (*mip->mi_fend != NUL)
1936 mb_ptr_adv(mip->mi_fend);
1937
1938 (void)spell_casefold(p, (int)(mip->mi_fend - p),
1939 mip->mi_fword + mip->mi_fwordlen,
1940 MAXWLEN - mip->mi_fwordlen);
1941 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
1942 mip->mi_fwordlen += flen;
1943 return flen;
1944}
1945
1946/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001947 * Check case flags for a word. Return TRUE if the word has the requested
1948 * case.
1949 */
1950 static int
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00001951spell_valid_case(wordflags, treeflags)
1952 int wordflags; /* flags for the checked word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001953 int treeflags; /* flags for the word in the spell tree */
1954{
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00001955 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001956 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001957 && ((treeflags & WF_ONECAP) == 0
1958 || (wordflags & WF_ONECAP) != 0)));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001959}
1960
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001961/*
1962 * Return TRUE if spell checking is not enabled.
1963 */
1964 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00001965no_spell_checking(wp)
1966 win_T *wp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001967{
Bram Moolenaara226a6d2006-02-26 23:59:20 +00001968 if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL
1969 || wp->w_buffer->b_langp.ga_len == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001970 {
1971 EMSG(_("E756: Spell checking is not enabled"));
1972 return TRUE;
1973 }
1974 return FALSE;
1975}
Bram Moolenaar51485f02005-06-04 21:55:20 +00001976
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001977/*
1978 * Move to next spell error.
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001979 * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1980 * "curline" is TRUE to find word under/after cursor in the same line.
Bram Moolenaar5195e452005-08-19 20:32:47 +00001981 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1982 * to after badly spelled word before the cursor.
Bram Moolenaar6de68532005-08-24 22:08:48 +00001983 * Return 0 if not found, length of the badly spelled word otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001984 */
1985 int
Bram Moolenaar95529562005-08-25 21:21:38 +00001986spell_move_to(wp, dir, allwords, curline, attrp)
1987 win_T *wp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001988 int dir; /* FORWARD or BACKWARD */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001989 int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001990 int curline;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001991 hlf_T *attrp; /* return: attributes of bad word or NULL
1992 (only when "dir" is FORWARD) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001993{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001994 linenr_T lnum;
1995 pos_T found_pos;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001996 int found_len = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001997 char_u *line;
1998 char_u *p;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001999 char_u *endp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002000 hlf_T attr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002001 int len;
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002002# ifdef FEAT_SYN_HL
Bram Moolenaar95529562005-08-25 21:21:38 +00002003 int has_syntax = syntax_present(wp->w_buffer);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002004 int col;
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002005# endif
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002006 int can_spell;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002007 char_u *buf = NULL;
2008 int buflen = 0;
2009 int skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002010 int capcol = -1;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002011 int found_one = FALSE;
2012 int wrapped = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002013
Bram Moolenaar95529562005-08-25 21:21:38 +00002014 if (no_spell_checking(wp))
Bram Moolenaar6de68532005-08-24 22:08:48 +00002015 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002016
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002017 /*
2018 * Start looking for bad word at the start of the line, because we can't
Bram Moolenaar0c405862005-06-22 22:26:26 +00002019 * start halfway a word, we don't know where the it starts or ends.
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002020 *
2021 * When searching backwards, we continue in the line to find the last
2022 * bad word (in the cursor line: before the cursor).
Bram Moolenaar0c405862005-06-22 22:26:26 +00002023 *
2024 * We concatenate the start of the next line, so that wrapped words work
2025 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
2026 * though...
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002027 */
Bram Moolenaar95529562005-08-25 21:21:38 +00002028 lnum = wp->w_cursor.lnum;
Bram Moolenaare1438bb2006-03-01 22:01:55 +00002029 clearpos(&found_pos);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002030
2031 while (!got_int)
2032 {
Bram Moolenaar95529562005-08-25 21:21:38 +00002033 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002034
Bram Moolenaar0c405862005-06-22 22:26:26 +00002035 len = STRLEN(line);
2036 if (buflen < len + MAXWLEN + 2)
2037 {
2038 vim_free(buf);
2039 buflen = len + MAXWLEN + 2;
2040 buf = alloc(buflen);
2041 if (buf == NULL)
2042 break;
2043 }
2044
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002045 /* In first line check first word for Capital. */
2046 if (lnum == 1)
2047 capcol = 0;
2048
2049 /* For checking first word with a capital skip white space. */
2050 if (capcol == 0)
2051 capcol = skipwhite(line) - line;
2052
Bram Moolenaar0c405862005-06-22 22:26:26 +00002053 /* Copy the line into "buf" and append the start of the next line if
2054 * possible. */
2055 STRCPY(buf, line);
Bram Moolenaar95529562005-08-25 21:21:38 +00002056 if (lnum < wp->w_buffer->b_ml.ml_line_count)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002057 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
2058
2059 p = buf + skip;
2060 endp = buf + len;
2061 while (p < endp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002062 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002063 /* When searching backward don't search after the cursor. Unless
2064 * we wrapped around the end of the buffer. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002065 if (dir == BACKWARD
Bram Moolenaar95529562005-08-25 21:21:38 +00002066 && lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002067 && !wrapped
Bram Moolenaar95529562005-08-25 21:21:38 +00002068 && (colnr_T)(p - buf) >= wp->w_cursor.col)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002069 break;
2070
2071 /* start of word */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002072 attr = HLF_COUNT;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002073 len = spell_check(wp, p, &attr, &capcol, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002074
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002075 if (attr != HLF_COUNT)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002076 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002077 /* We found a bad word. Check the attribute. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002078 if (allwords || attr == HLF_SPB)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002079 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002080 found_one = TRUE;
2081
Bram Moolenaar51485f02005-06-04 21:55:20 +00002082 /* When searching forward only accept a bad word after
2083 * the cursor. */
2084 if (dir == BACKWARD
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002085 || lnum != wp->w_cursor.lnum
Bram Moolenaar95529562005-08-25 21:21:38 +00002086 || (lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002087 && (wrapped
2088 || (colnr_T)(curline ? p - buf + len
Bram Moolenaar0c405862005-06-22 22:26:26 +00002089 : p - buf)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002090 > wp->w_cursor.col)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002091 {
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002092# ifdef FEAT_SYN_HL
Bram Moolenaar51485f02005-06-04 21:55:20 +00002093 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002094 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00002095 col = p - buf;
Bram Moolenaar95529562005-08-25 21:21:38 +00002096 (void)syn_get_id(wp, lnum, (colnr_T)col,
Bram Moolenaar51485f02005-06-04 21:55:20 +00002097 FALSE, &can_spell);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002098 }
2099 else
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00002100#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002101 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002102
Bram Moolenaar51485f02005-06-04 21:55:20 +00002103 if (can_spell)
2104 {
2105 found_pos.lnum = lnum;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002106 found_pos.col = p - buf;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002107#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +00002108 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002109#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002110 if (dir == FORWARD)
2111 {
2112 /* No need to search further. */
Bram Moolenaar95529562005-08-25 21:21:38 +00002113 wp->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002114 vim_free(buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00002115 if (attrp != NULL)
2116 *attrp = attr;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002117 return len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002118 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002119 else if (curline)
2120 /* Insert mode completion: put cursor after
2121 * the bad word. */
2122 found_pos.col += len;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002123 found_len = len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002124 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002125 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002126 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002127 }
2128
Bram Moolenaar51485f02005-06-04 21:55:20 +00002129 /* advance to character after the word */
2130 p += len;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002131 capcol -= len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002132 }
2133
Bram Moolenaar5195e452005-08-19 20:32:47 +00002134 if (dir == BACKWARD && found_pos.lnum != 0)
2135 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002136 /* Use the last match in the line (before the cursor). */
Bram Moolenaar95529562005-08-25 21:21:38 +00002137 wp->w_cursor = found_pos;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002138 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002139 return found_len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002140 }
2141
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002142 if (curline)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002143 break; /* only check cursor line */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002144
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002145 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002146 if (dir == BACKWARD)
2147 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002148 /* If we are back at the starting line and searched it again there
2149 * is no match, give up. */
2150 if (lnum == wp->w_cursor.lnum && wrapped)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002151 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002152
2153 if (lnum > 1)
2154 --lnum;
2155 else if (!p_ws)
2156 break; /* at first line and 'nowrapscan' */
2157 else
2158 {
2159 /* Wrap around to the end of the buffer. May search the
2160 * starting line again and accept the last match. */
2161 lnum = wp->w_buffer->b_ml.ml_line_count;
2162 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002163 if (!shortmess(SHM_SEARCH))
2164 give_warning((char_u *)_(top_bot_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002165 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002166 capcol = -1;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002167 }
2168 else
2169 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002170 if (lnum < wp->w_buffer->b_ml.ml_line_count)
2171 ++lnum;
2172 else if (!p_ws)
2173 break; /* at first line and 'nowrapscan' */
2174 else
2175 {
2176 /* Wrap around to the start of the buffer. May search the
2177 * starting line again and accept the first match. */
2178 lnum = 1;
2179 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002180 if (!shortmess(SHM_SEARCH))
2181 give_warning((char_u *)_(bot_top_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002182 }
2183
2184 /* If we are back at the starting line and there is no match then
2185 * give up. */
2186 if (lnum == wp->w_cursor.lnum && !found_one)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002187 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002188
2189 /* Skip the characters at the start of the next line that were
2190 * included in a match crossing line boundaries. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00002191 if (attr == HLF_COUNT)
Bram Moolenaar0c405862005-06-22 22:26:26 +00002192 skip = p - endp;
2193 else
2194 skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002195
2196 /* Capscol skips over the inserted space. */
2197 --capcol;
2198
2199 /* But after empty line check first word in next line */
2200 if (*skipwhite(line) == NUL)
2201 capcol = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002202 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002203
2204 line_breakcheck();
2205 }
2206
Bram Moolenaar0c405862005-06-22 22:26:26 +00002207 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00002208 return 0;
Bram Moolenaar0c405862005-06-22 22:26:26 +00002209}
2210
2211/*
2212 * For spell checking: concatenate the start of the following line "line" into
2213 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
2214 */
2215 void
2216spell_cat_line(buf, line, maxlen)
2217 char_u *buf;
2218 char_u *line;
2219 int maxlen;
2220{
2221 char_u *p;
2222 int n;
2223
2224 p = skipwhite(line);
2225 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2226 p = skipwhite(p + 1);
2227
2228 if (*p != NUL)
2229 {
2230 *buf = ' ';
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002231 vim_strncpy(buf + 1, line, maxlen - 2);
Bram Moolenaar0c405862005-06-22 22:26:26 +00002232 n = p - line;
2233 if (n >= maxlen)
2234 n = maxlen - 1;
2235 vim_memset(buf + 1, ' ', n);
2236 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002237}
2238
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002239/*
2240 * Structure used for the cookie argument of do_in_runtimepath().
2241 */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002242typedef struct spelload_S
2243{
2244 char_u sl_lang[MAXWLEN + 1]; /* language name */
2245 slang_T *sl_slang; /* resulting slang_T struct */
2246 int sl_nobreak; /* NOBREAK language found */
2247} spelload_T;
2248
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002249/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002250 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00002251 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002252 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002253 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002254spell_load_lang(lang)
2255 char_u *lang;
2256{
Bram Moolenaarb765d632005-06-07 21:00:02 +00002257 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002258 int r;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002259 spelload_T sl;
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002260#ifdef FEAT_AUTOCMD
2261 int round;
2262#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002263
Bram Moolenaarb765d632005-06-07 21:00:02 +00002264 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002265 * It's truncated when an error is detected. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002266 STRCPY(sl.sl_lang, lang);
2267 sl.sl_slang = NULL;
2268 sl.sl_nobreak = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002269
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002270#ifdef FEAT_AUTOCMD
2271 /* We may retry when no spell file is found for the language, an
2272 * autocommand may load it then. */
2273 for (round = 1; round <= 2; ++round)
2274#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002275 {
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002276 /*
2277 * Find the first spell file for "lang" in 'runtimepath' and load it.
2278 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002279 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002280 "spell/%s.%s.spl", lang, spell_enc());
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002281 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002282
2283 if (r == FAIL && *sl.sl_lang != NUL)
2284 {
2285 /* Try loading the ASCII version. */
2286 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
2287 "spell/%s.ascii.spl", lang);
2288 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);
2289
2290#ifdef FEAT_AUTOCMD
2291 if (r == FAIL && *sl.sl_lang != NUL && round == 1
2292 && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
2293 curbuf->b_fname, FALSE, curbuf))
2294 continue;
2295 break;
2296#endif
2297 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002298#ifdef FEAT_AUTOCMD
2299 break;
2300#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002301 }
2302
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002303 if (r == FAIL)
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002304 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002305 smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
2306 lang, spell_enc(), lang);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00002307 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002308 else if (sl.sl_slang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002309 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002310 /* At least one file was loaded, now load ALL the additions. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002311 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002312 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002313 }
2314}
2315
2316/*
2317 * Return the encoding used for spell checking: Use 'encoding', except that we
2318 * use "latin1" for "latin9". And limit to 60 characters (just in case).
2319 */
2320 static char_u *
2321spell_enc()
2322{
2323
2324#ifdef FEAT_MBYTE
2325 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2326 return p_enc;
2327#endif
2328 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002329}
2330
2331/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002332 * Get the name of the .spl file for the internal wordlist into
2333 * "fname[MAXPATHL]".
2334 */
2335 static void
2336int_wordlist_spl(fname)
2337 char_u *fname;
2338{
2339 vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2340 int_wordlist, spell_enc());
2341}
2342
2343/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002344 * Allocate a new slang_T for language "lang". "lang" can be NULL.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002345 * Caller must fill "sl_next".
2346 */
2347 static slang_T *
2348slang_alloc(lang)
2349 char_u *lang;
2350{
2351 slang_T *lp;
2352
Bram Moolenaar51485f02005-06-04 21:55:20 +00002353 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002354 if (lp != NULL)
2355 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002356 if (lang != NULL)
2357 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002358 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
Bram Moolenaar4770d092006-01-12 23:22:24 +00002359 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002360 lp->sl_compmax = MAXWLEN;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002361 lp->sl_compsylmax = MAXWLEN;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002362 hash_init(&lp->sl_wordcount);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002363 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002364
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002365 return lp;
2366}
2367
2368/*
2369 * Free the contents of an slang_T and the structure itself.
2370 */
2371 static void
2372slang_free(lp)
2373 slang_T *lp;
2374{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002375 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002376 vim_free(lp->sl_fname);
2377 slang_clear(lp);
2378 vim_free(lp);
2379}
2380
2381/*
2382 * Clear an slang_T so that the file can be reloaded.
2383 */
2384 static void
2385slang_clear(lp)
2386 slang_T *lp;
2387{
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002388 garray_T *gap;
2389 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002390 salitem_T *smp;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002391 int i;
Bram Moolenaar4770d092006-01-12 23:22:24 +00002392 int round;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002393
Bram Moolenaar51485f02005-06-04 21:55:20 +00002394 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002395 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002396 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002397 lp->sl_kbyts = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002398 vim_free(lp->sl_pbyts);
2399 lp->sl_pbyts = NULL;
2400
Bram Moolenaar51485f02005-06-04 21:55:20 +00002401 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002402 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002403 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002404 lp->sl_kidxs = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002405 vim_free(lp->sl_pidxs);
2406 lp->sl_pidxs = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002407
Bram Moolenaar4770d092006-01-12 23:22:24 +00002408 for (round = 1; round <= 2; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002409 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00002410 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
2411 while (gap->ga_len > 0)
2412 {
2413 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2414 vim_free(ftp->ft_from);
2415 vim_free(ftp->ft_to);
2416 }
2417 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002418 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002419
2420 gap = &lp->sl_sal;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002421 if (lp->sl_sofo)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002422 {
2423 /* "ga_len" is set to 1 without adding an item for latin1 */
2424 if (gap->ga_data != NULL)
2425 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2426 for (i = 0; i < gap->ga_len; ++i)
2427 vim_free(((int **)gap->ga_data)[i]);
2428 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002429 else
2430 /* SAL items: free salitem_T items */
2431 while (gap->ga_len > 0)
2432 {
2433 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2434 vim_free(smp->sm_lead);
2435 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2436 vim_free(smp->sm_to);
2437#ifdef FEAT_MBYTE
2438 vim_free(smp->sm_lead_w);
2439 vim_free(smp->sm_oneof_w);
2440 vim_free(smp->sm_to_w);
2441#endif
2442 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002443 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002444
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002445 for (i = 0; i < lp->sl_prefixcnt; ++i)
2446 vim_free(lp->sl_prefprog[i]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002447 lp->sl_prefixcnt = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002448 vim_free(lp->sl_prefprog);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002449 lp->sl_prefprog = NULL;
2450
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002451 vim_free(lp->sl_info);
2452 lp->sl_info = NULL;
2453
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002454 vim_free(lp->sl_midword);
2455 lp->sl_midword = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002456
Bram Moolenaar5195e452005-08-19 20:32:47 +00002457 vim_free(lp->sl_compprog);
2458 vim_free(lp->sl_compstartflags);
Bram Moolenaard12a1322005-08-21 22:08:24 +00002459 vim_free(lp->sl_compallflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002460 lp->sl_compprog = NULL;
2461 lp->sl_compstartflags = NULL;
Bram Moolenaard12a1322005-08-21 22:08:24 +00002462 lp->sl_compallflags = NULL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002463
2464 vim_free(lp->sl_syllable);
2465 lp->sl_syllable = NULL;
2466 ga_clear(&lp->sl_syl_items);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002467
Bram Moolenaar4770d092006-01-12 23:22:24 +00002468 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2469 hash_init(&lp->sl_wordcount);
Bram Moolenaarea424162005-06-16 21:51:00 +00002470
Bram Moolenaar4770d092006-01-12 23:22:24 +00002471#ifdef FEAT_MBYTE
2472 hash_clear_all(&lp->sl_map_hash, 0);
Bram Moolenaarea424162005-06-16 21:51:00 +00002473#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00002474
Bram Moolenaar4770d092006-01-12 23:22:24 +00002475 /* Clear info from .sug file. */
2476 slang_clear_sug(lp);
2477
Bram Moolenaar5195e452005-08-19 20:32:47 +00002478 lp->sl_compmax = MAXWLEN;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002479 lp->sl_compminlen = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002480 lp->sl_compsylmax = MAXWLEN;
2481 lp->sl_regions[0] = NUL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002482}
2483
2484/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002485 * Clear the info from the .sug file in "lp".
2486 */
2487 static void
2488slang_clear_sug(lp)
2489 slang_T *lp;
2490{
2491 vim_free(lp->sl_sbyts);
2492 lp->sl_sbyts = NULL;
2493 vim_free(lp->sl_sidxs);
2494 lp->sl_sidxs = NULL;
2495 close_spellbuf(lp->sl_sugbuf);
2496 lp->sl_sugbuf = NULL;
2497 lp->sl_sugloaded = FALSE;
2498 lp->sl_sugtime = 0;
2499}
2500
2501/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002502 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002503 * Invoked through do_in_runtimepath().
2504 */
2505 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00002506spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002507 char_u *fname;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002508 void *cookie;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002509{
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002510 spelload_T *slp = (spelload_T *)cookie;
2511 slang_T *slang;
2512
2513 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2514 if (slang != NULL)
2515 {
2516 /* When a previously loaded file has NOBREAK also use it for the
2517 * ".add" files. */
2518 if (slp->sl_nobreak && slang->sl_add)
2519 slang->sl_nobreak = TRUE;
2520 else if (slang->sl_nobreak)
2521 slp->sl_nobreak = TRUE;
2522
2523 slp->sl_slang = slang;
2524 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002525}
2526
2527/*
2528 * Load one spell file and store the info into a slang_T.
2529 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00002530 * This is invoked in three ways:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002531 * - From spell_load_cb() to load a spell file for the first time. "lang" is
2532 * the language name, "old_lp" is NULL. Will allocate an slang_T.
2533 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
2534 * points to the existing slang_T.
Bram Moolenaar4770d092006-01-12 23:22:24 +00002535 * - Just after writing a .spl file; it's read back to produce the .sug file.
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002536 * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
2537 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002538 * Returns the slang_T the spell file was loaded into. NULL for error.
Bram Moolenaarb765d632005-06-07 21:00:02 +00002539 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002540 static slang_T *
2541spell_load_file(fname, lang, old_lp, silent)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002542 char_u *fname;
2543 char_u *lang;
2544 slang_T *old_lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002545 int silent; /* no error if file doesn't exist */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002546{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002547 FILE *fd;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002548 char_u buf[VIMSPELLMAGICL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002549 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002550 int i;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002551 int n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002552 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002553 char_u *save_sourcing_name = sourcing_name;
2554 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002555 slang_T *lp = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002556 int c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002557 int res;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002558
Bram Moolenaarb765d632005-06-07 21:00:02 +00002559 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002560 if (fd == NULL)
2561 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002562 if (!silent)
2563 EMSG2(_(e_notopen), fname);
2564 else if (p_verbose > 2)
2565 {
2566 verbose_enter();
2567 smsg((char_u *)e_notopen, fname);
2568 verbose_leave();
2569 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002570 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002571 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002572 if (p_verbose > 2)
2573 {
2574 verbose_enter();
2575 smsg((char_u *)_("Reading spell file \"%s\""), fname);
2576 verbose_leave();
2577 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002578
Bram Moolenaarb765d632005-06-07 21:00:02 +00002579 if (old_lp == NULL)
2580 {
2581 lp = slang_alloc(lang);
2582 if (lp == NULL)
2583 goto endFAIL;
2584
2585 /* Remember the file name, used to reload the file when it's updated. */
2586 lp->sl_fname = vim_strsave(fname);
2587 if (lp->sl_fname == NULL)
2588 goto endFAIL;
2589
2590 /* Check for .add.spl. */
2591 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
2592 }
2593 else
2594 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002595
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002596 /* Set sourcing_name, so that error messages mention the file name. */
2597 sourcing_name = fname;
2598 sourcing_lnum = 0;
2599
Bram Moolenaar4770d092006-01-12 23:22:24 +00002600 /*
2601 * <HEADER>: <fileID>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002602 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002603 for (i = 0; i < VIMSPELLMAGICL; ++i)
2604 buf[i] = getc(fd); /* <fileID> */
2605 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
2606 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002607 EMSG(_("E757: This does not look like a spell file"));
2608 goto endFAIL;
2609 }
2610 c = getc(fd); /* <versionnr> */
2611 if (c < VIMSPELLVERSION)
2612 {
2613 EMSG(_("E771: Old spell file, needs to be updated"));
2614 goto endFAIL;
2615 }
2616 else if (c > VIMSPELLVERSION)
2617 {
2618 EMSG(_("E772: Spell file is for newer version of Vim"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002619 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002620 }
2621
Bram Moolenaar5195e452005-08-19 20:32:47 +00002622
2623 /*
2624 * <SECTIONS>: <section> ... <sectionend>
2625 * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
2626 */
2627 for (;;)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002628 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002629 n = getc(fd); /* <sectionID> or <sectionend> */
2630 if (n == SN_END)
2631 break;
2632 c = getc(fd); /* <sectionflags> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00002633 len = get4c(fd); /* <sectionlen> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00002634 if (len < 0)
2635 goto truncerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002636
Bram Moolenaar5195e452005-08-19 20:32:47 +00002637 res = 0;
2638 switch (n)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002639 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002640 case SN_INFO:
2641 lp->sl_info = read_string(fd, len); /* <infotext> */
2642 if (lp->sl_info == NULL)
2643 goto endFAIL;
2644 break;
2645
Bram Moolenaar5195e452005-08-19 20:32:47 +00002646 case SN_REGION:
2647 res = read_region_section(fd, lp, len);
2648 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002649
Bram Moolenaar5195e452005-08-19 20:32:47 +00002650 case SN_CHARFLAGS:
2651 res = read_charflags_section(fd);
2652 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002653
Bram Moolenaar5195e452005-08-19 20:32:47 +00002654 case SN_MIDWORD:
2655 lp->sl_midword = read_string(fd, len); /* <midword> */
2656 if (lp->sl_midword == NULL)
2657 goto endFAIL;
2658 break;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002659
Bram Moolenaar5195e452005-08-19 20:32:47 +00002660 case SN_PREFCOND:
2661 res = read_prefcond_section(fd, lp);
2662 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002663
Bram Moolenaar5195e452005-08-19 20:32:47 +00002664 case SN_REP:
Bram Moolenaar4770d092006-01-12 23:22:24 +00002665 res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
2666 break;
2667
2668 case SN_REPSAL:
2669 res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002670 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002671
Bram Moolenaar5195e452005-08-19 20:32:47 +00002672 case SN_SAL:
2673 res = read_sal_section(fd, lp);
2674 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002675
Bram Moolenaar5195e452005-08-19 20:32:47 +00002676 case SN_SOFO:
2677 res = read_sofo_section(fd, lp);
2678 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002679
Bram Moolenaar5195e452005-08-19 20:32:47 +00002680 case SN_MAP:
2681 p = read_string(fd, len); /* <mapstr> */
2682 if (p == NULL)
2683 goto endFAIL;
2684 set_map_str(lp, p);
2685 vim_free(p);
2686 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002687
Bram Moolenaar4770d092006-01-12 23:22:24 +00002688 case SN_WORDS:
2689 res = read_words_section(fd, lp, len);
2690 break;
2691
2692 case SN_SUGFILE:
Bram Moolenaarb388adb2006-02-28 23:50:17 +00002693 lp->sl_sugtime = get8c(fd); /* <timestamp> */
Bram Moolenaar4770d092006-01-12 23:22:24 +00002694 break;
2695
Bram Moolenaare1438bb2006-03-01 22:01:55 +00002696 case SN_NOSPLITSUGS:
2697 lp->sl_nosplitsugs = TRUE; /* <timestamp> */
2698 break;
2699
Bram Moolenaar5195e452005-08-19 20:32:47 +00002700 case SN_COMPOUND:
2701 res = read_compound(fd, lp, len);
2702 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002703
Bram Moolenaar78622822005-08-23 21:00:13 +00002704 case SN_NOBREAK:
2705 lp->sl_nobreak = TRUE;
2706 break;
2707
Bram Moolenaar5195e452005-08-19 20:32:47 +00002708 case SN_SYLLABLE:
2709 lp->sl_syllable = read_string(fd, len); /* <syllable> */
2710 if (lp->sl_syllable == NULL)
2711 goto endFAIL;
2712 if (init_syl_tab(lp) == FAIL)
2713 goto endFAIL;
2714 break;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002715
Bram Moolenaar5195e452005-08-19 20:32:47 +00002716 default:
2717 /* Unsupported section. When it's required give an error
2718 * message. When it's not required skip the contents. */
2719 if (c & SNF_REQUIRED)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002720 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002721 EMSG(_("E770: Unsupported section in spell file"));
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002722 goto endFAIL;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002723 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00002724 while (--len >= 0)
2725 if (getc(fd) < 0)
2726 goto truncerr;
2727 break;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002728 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00002729someerror:
Bram Moolenaar5195e452005-08-19 20:32:47 +00002730 if (res == SP_FORMERROR)
2731 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002732 EMSG(_(e_format));
2733 goto endFAIL;
2734 }
2735 if (res == SP_TRUNCERROR)
2736 {
2737truncerr:
2738 EMSG(_(e_spell_trunc));
2739 goto endFAIL;
2740 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00002741 if (res == SP_OTHERERROR)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002742 goto endFAIL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002743 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002744
Bram Moolenaar4770d092006-01-12 23:22:24 +00002745 /* <LWORDTREE> */
2746 res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
2747 if (res != 0)
2748 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002749
Bram Moolenaar4770d092006-01-12 23:22:24 +00002750 /* <KWORDTREE> */
2751 res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
2752 if (res != 0)
2753 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002754
Bram Moolenaar4770d092006-01-12 23:22:24 +00002755 /* <PREFIXTREE> */
2756 res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
2757 lp->sl_prefixcnt);
2758 if (res != 0)
2759 goto someerror;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002760
Bram Moolenaarb765d632005-06-07 21:00:02 +00002761 /* For a new file link it in the list of spell files. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00002762 if (old_lp == NULL && lang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002763 {
2764 lp->sl_next = first_lang;
2765 first_lang = lp;
2766 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002767
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002768 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002769
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002770endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002771 if (lang != NULL)
2772 /* truncating the name signals the error to spell_load_lang() */
2773 *lang = NUL;
2774 if (lp != NULL && old_lp == NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002775 slang_free(lp);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002776 lp = NULL;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002777
2778endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002779 if (fd != NULL)
2780 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002781 sourcing_name = save_sourcing_name;
2782 sourcing_lnum = save_sourcing_lnum;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002783
2784 return lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002785}
2786
/*
 * Read 2 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before both bytes were read, so that callers can detect a truncated file
 * by checking for a negative result.
 */
    static int
get2c(fd)
    FILE        *fd;
{
    int         hi;
    int         lo;

    /* Check every byte: adding EOF (-1) to an already shifted value would
     * produce a positive, valid looking number. */
    hi = getc(fd);
    if (hi == EOF)
        return -1;
    lo = getc(fd);
    if (lo == EOF)
        return -1;
    return (hi << 8) + lo;
}
2800
/*
 * Read 3 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before all three bytes were read.
 */
    static int
get3c(fd)
    FILE        *fd;
{
    int         n = 0;
    int         c;
    int         i;

    for (i = 0; i < 3; ++i)
    {
        /* Check every byte: adding EOF (-1) to an already shifted value
         * would produce a positive, valid looking number. */
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + c;
    }
    return n;
}
2815
/*
 * Read 4 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before all four bytes were read.
 * The bytes are collected in an unsigned variable, so that a first byte with
 * the high bit set does not cause signed overflow while shifting.  Such a
 * value does not fit in a positive int; on the usual platforms the conversion
 * results in a negative number.
 */
    static int
get4c(fd)
    FILE        *fd;
{
    unsigned    n = 0;
    int         c;
    int         i;

    for (i = 0; i < 4; ++i)
    {
        /* Check every byte: adding EOF (-1) to an already shifted value
         * would produce a positive, valid looking number. */
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + (unsigned)c;
    }
    return (int)n;
}
2831
/*
 * Read 8 bytes from "fd" and turn them into a time_t, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before all eight bytes were read, instead of mixing the EOF value into the
 * timestamp.
 */
    static time_t
get8c(fd)
    FILE        *fd;
{
    time_t      n = 0;
    int         c;
    int         i;

    for (i = 0; i < 8; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return (time_t)-1;
        n = (n << 8) + c;
    }
    return n;
}
2846
2847/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002848 * Read a length field from "fd" in "cnt_bytes" bytes.
Bram Moolenaar7887d882005-07-01 22:33:52 +00002849 * Allocate memory, read the string into it and add a NUL at the end.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002850 * Returns NULL when the count is zero.
Bram Moolenaar5195e452005-08-19 20:32:47 +00002851 * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
2852 * otherwise.
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002853 */
2854 static char_u *
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002855read_cnt_string(fd, cnt_bytes, cntp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002856 FILE *fd;
2857 int cnt_bytes;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002858 int *cntp;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002859{
2860 int cnt = 0;
2861 int i;
2862 char_u *str;
2863
2864 /* read the length bytes, MSB first */
2865 for (i = 0; i < cnt_bytes; ++i)
2866 cnt = (cnt << 8) + getc(fd);
2867 if (cnt < 0)
2868 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00002869 *cntp = SP_TRUNCERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002870 return NULL;
2871 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002872 *cntp = cnt;
2873 if (cnt == 0)
2874 return NULL; /* nothing to read, return NULL */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002875
Bram Moolenaar5195e452005-08-19 20:32:47 +00002876 str = read_string(fd, cnt);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002877 if (str == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002878 *cntp = SP_OTHERERROR;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002879 return str;
2880}
2881
Bram Moolenaar7887d882005-07-01 22:33:52 +00002882/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00002883 * Read a string of length "cnt" from "fd" into allocated memory.
2884 * Returns NULL when out of memory.
2885 */
2886 static char_u *
2887read_string(fd, cnt)
2888 FILE *fd;
2889 int cnt;
2890{
2891 char_u *str;
2892 int i;
2893
2894 /* allocate memory */
2895 str = alloc((unsigned)cnt + 1);
2896 if (str != NULL)
2897 {
2898 /* Read the string. Doesn't check for truncated file. */
2899 for (i = 0; i < cnt; ++i)
2900 str[i] = getc(fd);
2901 str[i] = NUL;
2902 }
2903 return str;
2904}
2905
2906/*
2907 * Read SN_REGION: <regionname> ...
2908 * Return SP_*ERROR flags.
2909 */
2910 static int
2911read_region_section(fd, lp, len)
2912 FILE *fd;
2913 slang_T *lp;
2914 int len;
2915{
2916 int i;
2917
2918 if (len > 16)
2919 return SP_FORMERROR;
2920 for (i = 0; i < len; ++i)
2921 lp->sl_regions[i] = getc(fd); /* <regionname> */
2922 lp->sl_regions[len] = NUL;
2923 return 0;
2924}
2925
2926/*
2927 * Read SN_CHARFLAGS section: <charflagslen> <charflags>
2928 * <folcharslen> <folchars>
2929 * Return SP_*ERROR flags.
2930 */
2931 static int
2932read_charflags_section(fd)
2933 FILE *fd;
2934{
2935 char_u *flags;
2936 char_u *fol;
2937 int flagslen, follen;
2938
2939 /* <charflagslen> <charflags> */
2940 flags = read_cnt_string(fd, 1, &flagslen);
2941 if (flagslen < 0)
2942 return flagslen;
2943
2944 /* <folcharslen> <folchars> */
2945 fol = read_cnt_string(fd, 2, &follen);
2946 if (follen < 0)
2947 {
2948 vim_free(flags);
2949 return follen;
2950 }
2951
2952 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
2953 if (flags != NULL && fol != NULL)
2954 set_spell_charflags(flags, flagslen, fol);
2955
2956 vim_free(flags);
2957 vim_free(fol);
2958
2959 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
2960 if ((flags == NULL) != (fol == NULL))
2961 return SP_FORMERROR;
2962 return 0;
2963}
2964
2965/*
2966 * Read SN_PREFCOND section.
2967 * Return SP_*ERROR flags.
2968 */
2969 static int
2970read_prefcond_section(fd, lp)
2971 FILE *fd;
2972 slang_T *lp;
2973{
2974 int cnt;
2975 int i;
2976 int n;
2977 char_u *p;
2978 char_u buf[MAXWLEN + 1];
2979
2980 /* <prefcondcnt> <prefcond> ... */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00002981 cnt = get2c(fd); /* <prefcondcnt> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00002982 if (cnt <= 0)
2983 return SP_FORMERROR;
2984
2985 lp->sl_prefprog = (regprog_T **)alloc_clear(
2986 (unsigned)sizeof(regprog_T *) * cnt);
2987 if (lp->sl_prefprog == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002988 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002989 lp->sl_prefixcnt = cnt;
2990
2991 for (i = 0; i < cnt; ++i)
2992 {
2993 /* <prefcond> : <condlen> <condstr> */
2994 n = getc(fd); /* <condlen> */
2995 if (n < 0 || n >= MAXWLEN)
2996 return SP_FORMERROR;
2997
2998 /* When <condlen> is zero we have an empty condition. Otherwise
2999 * compile the regexp program used to check for the condition. */
3000 if (n > 0)
3001 {
3002 buf[0] = '^'; /* always match at one position only */
3003 p = buf + 1;
3004 while (n-- > 0)
3005 *p++ = getc(fd); /* <condstr> */
3006 *p = NUL;
3007 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
3008 }
3009 }
3010 return 0;
3011}
3012
3013/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003014 * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
Bram Moolenaar5195e452005-08-19 20:32:47 +00003015 * Return SP_*ERROR flags.
3016 */
3017 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +00003018read_rep_section(fd, gap, first)
Bram Moolenaar5195e452005-08-19 20:32:47 +00003019 FILE *fd;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003020 garray_T *gap;
3021 short *first;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003022{
3023 int cnt;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003024 fromto_T *ftp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003025 int i;
3026
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003027 cnt = get2c(fd); /* <repcount> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003028 if (cnt < 0)
3029 return SP_TRUNCERROR;
3030
Bram Moolenaar5195e452005-08-19 20:32:47 +00003031 if (ga_grow(gap, cnt) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003032 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003033
3034 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3035 for (; gap->ga_len < cnt; ++gap->ga_len)
3036 {
3037 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
3038 ftp->ft_from = read_cnt_string(fd, 1, &i);
3039 if (i < 0)
3040 return i;
3041 if (i == 0)
3042 return SP_FORMERROR;
3043 ftp->ft_to = read_cnt_string(fd, 1, &i);
3044 if (i <= 0)
3045 {
3046 vim_free(ftp->ft_from);
3047 if (i < 0)
3048 return i;
3049 return SP_FORMERROR;
3050 }
3051 }
3052
3053 /* Fill the first-index table. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003054 for (i = 0; i < 256; ++i)
3055 first[i] = -1;
3056 for (i = 0; i < gap->ga_len; ++i)
3057 {
3058 ftp = &((fromto_T *)gap->ga_data)[i];
3059 if (first[*ftp->ft_from] == -1)
3060 first[*ftp->ft_from] = i;
3061 }
3062 return 0;
3063}
3064
3065/*
3066 * Read SN_SAL section: <salflags> <salcount> <sal> ...
3067 * Return SP_*ERROR flags.
3068 */
3069 static int
3070read_sal_section(fd, slang)
3071 FILE *fd;
3072 slang_T *slang;
3073{
3074 int i;
3075 int cnt;
3076 garray_T *gap;
3077 salitem_T *smp;
3078 int ccnt;
3079 char_u *p;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003080 int c = NUL;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003081
3082 slang->sl_sofo = FALSE;
3083
3084 i = getc(fd); /* <salflags> */
3085 if (i & SAL_F0LLOWUP)
3086 slang->sl_followup = TRUE;
3087 if (i & SAL_COLLAPSE)
3088 slang->sl_collapse = TRUE;
3089 if (i & SAL_REM_ACCENTS)
3090 slang->sl_rem_accents = TRUE;
3091
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003092 cnt = get2c(fd); /* <salcount> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003093 if (cnt < 0)
3094 return SP_TRUNCERROR;
3095
3096 gap = &slang->sl_sal;
3097 ga_init2(gap, sizeof(salitem_T), 10);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003098 if (ga_grow(gap, cnt + 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003099 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003100
3101 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3102 for (; gap->ga_len < cnt; ++gap->ga_len)
3103 {
3104 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3105 ccnt = getc(fd); /* <salfromlen> */
3106 if (ccnt < 0)
3107 return SP_TRUNCERROR;
3108 if ((p = alloc(ccnt + 2)) == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003109 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003110 smp->sm_lead = p;
3111
3112 /* Read up to the first special char into sm_lead. */
3113 for (i = 0; i < ccnt; ++i)
3114 {
3115 c = getc(fd); /* <salfrom> */
3116 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
3117 break;
3118 *p++ = c;
3119 }
3120 smp->sm_leadlen = p - smp->sm_lead;
3121 *p++ = NUL;
3122
3123 /* Put (abc) chars in sm_oneof, if any. */
3124 if (c == '(')
3125 {
3126 smp->sm_oneof = p;
3127 for (++i; i < ccnt; ++i)
3128 {
3129 c = getc(fd); /* <salfrom> */
3130 if (c == ')')
3131 break;
3132 *p++ = c;
3133 }
3134 *p++ = NUL;
3135 if (++i < ccnt)
3136 c = getc(fd);
3137 }
3138 else
3139 smp->sm_oneof = NULL;
3140
3141 /* Any following chars go in sm_rules. */
3142 smp->sm_rules = p;
3143 if (i < ccnt)
3144 /* store the char we got while checking for end of sm_lead */
3145 *p++ = c;
3146 for (++i; i < ccnt; ++i)
3147 *p++ = getc(fd); /* <salfrom> */
3148 *p++ = NUL;
3149
3150 /* <saltolen> <salto> */
3151 smp->sm_to = read_cnt_string(fd, 1, &ccnt);
3152 if (ccnt < 0)
3153 {
3154 vim_free(smp->sm_lead);
3155 return ccnt;
3156 }
3157
3158#ifdef FEAT_MBYTE
3159 if (has_mbyte)
3160 {
3161 /* convert the multi-byte strings to wide char strings */
3162 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3163 smp->sm_leadlen = mb_charlen(smp->sm_lead);
3164 if (smp->sm_oneof == NULL)
3165 smp->sm_oneof_w = NULL;
3166 else
3167 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
3168 if (smp->sm_to == NULL)
3169 smp->sm_to_w = NULL;
3170 else
3171 smp->sm_to_w = mb_str2wide(smp->sm_to);
3172 if (smp->sm_lead_w == NULL
3173 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
3174 || (smp->sm_to_w == NULL && smp->sm_to != NULL))
3175 {
3176 vim_free(smp->sm_lead);
3177 vim_free(smp->sm_to);
3178 vim_free(smp->sm_lead_w);
3179 vim_free(smp->sm_oneof_w);
3180 vim_free(smp->sm_to_w);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003181 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003182 }
3183 }
3184#endif
3185 }
3186
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003187 if (gap->ga_len > 0)
3188 {
3189 /* Add one extra entry to mark the end with an empty sm_lead. Avoids
3190 * that we need to check the index every time. */
3191 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3192 if ((p = alloc(1)) == NULL)
3193 return SP_OTHERERROR;
3194 p[0] = NUL;
3195 smp->sm_lead = p;
3196 smp->sm_leadlen = 0;
3197 smp->sm_oneof = NULL;
3198 smp->sm_rules = p;
3199 smp->sm_to = NULL;
3200#ifdef FEAT_MBYTE
3201 if (has_mbyte)
3202 {
3203 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3204 smp->sm_leadlen = 0;
3205 smp->sm_oneof_w = NULL;
3206 smp->sm_to_w = NULL;
3207 }
3208#endif
3209 ++gap->ga_len;
3210 }
3211
Bram Moolenaar5195e452005-08-19 20:32:47 +00003212 /* Fill the first-index table. */
3213 set_sal_first(slang);
3214
3215 return 0;
3216}
3217
3218/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003219 * Read SN_WORDS: <word> ...
3220 * Return SP_*ERROR flags.
3221 */
3222 static int
3223read_words_section(fd, lp, len)
3224 FILE *fd;
3225 slang_T *lp;
3226 int len;
3227{
3228 int done = 0;
3229 int i;
3230 char_u word[MAXWLEN];
3231
3232 while (done < len)
3233 {
3234 /* Read one word at a time. */
3235 for (i = 0; ; ++i)
3236 {
3237 word[i] = getc(fd);
3238 if (word[i] == NUL)
3239 break;
3240 if (i == MAXWLEN - 1)
3241 return SP_FORMERROR;
3242 }
3243
3244 /* Init the count to 10. */
3245 count_common_word(lp, word, -1, 10);
3246 done += i + 1;
3247 }
3248 return 0;
3249}
3250
3251/*
3252 * Add a word to the hashtable of common words.
3253 * If it's already there then the counter is increased.
3254 */
3255 static void
3256count_common_word(lp, word, len, count)
3257 slang_T *lp;
3258 char_u *word;
3259 int len; /* word length, -1 for upto NUL */
3260 int count; /* 1 to count once, 10 to init */
3261{
3262 hash_T hash;
3263 hashitem_T *hi;
3264 wordcount_T *wc;
3265 char_u buf[MAXWLEN];
3266 char_u *p;
3267
3268 if (len == -1)
3269 p = word;
3270 else
3271 {
3272 vim_strncpy(buf, word, len);
3273 p = buf;
3274 }
3275
3276 hash = hash_hash(p);
3277 hi = hash_lookup(&lp->sl_wordcount, p, hash);
3278 if (HASHITEM_EMPTY(hi))
3279 {
3280 wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p));
3281 if (wc == NULL)
3282 return;
3283 STRCPY(wc->wc_word, p);
3284 wc->wc_count = count;
3285 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
3286 }
3287 else
3288 {
3289 wc = HI2WC(hi);
3290 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */
3291 wc->wc_count = MAXWORDCOUNT;
3292 }
3293}
3294
3295/*
3296 * Adjust the score of common words.
3297 */
3298 static int
3299score_wordcount_adj(slang, score, word, split)
3300 slang_T *slang;
3301 int score;
3302 char_u *word;
3303 int split; /* word was split, less bonus */
3304{
3305 hashitem_T *hi;
3306 wordcount_T *wc;
3307 int bonus;
3308 int newscore;
3309
3310 hi = hash_find(&slang->sl_wordcount, word);
3311 if (!HASHITEM_EMPTY(hi))
3312 {
3313 wc = HI2WC(hi);
3314 if (wc->wc_count < SCORE_THRES2)
3315 bonus = SCORE_COMMON1;
3316 else if (wc->wc_count < SCORE_THRES3)
3317 bonus = SCORE_COMMON2;
3318 else
3319 bonus = SCORE_COMMON3;
3320 if (split)
3321 newscore = score - bonus / 2;
3322 else
3323 newscore = score - bonus;
3324 if (newscore < 0)
3325 return 0;
3326 return newscore;
3327 }
3328 return score;
3329}
3330
3331/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00003332 * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3333 * Return SP_*ERROR flags.
3334 */
3335 static int
3336read_sofo_section(fd, slang)
3337 FILE *fd;
3338 slang_T *slang;
3339{
3340 int cnt;
3341 char_u *from, *to;
3342 int res;
3343
3344 slang->sl_sofo = TRUE;
3345
3346 /* <sofofromlen> <sofofrom> */
3347 from = read_cnt_string(fd, 2, &cnt);
3348 if (cnt < 0)
3349 return cnt;
3350
3351 /* <sofotolen> <sofoto> */
3352 to = read_cnt_string(fd, 2, &cnt);
3353 if (cnt < 0)
3354 {
3355 vim_free(from);
3356 return cnt;
3357 }
3358
3359 /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3360 if (from != NULL && to != NULL)
3361 res = set_sofo(slang, from, to);
3362 else if (from != NULL || to != NULL)
3363 res = SP_FORMERROR; /* only one of two strings is an error */
3364 else
3365 res = 0;
3366
3367 vim_free(from);
3368 vim_free(to);
3369 return res;
3370}
3371
3372/*
3373 * Read the compound section from the .spl file:
3374 * <compmax> <compminlen> <compsylmax> <compflags>
3375 * Returns SP_*ERROR flags.
3376 */
3377 static int
3378read_compound(fd, slang, len)
3379 FILE *fd;
3380 slang_T *slang;
3381 int len;
3382{
3383 int todo = len;
3384 int c;
3385 int atstart;
3386 char_u *pat;
3387 char_u *pp;
3388 char_u *cp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003389 char_u *ap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003390
3391 if (todo < 2)
3392 return SP_FORMERROR; /* need at least two bytes */
3393
3394 --todo;
3395 c = getc(fd); /* <compmax> */
3396 if (c < 2)
3397 c = MAXWLEN;
3398 slang->sl_compmax = c;
3399
3400 --todo;
3401 c = getc(fd); /* <compminlen> */
3402 if (c < 1)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00003403 c = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003404 slang->sl_compminlen = c;
3405
3406 --todo;
3407 c = getc(fd); /* <compsylmax> */
3408 if (c < 1)
3409 c = MAXWLEN;
3410 slang->sl_compsylmax = c;
3411
Bram Moolenaar362e1a32006-03-06 23:29:24 +00003412 /* Turn the COMPOUNDRULE items into a regexp pattern:
Bram Moolenaar5195e452005-08-19 20:32:47 +00003413 * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003414 * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3415 * Conversion to utf-8 may double the size. */
3416 c = todo * 2 + 7;
3417#ifdef FEAT_MBYTE
3418 if (enc_utf8)
3419 c += todo * 2;
3420#endif
3421 pat = alloc((unsigned)c);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003422 if (pat == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003423 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003424
Bram Moolenaard12a1322005-08-21 22:08:24 +00003425 /* We also need a list of all flags that can appear at the start and one
3426 * for all flags. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003427 cp = alloc(todo + 1);
3428 if (cp == NULL)
3429 {
3430 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003431 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003432 }
3433 slang->sl_compstartflags = cp;
3434 *cp = NUL;
3435
Bram Moolenaard12a1322005-08-21 22:08:24 +00003436 ap = alloc(todo + 1);
3437 if (ap == NULL)
3438 {
3439 vim_free(pat);
Bram Moolenaar6de68532005-08-24 22:08:48 +00003440 return SP_OTHERERROR;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003441 }
3442 slang->sl_compallflags = ap;
3443 *ap = NUL;
3444
Bram Moolenaar5195e452005-08-19 20:32:47 +00003445 pp = pat;
3446 *pp++ = '^';
3447 *pp++ = '\\';
3448 *pp++ = '(';
3449
3450 atstart = 1;
3451 while (todo-- > 0)
3452 {
3453 c = getc(fd); /* <compflags> */
Bram Moolenaard12a1322005-08-21 22:08:24 +00003454
3455 /* Add all flags to "sl_compallflags". */
3456 if (vim_strchr((char_u *)"+*[]/", c) == NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00003457 && !byte_in_str(slang->sl_compallflags, c))
Bram Moolenaard12a1322005-08-21 22:08:24 +00003458 {
3459 *ap++ = c;
3460 *ap = NUL;
3461 }
3462
Bram Moolenaar5195e452005-08-19 20:32:47 +00003463 if (atstart != 0)
3464 {
3465 /* At start of item: copy flags to "sl_compstartflags". For a
3466 * [abc] item set "atstart" to 2 and copy up to the ']'. */
3467 if (c == '[')
3468 atstart = 2;
3469 else if (c == ']')
3470 atstart = 0;
3471 else
3472 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00003473 if (!byte_in_str(slang->sl_compstartflags, c))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003474 {
3475 *cp++ = c;
3476 *cp = NUL;
3477 }
3478 if (atstart == 1)
3479 atstart = 0;
3480 }
3481 }
3482 if (c == '/') /* slash separates two items */
3483 {
3484 *pp++ = '\\';
3485 *pp++ = '|';
3486 atstart = 1;
3487 }
3488 else /* normal char, "[abc]" and '*' are copied as-is */
3489 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00003490 if (c == '+' || c == '~')
Bram Moolenaar5195e452005-08-19 20:32:47 +00003491 *pp++ = '\\'; /* "a+" becomes "a\+" */
Bram Moolenaar6de68532005-08-24 22:08:48 +00003492#ifdef FEAT_MBYTE
3493 if (enc_utf8)
3494 pp += mb_char2bytes(c, pp);
3495 else
3496#endif
3497 *pp++ = c;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003498 }
3499 }
3500
3501 *pp++ = '\\';
3502 *pp++ = ')';
3503 *pp++ = '$';
3504 *pp = NUL;
3505
3506 slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3507 vim_free(pat);
3508 if (slang->sl_compprog == NULL)
3509 return SP_FORMERROR;
3510
3511 return 0;
3512}
3513
Bram Moolenaar6de68532005-08-24 22:08:48 +00003514/*
Bram Moolenaar95529562005-08-25 21:21:38 +00003515 * Return TRUE if byte "n" appears in "str".
Bram Moolenaar6de68532005-08-24 22:08:48 +00003516 * Like strchr() but independent of locale.
3517 */
3518 static int
Bram Moolenaar95529562005-08-25 21:21:38 +00003519byte_in_str(str, n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003520 char_u *str;
Bram Moolenaar95529562005-08-25 21:21:38 +00003521 int n;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003522{
3523 char_u *p;
3524
3525 for (p = str; *p != NUL; ++p)
Bram Moolenaar95529562005-08-25 21:21:38 +00003526 if (*p == n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003527 return TRUE;
3528 return FALSE;
3529}
3530
Bram Moolenaar5195e452005-08-19 20:32:47 +00003531#define SY_MAXLEN 30
3532typedef struct syl_item_S
3533{
3534 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */
3535 int sy_len;
3536} syl_item_T;
3537
3538/*
3539 * Truncate "slang->sl_syllable" at the first slash and put the following items
3540 * in "slang->sl_syl_items".
3541 */
3542 static int
3543init_syl_tab(slang)
3544 slang_T *slang;
3545{
3546 char_u *p;
3547 char_u *s;
3548 int l;
3549 syl_item_T *syl;
3550
3551 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3552 p = vim_strchr(slang->sl_syllable, '/');
3553 while (p != NULL)
3554 {
3555 *p++ = NUL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00003556 if (*p == NUL) /* trailing slash */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003557 break;
3558 s = p;
3559 p = vim_strchr(p, '/');
3560 if (p == NULL)
3561 l = STRLEN(s);
3562 else
3563 l = p - s;
3564 if (l >= SY_MAXLEN)
3565 return SP_FORMERROR;
3566 if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003567 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003568 syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3569 + slang->sl_syl_items.ga_len++;
3570 vim_strncpy(syl->sy_chars, s, l);
3571 syl->sy_len = l;
3572 }
3573 return OK;
3574}
3575
3576/*
3577 * Count the number of syllables in "word".
3578 * When "word" contains spaces the syllables after the last space are counted.
3579 * Returns zero if syllables are not defines.
3580 */
3581 static int
3582count_syllables(slang, word)
3583 slang_T *slang;
3584 char_u *word;
3585{
3586 int cnt = 0;
3587 int skip = FALSE;
3588 char_u *p;
3589 int len;
3590 int i;
3591 syl_item_T *syl;
3592 int c;
3593
3594 if (slang->sl_syllable == NULL)
3595 return 0;
3596
3597 for (p = word; *p != NUL; p += len)
3598 {
3599 /* When running into a space reset counter. */
3600 if (*p == ' ')
3601 {
3602 len = 1;
3603 cnt = 0;
3604 continue;
3605 }
3606
3607 /* Find longest match of syllable items. */
3608 len = 0;
3609 for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3610 {
3611 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3612 if (syl->sy_len > len
3613 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3614 len = syl->sy_len;
3615 }
3616 if (len != 0) /* found a match, count syllable */
3617 {
3618 ++cnt;
3619 skip = FALSE;
3620 }
3621 else
3622 {
3623 /* No recognized syllable item, at least a syllable char then? */
3624#ifdef FEAT_MBYTE
3625 c = mb_ptr2char(p);
3626 len = (*mb_ptr2len)(p);
3627#else
3628 c = *p;
3629 len = 1;
3630#endif
3631 if (vim_strchr(slang->sl_syllable, c) == NULL)
3632 skip = FALSE; /* No, search for next syllable */
3633 else if (!skip)
3634 {
3635 ++cnt; /* Yes, count it */
3636 skip = TRUE; /* don't count following syllable chars */
3637 }
3638 }
3639 }
3640 return cnt;
3641}
3642
3643/*
Bram Moolenaar7887d882005-07-01 22:33:52 +00003644 * Set the SOFOFROM and SOFOTO items in language "lp".
Bram Moolenaar5195e452005-08-19 20:32:47 +00003645 * Returns SP_*ERROR flags when there is something wrong.
Bram Moolenaar7887d882005-07-01 22:33:52 +00003646 */
3647 static int
3648set_sofo(lp, from, to)
3649 slang_T *lp;
3650 char_u *from;
3651 char_u *to;
3652{
3653 int i;
3654
3655#ifdef FEAT_MBYTE
3656 garray_T *gap;
3657 char_u *s;
3658 char_u *p;
3659 int c;
3660 int *inp;
3661
3662 if (has_mbyte)
3663 {
3664 /* Use "sl_sal" as an array with 256 pointers to a list of wide
3665 * characters. The index is the low byte of the character.
3666 * The list contains from-to pairs with a terminating NUL.
3667 * sl_sal_first[] is used for latin1 "from" characters. */
3668 gap = &lp->sl_sal;
3669 ga_init2(gap, sizeof(int *), 1);
3670 if (ga_grow(gap, 256) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003671 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003672 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3673 gap->ga_len = 256;
3674
3675 /* First count the number of items for each list. Temporarily use
3676 * sl_sal_first[] for this. */
3677 for (p = from, s = to; *p != NUL && *s != NUL; )
3678 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003679 c = mb_cptr2char_adv(&p);
3680 mb_cptr_adv(s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003681 if (c >= 256)
3682 ++lp->sl_sal_first[c & 0xff];
3683 }
3684 if (*p != NUL || *s != NUL) /* lengths differ */
Bram Moolenaar5195e452005-08-19 20:32:47 +00003685 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003686
3687 /* Allocate the lists. */
3688 for (i = 0; i < 256; ++i)
3689 if (lp->sl_sal_first[i] > 0)
3690 {
3691 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3692 if (p == NULL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00003693 return SP_OTHERERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003694 ((int **)gap->ga_data)[i] = (int *)p;
3695 *(int *)p = 0;
3696 }
3697
3698 /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3699 * list. */
3700 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3701 for (p = from, s = to; *p != NUL && *s != NUL; )
3702 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003703 c = mb_cptr2char_adv(&p);
3704 i = mb_cptr2char_adv(&s);
Bram Moolenaar7887d882005-07-01 22:33:52 +00003705 if (c >= 256)
3706 {
3707 /* Append the from-to chars at the end of the list with
3708 * the low byte. */
3709 inp = ((int **)gap->ga_data)[c & 0xff];
3710 while (*inp != 0)
3711 ++inp;
3712 *inp++ = c; /* from char */
3713 *inp++ = i; /* to char */
3714 *inp++ = NUL; /* NUL at the end */
3715 }
3716 else
3717 /* mapping byte to char is done in sl_sal_first[] */
3718 lp->sl_sal_first[c] = i;
3719 }
3720 }
3721 else
3722#endif
3723 {
3724 /* mapping bytes to bytes is done in sl_sal_first[] */
3725 if (STRLEN(from) != STRLEN(to))
Bram Moolenaar5195e452005-08-19 20:32:47 +00003726 return SP_FORMERROR;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003727
3728 for (i = 0; to[i] != NUL; ++i)
3729 lp->sl_sal_first[from[i]] = to[i];
3730 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
3731 }
3732
Bram Moolenaar5195e452005-08-19 20:32:47 +00003733 return 0;
Bram Moolenaar7887d882005-07-01 22:33:52 +00003734}
3735
3736/*
3737 * Fill the first-index table for "lp".
3738 */
3739 static void
3740set_sal_first(lp)
3741 slang_T *lp;
3742{
3743 salfirst_T *sfirst;
3744 int i;
3745 salitem_T *smp;
3746 int c;
3747 garray_T *gap = &lp->sl_sal;
3748
3749 sfirst = lp->sl_sal_first;
3750 for (i = 0; i < 256; ++i)
3751 sfirst[i] = -1;
3752 smp = (salitem_T *)gap->ga_data;
3753 for (i = 0; i < gap->ga_len; ++i)
3754 {
3755#ifdef FEAT_MBYTE
3756 if (has_mbyte)
3757 /* Use the lowest byte of the first character. For latin1 it's
3758 * the character, for other encodings it should differ for most
3759 * characters. */
3760 c = *smp[i].sm_lead_w & 0xff;
3761 else
3762#endif
3763 c = *smp[i].sm_lead;
3764 if (sfirst[c] == -1)
3765 {
3766 sfirst[c] = i;
3767#ifdef FEAT_MBYTE
3768 if (has_mbyte)
3769 {
3770 int n;
3771
3772 /* Make sure all entries with this byte are following each
3773 * other. Move the ones that are in the wrong position. Do
3774 * keep the same ordering! */
3775 while (i + 1 < gap->ga_len
3776 && (*smp[i + 1].sm_lead_w & 0xff) == c)
3777 /* Skip over entry with same index byte. */
3778 ++i;
3779
3780 for (n = 1; i + n < gap->ga_len; ++n)
3781 if ((*smp[i + n].sm_lead_w & 0xff) == c)
3782 {
3783 salitem_T tsal;
3784
3785 /* Move entry with same index byte after the entries
3786 * we already found. */
3787 ++i;
3788 --n;
3789 tsal = smp[i + n];
3790 mch_memmove(smp + i + 1, smp + i,
3791 sizeof(salitem_T) * n);
3792 smp[i] = tsal;
3793 }
3794 }
3795#endif
3796 }
3797 }
3798}
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003799
#ifdef FEAT_MBYTE
/*
 * Turn the multi-byte string "s" into a wide character string: one int for
 * each character, terminated by a NUL.
 * Returns the result in allocated memory, NULL when out of memory.
 */
    static int *
mb_str2wide(s)
    char_u      *s;
{
    int         *wide;
    char_u      *src = s;
    int         n = 0;

    /* One int for every character plus one for the terminating NUL. */
    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
        return NULL;

    while (*src != NUL)
        wide[n++] = mb_ptr2char_adv(&src);
    wide[n] = NUL;
    return wide;
}
#endif
3823
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003824/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003825 * Read a tree from the .spl or .sug file.
3826 * Allocates the memory and stores pointers in "bytsp" and "idxsp".
3827 * This is skipped when the tree has zero length.
3828 * Returns zero when OK, SP_ value for an error.
3829 */
3830 static int
3831spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt)
3832 FILE *fd;
3833 char_u **bytsp;
3834 idx_T **idxsp;
3835 int prefixtree; /* TRUE for the prefix tree */
3836 int prefixcnt; /* when "prefixtree" is TRUE: prefix count */
3837{
3838 int len;
3839 int idx;
3840 char_u *bp;
3841 idx_T *ip;
3842
3843 /* The tree size was computed when writing the file, so that we can
3844 * allocate it as one long block. <nodecount> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003845 len = get4c(fd);
Bram Moolenaar4770d092006-01-12 23:22:24 +00003846 if (len < 0)
3847 return SP_TRUNCERROR;
3848 if (len > 0)
3849 {
3850 /* Allocate the byte array. */
3851 bp = lalloc((long_u)len, TRUE);
3852 if (bp == NULL)
3853 return SP_OTHERERROR;
3854 *bytsp = bp;
3855
3856 /* Allocate the index array. */
3857 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
3858 if (ip == NULL)
3859 return SP_OTHERERROR;
3860 *idxsp = ip;
3861
3862 /* Recursively read the tree and store it in the array. */
3863 idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
3864 if (idx < 0)
3865 return idx;
3866 }
3867 return 0;
3868}
3869
3870/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003871 * Read one row of siblings from the spell file and store it in the byte array
3872 * "byts" and index array "idxs". Recursively read the children.
3873 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00003874 * NOTE: The code here must match put_node()!
Bram Moolenaar51485f02005-06-04 21:55:20 +00003875 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00003876 * Returns the index (>= 0) following the siblings.
3877 * Returns SP_TRUNCERROR if the file is shorter than expected.
3878 * Returns SP_FORMERROR if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003879 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003880 static idx_T
Bram Moolenaar4770d092006-01-12 23:22:24 +00003881read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003882 FILE *fd;
3883 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003884 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003885 int maxidx; /* size of arrays */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003886 idx_T startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003887 int prefixtree; /* TRUE for reading PREFIXTREE */
3888 int maxprefcondnr; /* maximum for <prefcondnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003889{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003890 int len;
3891 int i;
3892 int n;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003893 idx_T idx = startidx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003894 int c;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003895 int c2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003896#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003897
Bram Moolenaar51485f02005-06-04 21:55:20 +00003898 len = getc(fd); /* <siblingcount> */
3899 if (len <= 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003900 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003901
3902 if (startidx + len >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003903 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003904 byts[idx++] = len;
3905
3906 /* Read the byte values, flag/region bytes and shared indexes. */
3907 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003908 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003909 c = getc(fd); /* <byte> */
3910 if (c < 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003911 return SP_TRUNCERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003912 if (c <= BY_SPECIAL)
3913 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003914 if (c == BY_NOFLAGS && !prefixtree)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003915 {
3916 /* No flags, all regions. */
3917 idxs[idx] = 0;
3918 c = 0;
3919 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003920 else if (c != BY_INDEX)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003921 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003922 if (prefixtree)
3923 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00003924 /* Read the optional pflags byte, the prefix ID and the
3925 * condition nr. In idxs[] store the prefix ID in the low
3926 * byte, the condition index shifted up 8 bits, the flags
3927 * shifted up 24 bits. */
3928 if (c == BY_FLAGS)
3929 c = getc(fd) << 24; /* <pflags> */
3930 else
3931 c = 0;
3932
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003933 c |= getc(fd); /* <affixID> */
Bram Moolenaar53805d12005-08-01 07:08:33 +00003934
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003935 n = get2c(fd); /* <prefcondnr> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003936 if (n >= maxprefcondnr)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003937 return SP_FORMERROR;
Bram Moolenaar53805d12005-08-01 07:08:33 +00003938 c |= (n << 8);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003939 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003940 else /* c must be BY_FLAGS or BY_FLAGS2 */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003941 {
3942 /* Read flags and optional region and prefix ID. In
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003943 * idxs[] the flags go in the low two bytes, region above
3944 * that and prefix ID above the region. */
3945 c2 = c;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003946 c = getc(fd); /* <flags> */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003947 if (c2 == BY_FLAGS2)
3948 c = (getc(fd) << 8) + c; /* <flags2> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003949 if (c & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003950 c = (getc(fd) << 16) + c; /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00003951 if (c & WF_AFX)
3952 c = (getc(fd) << 24) + c; /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003953 }
3954
Bram Moolenaar51485f02005-06-04 21:55:20 +00003955 idxs[idx] = c;
3956 c = 0;
3957 }
3958 else /* c == BY_INDEX */
3959 {
3960 /* <nodeidx> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +00003961 n = get3c(fd);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003962 if (n < 0 || n >= maxidx)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003963 return SP_FORMERROR;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003964 idxs[idx] = n + SHARED_MASK;
3965 c = getc(fd); /* <xbyte> */
3966 }
3967 }
3968 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003969 }
3970
Bram Moolenaar51485f02005-06-04 21:55:20 +00003971 /* Recursively read the children for non-shared siblings.
3972 * Skip the end-of-word ones (zero byte value) and the shared ones (and
3973 * remove SHARED_MASK) */
3974 for (i = 1; i <= len; ++i)
3975 if (byts[startidx + i] != 0)
3976 {
3977 if (idxs[startidx + i] & SHARED_MASK)
3978 idxs[startidx + i] &= ~SHARED_MASK;
3979 else
3980 {
3981 idxs[startidx + i] = idx;
Bram Moolenaar4770d092006-01-12 23:22:24 +00003982 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003983 prefixtree, maxprefcondnr);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003984 if (idx < 0)
3985 break;
3986 }
3987 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003988
Bram Moolenaar51485f02005-06-04 21:55:20 +00003989 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003990}
3991
3992/*
3993 * Parse 'spelllang' and set buf->b_langp accordingly.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003994 * Returns NULL if it's OK, an error message otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003995 */
3996 char_u *
3997did_set_spelllang(buf)
3998 buf_T *buf;
3999{
4000 garray_T ga;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004001 char_u *splp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004002 char_u *region;
Bram Moolenaarb6356332005-07-18 21:40:44 +00004003 char_u region_cp[3];
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004004 int filename;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004005 int region_mask;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004006 slang_T *slang;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004007 int c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004008 char_u lang[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004009 char_u spf_name[MAXPATHL];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004010 int len;
4011 char_u *p;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004012 int round;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004013 char_u *spf;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004014 char_u *use_region = NULL;
4015 int dont_use_region = FALSE;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004016 int nobreak = FALSE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004017 int i, j;
4018 langp_T *lp, *lp2;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004019
4020 ga_init2(&ga, sizeof(langp_T), 2);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004021 clear_midword(buf);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004022
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004023 /* loop over comma separated language names. */
4024 for (splp = buf->b_p_spl; *splp != NUL; )
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004025 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004026 /* Get one language name. */
4027 copy_option_part(&splp, lang, MAXWLEN, ",");
4028
Bram Moolenaar5482f332005-04-17 20:18:43 +00004029 region = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004030 len = STRLEN(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004031
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004032 /* If the name ends in ".spl" use it as the name of the spell file.
4033 * If there is a region name let "region" point to it and remove it
4034 * from the name. */
4035 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
4036 {
4037 filename = TRUE;
4038
Bram Moolenaarb6356332005-07-18 21:40:44 +00004039 /* Locate a region and remove it from the file name. */
4040 p = vim_strchr(gettail(lang), '_');
4041 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
4042 && !ASCII_ISALPHA(p[3]))
4043 {
4044 vim_strncpy(region_cp, p + 1, 2);
4045 mch_memmove(p, p + 3, len - (p - lang) - 2);
4046 len -= 3;
4047 region = region_cp;
4048 }
4049 else
4050 dont_use_region = TRUE;
4051
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004052 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004053 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4054 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004055 break;
4056 }
4057 else
4058 {
4059 filename = FALSE;
4060 if (len > 3 && lang[len - 3] == '_')
4061 {
4062 region = lang + len - 2;
4063 len -= 3;
4064 lang[len] = NUL;
4065 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004066 else
4067 dont_use_region = TRUE;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004068
4069 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004070 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4071 if (STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004072 break;
4073 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004074
Bram Moolenaarb6356332005-07-18 21:40:44 +00004075 if (region != NULL)
4076 {
4077 /* If the region differs from what was used before then don't
4078 * use it for 'spellfile'. */
4079 if (use_region != NULL && STRCMP(region, use_region) != 0)
4080 dont_use_region = TRUE;
4081 use_region = region;
4082 }
4083
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004084 /* If not found try loading the language now. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004085 if (slang == NULL)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004086 {
4087 if (filename)
4088 (void)spell_load_file(lang, lang, NULL, FALSE);
4089 else
4090 spell_load_lang(lang);
4091 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004092
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004093 /*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004094 * Loop over the languages, there can be several files for "lang".
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004095 */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004096 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4097 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
4098 : STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004099 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00004100 region_mask = REGION_ALL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00004101 if (!filename && region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004102 {
4103 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004104 c = find_region(slang->sl_regions, region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004105 if (c == REGION_ALL)
4106 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004107 if (slang->sl_add)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004108 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004109 if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004110 /* This addition file is for other regions. */
4111 region_mask = 0;
4112 }
4113 else
4114 /* This is probably an error. Give a warning and
4115 * accept the words anyway. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004116 smsg((char_u *)
4117 _("Warning: region %s not supported"),
4118 region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004119 }
4120 else
4121 region_mask = 1 << c;
4122 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004123
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004124 if (region_mask != 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004125 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004126 if (ga_grow(&ga, 1) == FAIL)
4127 {
4128 ga_clear(&ga);
4129 return e_outofmem;
4130 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004131 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004132 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4133 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004134 use_midword(slang, buf);
4135 if (slang->sl_nobreak)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004136 nobreak = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004137 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004138 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004139 }
4140
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004141 /* round 0: load int_wordlist, if possible.
4142 * round 1: load first name in 'spellfile'.
4143 * round 2: load second name in 'spellfile.
4144 * etc. */
4145 spf = curbuf->b_p_spf;
4146 for (round = 0; round == 0 || *spf != NUL; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004147 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004148 if (round == 0)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004149 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004150 /* Internal wordlist, if there is one. */
4151 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004152 continue;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004153 int_wordlist_spl(spf_name);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004154 }
4155 else
4156 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004157 /* One entry in 'spellfile'. */
4158 copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
4159 STRCAT(spf_name, ".spl");
4160
4161 /* If it was already found above then skip it. */
4162 for (c = 0; c < ga.ga_len; ++c)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004163 {
4164 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
4165 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004166 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004167 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004168 if (c < ga.ga_len)
Bram Moolenaar7887d882005-07-01 22:33:52 +00004169 continue;
Bram Moolenaar7887d882005-07-01 22:33:52 +00004170 }
4171
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004172 /* Check if it was loaded already. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004173 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4174 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004175 break;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004176 if (slang == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004177 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004178 /* Not loaded, try loading it now. The language name includes the
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004179 * region name, the region is ignored otherwise. for int_wordlist
4180 * use an arbitrary name. */
4181 if (round == 0)
4182 STRCPY(lang, "internal wordlist");
4183 else
Bram Moolenaar7887d882005-07-01 22:33:52 +00004184 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00004185 vim_strncpy(lang, gettail(spf_name), MAXWLEN);
Bram Moolenaar7887d882005-07-01 22:33:52 +00004186 p = vim_strchr(lang, '.');
4187 if (p != NULL)
4188 *p = NUL; /* truncate at ".encoding.add" */
4189 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004190 slang = spell_load_file(spf_name, lang, NULL, TRUE);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004191
4192 /* If one of the languages has NOBREAK we assume the addition
4193 * files also have this. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004194 if (slang != NULL && nobreak)
4195 slang->sl_nobreak = TRUE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004196 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004197 if (slang != NULL && ga_grow(&ga, 1) == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004198 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004199 region_mask = REGION_ALL;
4200 if (use_region != NULL && !dont_use_region)
4201 {
4202 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004203 c = find_region(slang->sl_regions, use_region);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004204 if (c != REGION_ALL)
4205 region_mask = 1 << c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004206 else if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004207 /* This spell file is for other regions. */
4208 region_mask = 0;
4209 }
4210
4211 if (region_mask != 0)
4212 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004213 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4214 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
4215 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004216 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4217 ++ga.ga_len;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004218 use_midword(slang, buf);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004219 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004220 }
4221 }
4222
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004223 /* Everything is fine, store the new b_langp value. */
4224 ga_clear(&buf->b_langp);
4225 buf->b_langp = ga;
4226
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004227 /* For each language figure out what language to use for sound folding and
4228 * REP items. If the language doesn't support it itself use another one
4229 * with the same name. E.g. for "en-math" use "en". */
4230 for (i = 0; i < ga.ga_len; ++i)
4231 {
4232 lp = LANGP_ENTRY(ga, i);
4233
4234 /* sound folding */
4235 if (lp->lp_slang->sl_sal.ga_len > 0)
4236 /* language does sound folding itself */
4237 lp->lp_sallang = lp->lp_slang;
4238 else
4239 /* find first similar language that does sound folding */
4240 for (j = 0; j < ga.ga_len; ++j)
4241 {
4242 lp2 = LANGP_ENTRY(ga, j);
4243 if (lp2->lp_slang->sl_sal.ga_len > 0
4244 && STRNCMP(lp->lp_slang->sl_name,
4245 lp2->lp_slang->sl_name, 2) == 0)
4246 {
4247 lp->lp_sallang = lp2->lp_slang;
4248 break;
4249 }
4250 }
4251
4252 /* REP items */
4253 if (lp->lp_slang->sl_rep.ga_len > 0)
4254 /* language has REP items itself */
4255 lp->lp_replang = lp->lp_slang;
4256 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00004257 /* find first similar language that has REP items */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004258 for (j = 0; j < ga.ga_len; ++j)
4259 {
4260 lp2 = LANGP_ENTRY(ga, j);
4261 if (lp2->lp_slang->sl_rep.ga_len > 0
4262 && STRNCMP(lp->lp_slang->sl_name,
4263 lp2->lp_slang->sl_name, 2) == 0)
4264 {
4265 lp->lp_replang = lp2->lp_slang;
4266 break;
4267 }
4268 }
4269 }
4270
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004271 return NULL;
4272}
4273
4274/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004275 * Clear the midword characters for buffer "buf".
4276 */
4277 static void
4278clear_midword(buf)
4279 buf_T *buf;
4280{
4281 vim_memset(buf->b_spell_ismw, 0, 256);
4282#ifdef FEAT_MBYTE
4283 vim_free(buf->b_spell_ismw_mb);
4284 buf->b_spell_ismw_mb = NULL;
4285#endif
4286}
4287
4288/*
4289 * Use the "sl_midword" field of language "lp" for buffer "buf".
4290 * They add up to any currently used midword characters.
4291 */
4292 static void
4293use_midword(lp, buf)
4294 slang_T *lp;
4295 buf_T *buf;
4296{
4297 char_u *p;
4298
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00004299 if (lp->sl_midword == NULL) /* there aren't any */
4300 return;
4301
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004302 for (p = lp->sl_midword; *p != NUL; )
4303#ifdef FEAT_MBYTE
4304 if (has_mbyte)
4305 {
4306 int c, l, n;
4307 char_u *bp;
4308
4309 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004310 l = (*mb_ptr2len)(p);
4311 if (c < 256 && l <= 2)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004312 buf->b_spell_ismw[c] = TRUE;
4313 else if (buf->b_spell_ismw_mb == NULL)
4314 /* First multi-byte char in "b_spell_ismw_mb". */
4315 buf->b_spell_ismw_mb = vim_strnsave(p, l);
4316 else
4317 {
4318 /* Append multi-byte chars to "b_spell_ismw_mb". */
4319 n = STRLEN(buf->b_spell_ismw_mb);
4320 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
4321 if (bp != NULL)
4322 {
4323 vim_free(buf->b_spell_ismw_mb);
4324 buf->b_spell_ismw_mb = bp;
4325 vim_strncpy(bp + n, p, l);
4326 }
4327 }
4328 p += l;
4329 }
4330 else
4331#endif
4332 buf->b_spell_ismw[*p++] = TRUE;
4333}
4334
4335/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004336 * Find the region "region[2]" in "rp" (points to "sl_regions").
4337 * Each region is simply stored as the two characters of it's name.
Bram Moolenaar7887d882005-07-01 22:33:52 +00004338 * Returns the index if found (first is 0), REGION_ALL if not found.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004339 */
4340 static int
4341find_region(rp, region)
4342 char_u *rp;
4343 char_u *region;
4344{
4345 int i;
4346
4347 for (i = 0; ; i += 2)
4348 {
4349 if (rp[i] == NUL)
4350 return REGION_ALL;
4351 if (rp[i] == region[0] && rp[i + 1] == region[1])
4352 break;
4353 }
4354 return i / 2;
4355}
4356
4357/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004358 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004359 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00004360 * Word WF_ONECAP
4361 * W WORD WF_ALLCAP
4362 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004363 */
4364 static int
4365captype(word, end)
4366 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004367 char_u *end; /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004368{
4369 char_u *p;
4370 int c;
4371 int firstcap;
4372 int allcap;
4373 int past_second = FALSE; /* past second word char */
4374
4375 /* find first letter */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004376 for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004377 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004378 return 0; /* only non-word characters, illegal word */
4379#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00004380 if (has_mbyte)
4381 c = mb_ptr2char_adv(&p);
4382 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004383#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00004384 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004385 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004386
4387 /*
4388 * Need to check all letters to find a word with mixed upper/lower.
4389 * But a word with an upper char only at start is a ONECAP.
4390 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004391 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004392 if (spell_iswordp_nmw(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004393 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00004394 c = PTR2CHAR(p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004395 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004396 {
4397 /* UUl -> KEEPCAP */
4398 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004399 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004400 allcap = FALSE;
4401 }
4402 else if (!allcap)
4403 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004404 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004405 past_second = TRUE;
4406 }
4407
4408 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004409 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004410 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004411 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004412 return 0;
4413}
4414
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004415/*
4416 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
4417 * capital. So that make_case_word() can turn WOrd into Word.
4418 * Add ALLCAP for "WOrD".
4419 */
4420 static int
4421badword_captype(word, end)
4422 char_u *word;
4423 char_u *end;
4424{
4425 int flags = captype(word, end);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004426 int c;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004427 int l, u;
4428 int first;
4429 char_u *p;
4430
4431 if (flags & WF_KEEPCAP)
4432 {
4433 /* Count the number of UPPER and lower case letters. */
4434 l = u = 0;
4435 first = FALSE;
4436 for (p = word; p < end; mb_ptr_adv(p))
4437 {
Bram Moolenaar8b59de92005-08-11 19:59:29 +00004438 c = PTR2CHAR(p);
4439 if (SPELL_ISUPPER(c))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004440 {
4441 ++u;
4442 if (p == word)
4443 first = TRUE;
4444 }
4445 else
4446 ++l;
4447 }
4448
4449 /* If there are more UPPER than lower case letters suggest an
4450 * ALLCAP word. Otherwise, if the first letter is UPPER then
4451 * suggest ONECAP. Exception: "ALl" most likely should be "All",
4452 * require three upper case letters. */
4453 if (u > l && u > 2)
4454 flags |= WF_ALLCAP;
4455 else if (first)
4456 flags |= WF_ONECAP;
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00004457
4458 if (u >= 2 && l >= 2) /* maCARONI maCAroni */
4459 flags |= WF_MIXCAP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004460 }
4461 return flags;
4462}
4463
# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all loaded languages and the related global spell data: the "b_langp"
 * array of every buffer, the list starting at "first_lang", the internal
 * wordlist with its files and the "repl_from" / "repl_to" strings.
 * The spell character table is initialized again.
 */
    void
spell_free_all()
{
    buf_T       *buf;
    slang_T     *lang;
    char_u      spl_name[MAXPATHL];

    /* Clear the list of used languages in every buffer. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
        ga_clear(&buf->b_langp);

    /* Unlink each language from the list before freeing it. */
    while ((lang = first_lang) != NULL)
    {
        first_lang = lang->sl_next;
        slang_free(lang);
    }

    if (int_wordlist != NULL)
    {
        /* Delete the internal wordlist and its .spl file */
        mch_remove(int_wordlist);
        int_wordlist_spl(spl_name);
        mch_remove(spl_name);
        vim_free(int_wordlist);
        int_wordlist = NULL;
    }

    init_spell_chartab();

    vim_free(repl_to);
    repl_to = NULL;
    vim_free(repl_from);
    repl_from = NULL;
}
# endif
4504
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Throw away all spelling tables and load them again.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    win_T       *wp;
    buf_T       *buf;

    /* Initialize the table for spell_iswordp(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    spell_free_all();

    /* Handle 'spelllang' again for the buffers that need it. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
        /* Nothing to load when 'spelllang' is empty. */
        if (*buf->b_p_spl == NUL)
            continue;

        /* Only load the wordlists when there is a window for this buffer in
         * which 'spell' is set. */
        FOR_ALL_WINDOWS(wp)
            if (wp->w_buffer == buf && wp->w_p_spell)
            {
                (void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
                break;
# endif
            }
    }
}
# endif
4541
Bram Moolenaarb765d632005-06-07 21:00:02 +00004542/*
4543 * Reload the spell file "fname" if it's loaded.
4544 */
4545 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004546spell_reload_one(fname, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004547 char_u *fname;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004548 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004549{
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004550 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004551 int didit = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004552
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004553 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004554 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004555 if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004556 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004557 slang_clear(slang);
4558 if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004559 /* reloading failed, clear the language */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004560 slang_clear(slang);
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00004561 redraw_all_later(SOME_VALID);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004562 didit = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004563 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004564 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004565
4566 /* When "zg" was used and the file wasn't loaded yet, should redo
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00004567 * 'spelllang' to load it now. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004568 if (added_word && !didit)
4569 did_set_spelllang(curbuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004570}
4571
4572
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004573/*
4574 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004575 */
4576
Bram Moolenaar51485f02005-06-04 21:55:20 +00004577#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004578 and .dic file. */
4579/*
4580 * Main structure to store the contents of a ".aff" file.
4581 */
4582typedef struct afffile_S
4583{
4584 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
Bram Moolenaar95529562005-08-25 21:21:38 +00004585 int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
Bram Moolenaar371baa92005-12-29 22:43:53 +00004586 unsigned af_rare; /* RARE ID for rare word */
4587 unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004588 unsigned af_bad; /* BAD ID for banned word */
4589 unsigned af_needaffix; /* NEEDAFFIX ID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004590 unsigned af_needcomp; /* NEEDCOMPOUND ID */
Bram Moolenaare1438bb2006-03-01 22:01:55 +00004591 unsigned af_nosuggest; /* NOSUGGEST ID */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004592 int af_pfxpostpone; /* postpone prefixes without chop string */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004593 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
4594 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004595 hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004596} afffile_T;
4597
Bram Moolenaar6de68532005-08-24 22:08:48 +00004598#define AFT_CHAR 0 /* flags are one character */
Bram Moolenaar95529562005-08-25 21:21:38 +00004599#define AFT_LONG 1 /* flags are two characters */
4600#define AFT_CAPLONG 2 /* flags are one or two characters */
4601#define AFT_NUM 3 /* flags are numbers, comma separated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004602
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004603typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004604/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
4605struct affentry_S
4606{
4607 affentry_T *ae_next; /* next affix with same name/number */
4608 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
4609 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004610 char_u *ae_cond; /* condition (NULL for ".") */
4611 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004612 char_u ae_rare; /* rare affix */
4613 char_u ae_nocomp; /* word with affix not compoundable */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004614};
4615
Bram Moolenaar6de68532005-08-24 22:08:48 +00004616#ifdef FEAT_MBYTE
4617# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
4618#else
Bram Moolenaar95529562005-08-25 21:21:38 +00004619# define AH_KEY_LEN 7 /* 6 digits + NUL */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004620#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +00004621
Bram Moolenaar51485f02005-06-04 21:55:20 +00004622/* Affix header from ".aff" file. Used for af_pref and af_suff. */
4623typedef struct affheader_S
4624{
Bram Moolenaar6de68532005-08-24 22:08:48 +00004625 char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4626 unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
4627 int ah_newID; /* prefix ID after renumbering; 0 if not used */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004628 int ah_combine; /* suffix may combine with prefix */
Bram Moolenaar95529562005-08-25 21:21:38 +00004629 int ah_follows; /* another affix block should be following */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004630 affentry_T *ah_first; /* first affix entry */
4631} affheader_T;
4632
4633#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
4634
Bram Moolenaar6de68532005-08-24 22:08:48 +00004635/* Flag used in compound items. */
4636typedef struct compitem_S
4637{
4638 char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4639 unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
4640 int ci_newID; /* affix ID after renumbering. */
4641} compitem_T;
4642
4643#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
4644
Bram Moolenaar51485f02005-06-04 21:55:20 +00004645/*
4646 * Structure that is used to store the items in the word tree. This avoids
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004647 * the need to keep track of each allocated thing, everything is freed all at
4648 * once after ":mkspell" is done.
Bram Moolenaar51485f02005-06-04 21:55:20 +00004649 */
4650#define SBLOCKSIZE 16000 /* size of sb_data */
4651typedef struct sblock_S sblock_T;
4652struct sblock_S
4653{
4654 sblock_T *sb_next; /* next block in list */
4655 int sb_used; /* nr of bytes already in use */
4656 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004657};
4658
4659/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004660 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004661 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004662typedef struct wordnode_S wordnode_T;
4663struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004664{
Bram Moolenaar0c405862005-06-22 22:26:26 +00004665 union /* shared to save space */
4666 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004667 char_u hashkey[6]; /* the hash key, only used while compressing */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004668 int index; /* index in written nodes (valid after first
4669 round) */
4670 } wn_u1;
4671 union /* shared to save space */
4672 {
4673 wordnode_T *next; /* next node with same hash key */
4674 wordnode_T *wnode; /* parent node that will write this node */
4675 } wn_u2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004676 wordnode_T *wn_child; /* child (next byte in word) */
4677 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
4678 always sorted) */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004679 int wn_refs; /* Nr. of references to this node. Only
4680 relevant for first node in a list of
4681 siblings, in following siblings it is
4682 always one. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004683 char_u wn_byte; /* Byte for this node. NUL for word end */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004684
4685 /* Info for when "wn_byte" is NUL.
4686 * In PREFIXTREE "wn_region" is used for the prefcondnr.
4687 * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4688 * "wn_region" the LSW of the wordnr. */
4689 char_u wn_affixID; /* supported/required prefix ID or 0 */
4690 short_u wn_flags; /* WF_ flags */
4691 short wn_region; /* region mask */
4692
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004693#ifdef SPELL_PRINTTREE
4694 int wn_nr; /* sequence nr for printing */
4695#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004696};
4697
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004698#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
4699
/* Get the wordnode_T from hashtable item "hi" by casting its key pointer.
 * The whole expansion is parenthesized, like HI2AH() and HI2CI(), so that
 * the result can be used directly as in "HI2WN(hi)->wn_child". */
#define HI2WN(hi)   ((wordnode_T *)((hi)->hi_key))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004701
Bram Moolenaar51485f02005-06-04 21:55:20 +00004702/*
4703 * Info used while reading the spell files.
4704 */
4705typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004706{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004707 wordnode_T *si_foldroot; /* tree with case-folded words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00004708 long si_foldwcount; /* nr of words in si_foldroot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004709
Bram Moolenaar51485f02005-06-04 21:55:20 +00004710 wordnode_T *si_keeproot; /* tree with keep-case words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00004711 long si_keepwcount; /* nr of words in si_keeproot */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004712
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004713 wordnode_T *si_prefroot; /* tree with postponed prefixes */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004714
Bram Moolenaar4770d092006-01-12 23:22:24 +00004715 long si_sugtree; /* creating the soundfolding trie */
4716
Bram Moolenaar51485f02005-06-04 21:55:20 +00004717 sblock_T *si_blocks; /* memory blocks used */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004718 long si_blocks_cnt; /* memory blocks allocated */
4719 long si_compress_cnt; /* words to add before lowering
4720 compression limit */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004721 wordnode_T *si_first_free; /* List of nodes that have been freed during
4722 compression, linked by "wn_child" field. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004723 long si_free_count; /* number of nodes in si_first_free */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004724#ifdef SPELL_PRINTTREE
4725 int si_wordnode_nr; /* sequence nr for nodes */
4726#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004727 buf_T *si_spellbuf; /* buffer used to store soundfold word table */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004728
Bram Moolenaar51485f02005-06-04 21:55:20 +00004729 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004730 int si_add; /* addition file */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004731 int si_clear_chartab; /* when TRUE clear char tables */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004732 int si_region; /* region mask */
4733 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00004734 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004735 int si_verbose; /* verbose messages */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004736 int si_msg_count; /* number of words added since last message */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004737 char_u *si_info; /* info text chars or NULL */
Bram Moolenaar3982c542005-06-08 21:56:31 +00004738 int si_region_count; /* number of regions supported (1 when there
4739 are no regions) */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004740 char_u si_region_name[16]; /* region names; used only if
4741 * si_region_count > 1) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004742
4743 garray_T si_rep; /* list of fromto_T entries from REP lines */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004744 garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004745 garray_T si_sal; /* list of fromto_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004746 char_u *si_sofofr; /* SOFOFROM text */
4747 char_u *si_sofoto; /* SOFOTO text */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004748 int si_nosugfile; /* NOSUGFILE item found */
Bram Moolenaare1438bb2006-03-01 22:01:55 +00004749 int si_nosplitsugs; /* NOSPLITSUGS item found */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004750 int si_followup; /* soundsalike: ? */
4751 int si_collapse; /* soundsalike: ? */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004752 hashtab_T si_commonwords; /* hashtable for common words */
4753 time_t si_sugtime; /* timestamp for .sug file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004754 int si_rem_accents; /* soundsalike: remove accents */
4755 garray_T si_map; /* MAP info concatenated */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004756 char_u *si_midword; /* MIDWORD chars or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004757 int si_compmax; /* max nr of words for compounding */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004758 int si_compminlen; /* minimal length for compounding */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004759 int si_compsylmax; /* max nr of syllables for compounding */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004760 char_u *si_compflags; /* flags used for compounding */
Bram Moolenaar78622822005-08-23 21:00:13 +00004761 char_u si_nobreak; /* NOBREAK */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004762 char_u *si_syllable; /* syllable string */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004763 garray_T si_prefcond; /* table with conditions for postponed
4764 * prefixes, each stored as a string */
Bram Moolenaar6de68532005-08-24 22:08:48 +00004765 int si_newprefID; /* current value for ah_newID */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004766 int si_newcompID; /* current value for compound ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004767} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004768
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004769static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004770static int spell_info_item __ARGS((char_u *s));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004771static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum));
4772static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
4773static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004774static void check_renumber __ARGS((spellinfo_T *spin));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004775static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
4776static void aff_check_number __ARGS((int spinval, int affval, char *name));
4777static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004778static int str_equal __ARGS((char_u *s1, char_u *s2));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004779static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
4780static int sal_to_bool __ARGS((char_u *s));
Bram Moolenaar5482f332005-04-17 20:18:43 +00004781static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004782static void spell_free_aff __ARGS((afffile_T *aff));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004783static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004784static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar6de68532005-08-24 22:08:48 +00004785static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004786static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004787static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
4788static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
4789static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004790static void free_blocks __ARGS((sblock_T *bl));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004791static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
Bram Moolenaar5195e452005-08-19 20:32:47 +00004792static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00004793static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004794static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004795static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004796static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
4797static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
4798static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
Bram Moolenaar51485f02005-06-04 21:55:20 +00004799static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004800static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004801static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar0c405862005-06-22 22:26:26 +00004802static void clear_node __ARGS((wordnode_T *node));
4803static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004804static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname));
4805static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang));
4806static int sug_maketable __ARGS((spellinfo_T *spin));
4807static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap));
4808static int offset2bytes __ARGS((int nr, char_u *buf));
4809static int bytes2offset __ARGS((char_u **pp));
4810static void sug_write __ARGS((spellinfo_T *spin, char_u *fname));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004811static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004812static void spell_message __ARGS((spellinfo_T *spin, char_u *str));
Bram Moolenaarb765d632005-06-07 21:00:02 +00004813static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004814
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that the macro always expands to a single
 * operand, whatever expression it is used in. */
#define PFX_FLAGS   (-256)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004819
/*
 * Tunable parameters that control when the tree is compressed.
 * See 'mkspellmem'.
 */
static long compress_start = 30000;     /* memory / SBLOCKSIZE */
static long compress_inc   = 100;       /* memory / SBLOCKSIZE */
static long compress_added = 500000;    /* word count */
4826
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00004827#ifdef SPELL_PRINTTREE
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE   1000    /* size of each of the three line buffers */
#define PRINTWIDTH      6       /* nr of columns used per tree level */

/*
 * Format "a1" and "a2" with "fmt" into line buffer "l", starting at the
 * column for tree level "depth" and limited to the space that remains.
 * Every parameter is parenthesized, thus an expression such as "depth + 1"
 * can be passed safely.  Note that "depth" is evaluated twice.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    vim_snprintf((l) + (depth) * PRINTWIDTH, \
                 PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2))

/* The three text lines that make up one row of printed nodes. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
4843
4844 static void
4845spell_clear_flags(wordnode_T *node)
4846{
4847 wordnode_T *np;
4848
4849 for (np = node; np != NULL; np = np->wn_sibling)
4850 {
4851 np->wn_u1.index = FALSE;
4852 spell_clear_flags(np->wn_child);
4853 }
4854}
4855
4856 static void
4857spell_print_node(wordnode_T *node, int depth)
4858{
4859 if (node->wn_u1.index)
4860 {
4861 /* Done this node before, print the reference. */
4862 PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
4863 PRINTSOME(line2, depth, " ", 0, 0);
4864 PRINTSOME(line3, depth, " ", 0, 0);
4865 msg(line1);
4866 msg(line2);
4867 msg(line3);
4868 }
4869 else
4870 {
4871 node->wn_u1.index = TRUE;
4872
4873 if (node->wn_byte != NUL)
4874 {
4875 if (node->wn_child != NULL)
4876 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
4877 else
4878 /* Cannot happen? */
4879 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
4880 }
4881 else
4882 PRINTSOME(line1, depth, " $ ", 0, 0);
4883
4884 PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
4885
4886 if (node->wn_sibling != NULL)
4887 PRINTSOME(line3, depth, " | ", 0, 0);
4888 else
4889 PRINTSOME(line3, depth, " ", 0, 0);
4890
4891 if (node->wn_byte == NUL)
4892 {
4893 msg(line1);
4894 msg(line2);
4895 msg(line3);
4896 }
4897
4898 /* do the children */
4899 if (node->wn_byte != NUL && node->wn_child != NULL)
4900 spell_print_node(node->wn_child, depth + 1);
4901
4902 /* do the siblings */
4903 if (node->wn_sibling != NULL)
4904 {
4905 /* get rid of all parent details except | */
4906 STRCPY(line1, line3);
4907 STRCPY(line2, line3);
4908 spell_print_node(node->wn_sibling, depth);
4909 }
4910 }
4911}
4912
4913 static void
4914spell_print_tree(wordnode_T *root)
4915{
4916 if (root != NULL)
4917 {
4918 /* Clear the "wn_u1.index" fields, used to remember what has been
4919 * done. */
4920 spell_clear_flags(root);
4921
4922 /* Recursively print the tree. */
4923 spell_print_node(root, 0);
4924 }
4925}
4926#endif /* SPELL_PRINTTREE */
4927
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004928/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004929 * Read the affix file "fname".
Bram Moolenaar3982c542005-06-08 21:56:31 +00004930 * Returns an afffile_T, NULL for complete failure.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004931 */
4932 static afffile_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004933spell_read_aff(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004934 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004935 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004936{
4937 FILE *fd;
4938 afffile_T *aff;
4939 char_u rline[MAXLINELEN];
4940 char_u *line;
4941 char_u *pc = NULL;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004942#define MAXITEMCNT 30
Bram Moolenaar8db73182005-06-17 21:51:16 +00004943 char_u *(items[MAXITEMCNT]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004944 int itemcnt;
4945 char_u *p;
4946 int lnum = 0;
4947 affheader_T *cur_aff = NULL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004948 int did_postpone_prefix = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004949 int aff_todo = 0;
4950 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004951 char_u *low = NULL;
4952 char_u *fol = NULL;
4953 char_u *upp = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004954 int do_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004955 int do_repsal;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004956 int do_sal;
4957 int do_map;
4958 int found_map = FALSE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004959 hashitem_T *hi;
Bram Moolenaar53805d12005-08-01 07:08:33 +00004960 int l;
Bram Moolenaar6de68532005-08-24 22:08:48 +00004961 int compminlen = 0; /* COMPOUNDMIN value */
4962 int compsylmax = 0; /* COMPOUNDSYLMAX value */
4963 int compmax = 0; /* COMPOUNDMAX value */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00004964 char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE
Bram Moolenaar6de68532005-08-24 22:08:48 +00004965 concatenated */
4966 char_u *midword = NULL; /* MIDWORD value */
4967 char_u *syllable = NULL; /* SYLLABLE value */
4968 char_u *sofofrom = NULL; /* SOFOFROM value */
4969 char_u *sofoto = NULL; /* SOFOTO value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004970
Bram Moolenaar51485f02005-06-04 21:55:20 +00004971 /*
4972 * Open the file.
4973 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004974 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004975 if (fd == NULL)
4976 {
4977 EMSG2(_(e_notopen), fname);
4978 return NULL;
4979 }
4980
Bram Moolenaar4770d092006-01-12 23:22:24 +00004981 vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
4982 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004983
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004984 /* Only do REP lines when not done in another .aff file already. */
4985 do_rep = spin->si_rep.ga_len == 0;
4986
Bram Moolenaar4770d092006-01-12 23:22:24 +00004987 /* Only do REPSAL lines when not done in another .aff file already. */
4988 do_repsal = spin->si_repsal.ga_len == 0;
4989
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004990 /* Only do SAL lines when not done in another .aff file already. */
4991 do_sal = spin->si_sal.ga_len == 0;
4992
4993 /* Only do MAP lines when not done in another .aff file already. */
4994 do_map = spin->si_map.ga_len == 0;
4995
Bram Moolenaar51485f02005-06-04 21:55:20 +00004996 /*
4997 * Allocate and init the afffile_T structure.
4998 */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004999 aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005000 if (aff == NULL)
5001 return NULL;
5002 hash_init(&aff->af_pref);
5003 hash_init(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005004 hash_init(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005005
5006 /*
5007 * Read all the lines in the file one by one.
5008 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005009 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005010 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005011 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005012 ++lnum;
5013
5014 /* Skip comment lines. */
5015 if (*rline == '#')
5016 continue;
5017
5018 /* Convert from "SET" to 'encoding' when needed. */
5019 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005020#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00005021 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005022 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00005023 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005024 if (pc == NULL)
5025 {
5026 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5027 fname, lnum, rline);
5028 continue;
5029 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005030 line = pc;
5031 }
5032 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00005033#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005034 {
5035 pc = NULL;
5036 line = rline;
5037 }
5038
5039 /* Split the line up in white separated items. Put a NUL after each
5040 * item. */
5041 itemcnt = 0;
5042 for (p = line; ; )
5043 {
5044 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
5045 ++p;
5046 if (*p == NUL)
5047 break;
Bram Moolenaar8db73182005-06-17 21:51:16 +00005048 if (itemcnt == MAXITEMCNT) /* too many items */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005049 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005050 items[itemcnt++] = p;
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005051 /* A few items have arbitrary text argument, don't split them. */
5052 if (itemcnt == 2 && spell_info_item(items[0]))
5053 while (*p >= ' ' || *p == TAB) /* skip until CR/NL */
5054 ++p;
5055 else
5056 while (*p > ' ') /* skip until white space or CR/NL */
5057 ++p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005058 if (*p == NUL)
5059 break;
5060 *p++ = NUL;
5061 }
5062
5063 /* Handle non-empty lines. */
5064 if (itemcnt > 0)
5065 {
5066 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
5067 && aff->af_enc == NULL)
5068 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00005069#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00005070 /* Setup for conversion from "ENC" to 'encoding'. */
5071 aff->af_enc = enc_canonize(items[1]);
5072 if (aff->af_enc != NULL && !spin->si_ascii
5073 && convert_setup(&spin->si_conv, aff->af_enc,
5074 p_enc) == FAIL)
5075 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
5076 fname, aff->af_enc, p_enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005077 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00005078#else
5079 smsg((char_u *)_("Conversion in %s not supported"), fname);
5080#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005081 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005082 else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2
5083 && aff->af_flagtype == AFT_CHAR)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005084 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005085 if (STRCMP(items[1], "long") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00005086 aff->af_flagtype = AFT_LONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005087 else if (STRCMP(items[1], "num") == 0)
Bram Moolenaar95529562005-08-25 21:21:38 +00005088 aff->af_flagtype = AFT_NUM;
5089 else if (STRCMP(items[1], "caplong") == 0)
5090 aff->af_flagtype = AFT_CAPLONG;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005091 else
5092 smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
5093 fname, lnum, items[1]);
Bram Moolenaar371baa92005-12-29 22:43:53 +00005094 if (aff->af_rare != 0
5095 || aff->af_keepcase != 0
5096 || aff->af_bad != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005097 || aff->af_needaffix != 0
5098 || aff->af_needcomp != 0
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005099 || aff->af_nosuggest != 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005100 || compflags != NULL
Bram Moolenaar6de68532005-08-24 22:08:48 +00005101 || aff->af_suff.ht_used > 0
5102 || aff->af_pref.ht_used > 0)
5103 smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
5104 fname, lnum, items[1]);
5105 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005106 else if (spell_info_item(items[0]))
5107 {
5108 p = (char_u *)getroom(spin,
5109 (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
5110 + STRLEN(items[0])
5111 + STRLEN(items[1]) + 3, FALSE);
5112 if (p != NULL)
5113 {
5114 if (spin->si_info != NULL)
5115 {
5116 STRCPY(p, spin->si_info);
5117 STRCAT(p, "\n");
5118 }
5119 STRCAT(p, items[0]);
5120 STRCAT(p, " ");
5121 STRCAT(p, items[1]);
5122 spin->si_info = p;
5123 }
5124 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005125 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2
5126 && midword == NULL)
5127 {
5128 midword = getroom_save(spin, items[1]);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005129 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005130 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
Bram Moolenaar51485f02005-06-04 21:55:20 +00005131 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005132 /* ignored, we look in the tree for what chars may appear */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005133 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005134 /* TODO: remove "RAR" later */
5135 else if ((STRCMP(items[0], "RAR") == 0
5136 || STRCMP(items[0], "RARE") == 0) && itemcnt == 2
5137 && aff->af_rare == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005138 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005139 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005140 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005141 }
Bram Moolenaar371baa92005-12-29 22:43:53 +00005142 /* TODO: remove "KEP" later */
5143 else if ((STRCMP(items[0], "KEP") == 0
5144 || STRCMP(items[0], "KEEPCASE") == 0) && itemcnt == 2
5145 && aff->af_keepcase == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005146 {
Bram Moolenaar371baa92005-12-29 22:43:53 +00005147 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
Bram Moolenaar6de68532005-08-24 22:08:48 +00005148 fname, lnum);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005149 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00005150 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
5151 && aff->af_bad == 0)
5152 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005153 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5154 fname, lnum);
Bram Moolenaar0c405862005-06-22 22:26:26 +00005155 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005156 else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2
5157 && aff->af_needaffix == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005158 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005159 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5160 fname, lnum);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005161 }
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005162 else if (STRCMP(items[0], "NOSUGGEST") == 0 && itemcnt == 2
5163 && aff->af_nosuggest == 0)
5164 {
5165 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
5166 fname, lnum);
5167 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005168 else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2
5169 && aff->af_needcomp == 0)
5170 {
5171 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5172 fname, lnum);
5173 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005174 else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005175 && compflags == NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005176 {
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005177 /* Turn flag "c" into COMPOUNDRULE compatible string "c+",
Bram Moolenaar6de68532005-08-24 22:08:48 +00005178 * "Na" into "Na+", "1234" into "1234+". */
5179 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005180 if (p != NULL)
5181 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005182 STRCPY(p, items[1]);
5183 STRCAT(p, "+");
5184 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005185 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005186 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005187 else if (STRCMP(items[0], "COMPOUNDRULE") == 0 && itemcnt == 2)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005188 {
5189 /* Concatenate this string to previously defined ones, using a
5190 * slash to separate them. */
5191 l = STRLEN(items[1]) + 1;
Bram Moolenaar6de68532005-08-24 22:08:48 +00005192 if (compflags != NULL)
5193 l += STRLEN(compflags) + 1;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005194 p = getroom(spin, l, FALSE);
5195 if (p != NULL)
5196 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005197 if (compflags != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005198 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005199 STRCPY(p, compflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005200 STRCAT(p, "/");
5201 }
5202 STRCAT(p, items[1]);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005203 compflags = p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005204 }
5205 }
5206 else if (STRCMP(items[0], "COMPOUNDMAX") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005207 && compmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005208 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005209 compmax = atoi((char *)items[1]);
5210 if (compmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005211 smsg((char_u *)_("Wrong COMPOUNDMAX value in %s line %d: %s"),
5212 fname, lnum, items[1]);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005213 }
5214 else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005215 && compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005216 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005217 compminlen = atoi((char *)items[1]);
5218 if (compminlen == 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005219 smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5220 fname, lnum, items[1]);
5221 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00005222 else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005223 && compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005224 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005225 compsylmax = atoi((char *)items[1]);
5226 if (compsylmax == 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005227 smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5228 fname, lnum, items[1]);
5229 }
5230 else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005231 && syllable == NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005232 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005233 syllable = getroom_save(spin, items[1]);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005234 }
Bram Moolenaar78622822005-08-23 21:00:13 +00005235 else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
5236 {
5237 spin->si_nobreak = TRUE;
5238 }
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005239 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
5240 {
5241 spin->si_nosplitsugs = TRUE;
5242 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005243 else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1)
5244 {
5245 spin->si_nosugfile = TRUE;
5246 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005247 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
5248 {
5249 aff->af_pfxpostpone = TRUE;
5250 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005251 else if ((STRCMP(items[0], "PFX") == 0
5252 || STRCMP(items[0], "SFX") == 0)
5253 && aff_todo == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005254 && itemcnt >= 4)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005255 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005256 int lasti = 4;
5257 char_u key[AH_KEY_LEN];
5258
5259 if (*items[0] == 'P')
5260 tp = &aff->af_pref;
5261 else
5262 tp = &aff->af_suff;
5263
5264 /* Myspell allows the same affix name to be used multiple
5265 * times. The affix files that do this have an undocumented
5266 * "S" flag on all but the last block, thus we check for that
5267 * and store it in ah_follows. */
5268 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5269 hi = hash_find(tp, key);
5270 if (!HASHITEM_EMPTY(hi))
5271 {
5272 cur_aff = HI2AH(hi);
5273 if (cur_aff->ah_combine != (*items[2] == 'Y'))
5274 smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5275 fname, lnum, items[1]);
5276 if (!cur_aff->ah_follows)
5277 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5278 fname, lnum, items[1]);
5279 }
5280 else
5281 {
5282 /* New affix letter. */
5283 cur_aff = (affheader_T *)getroom(spin,
5284 sizeof(affheader_T), TRUE);
5285 if (cur_aff == NULL)
5286 break;
5287 cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5288 fname, lnum);
5289 if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5290 break;
5291 if (cur_aff->ah_flag == aff->af_bad
Bram Moolenaar371baa92005-12-29 22:43:53 +00005292 || cur_aff->ah_flag == aff->af_rare
5293 || cur_aff->ah_flag == aff->af_keepcase
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005294 || cur_aff->ah_flag == aff->af_needaffix
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005295 || cur_aff->ah_flag == aff->af_nosuggest
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005296 || cur_aff->ah_flag == aff->af_needcomp)
Bram Moolenaare1438bb2006-03-01 22:01:55 +00005297 smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar95529562005-08-25 21:21:38 +00005298 fname, lnum, items[1]);
5299 STRCPY(cur_aff->ah_key, items[1]);
5300 hash_add(tp, cur_aff->ah_key);
5301
5302 cur_aff->ah_combine = (*items[2] == 'Y');
5303 }
5304
5305 /* Check for the "S" flag, which apparently means that another
5306 * block with the same affix name is following. */
5307 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5308 {
5309 ++lasti;
5310 cur_aff->ah_follows = TRUE;
5311 }
5312 else
5313 cur_aff->ah_follows = FALSE;
5314
Bram Moolenaar8db73182005-06-17 21:51:16 +00005315 /* Myspell allows extra text after the item, but that might
5316 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaar95529562005-08-25 21:21:38 +00005317 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar8db73182005-06-17 21:51:16 +00005318 smsg((char_u *)_("Trailing text in %s line %d: %s"),
5319 fname, lnum, items[4]);
5320
Bram Moolenaar95529562005-08-25 21:21:38 +00005321 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005322 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5323 fname, lnum, items[2]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005324
Bram Moolenaar95529562005-08-25 21:21:38 +00005325 if (*items[0] == 'P' && aff->af_pfxpostpone)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005326 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005327 if (cur_aff->ah_newID == 0)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005328 {
5329 /* Use a new number in the .spl file later, to be able
5330 * to handle multiple .aff files. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005331 check_renumber(spin);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005332 cur_aff->ah_newID = ++spin->si_newprefID;
5333
5334 /* We only really use ah_newID if the prefix is
5335 * postponed. We know that only after handling all
5336 * the items. */
5337 did_postpone_prefix = FALSE;
5338 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005339 else
5340 /* Did use the ID in a previous block. */
5341 did_postpone_prefix = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005342 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005343
Bram Moolenaar51485f02005-06-04 21:55:20 +00005344 aff_todo = atoi((char *)items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005345 }
5346 else if ((STRCMP(items[0], "PFX") == 0
5347 || STRCMP(items[0], "SFX") == 0)
5348 && aff_todo > 0
5349 && STRCMP(cur_aff->ah_key, items[1]) == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00005350 && itemcnt >= 5)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005351 {
5352 affentry_T *aff_entry;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005353 int rare = FALSE;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005354 int nocomp = FALSE;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005355 int upper = FALSE;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005356 int lasti = 5;
5357
Bram Moolenaar5195e452005-08-19 20:32:47 +00005358 /* Check for "rare" and "nocomp" after the other info. */
5359 while (itemcnt > lasti)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005360 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00005361 if (!rare && STRICMP(items[lasti], "rare") == 0)
5362 {
5363 rare = TRUE;
5364 ++lasti;
5365 }
5366 else if (!nocomp && STRICMP(items[lasti], "nocomp") == 0)
5367 {
5368 nocomp = TRUE;
5369 ++lasti;
5370 }
5371 else
5372 break;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005373 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005374
Bram Moolenaar8db73182005-06-17 21:51:16 +00005375 /* Myspell allows extra text after the item, but that might
5376 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005377 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005378 smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar8db73182005-06-17 21:51:16 +00005379
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005380 /* New item for an affix letter. */
5381 --aff_todo;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005382 aff_entry = (affentry_T *)getroom(spin,
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00005383 sizeof(affentry_T), TRUE);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005384 if (aff_entry == NULL)
5385 break;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005386 aff_entry->ae_rare = rare;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005387 aff_entry->ae_nocomp = nocomp;
Bram Moolenaar5482f332005-04-17 20:18:43 +00005388
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005389 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005390 aff_entry->ae_chop = getroom_save(spin, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005391 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005392 aff_entry->ae_add = getroom_save(spin, items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005393
Bram Moolenaar51485f02005-06-04 21:55:20 +00005394 /* Don't use an affix entry with non-ASCII characters when
5395 * "spin->si_ascii" is TRUE. */
5396 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00005397 || has_non_ascii(aff_entry->ae_add)))
5398 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00005399 aff_entry->ae_next = cur_aff->ah_first;
5400 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00005401
5402 if (STRCMP(items[4], ".") != 0)
5403 {
5404 char_u buf[MAXLINELEN];
5405
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005406 aff_entry->ae_cond = getroom_save(spin, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005407 if (*items[0] == 'P')
5408 sprintf((char *)buf, "^%s", items[4]);
5409 else
5410 sprintf((char *)buf, "%s$", items[4]);
5411 aff_entry->ae_prog = vim_regcomp(buf,
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005412 RE_MAGIC + RE_STRING + RE_STRICT);
5413 if (aff_entry->ae_prog == NULL)
5414 smsg((char_u *)_("Broken condition in %s line %d: %s"),
5415 fname, lnum, items[4]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005416 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005417
5418 /* For postponed prefixes we need an entry in si_prefcond
5419 * for the condition. Use an existing one if possible. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00005420 if (*items[0] == 'P' && aff->af_pfxpostpone)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005421 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005422 /* When the chop string is one lower-case letter and
5423 * the add string ends in the upper-case letter we set
5424 * the "upper" flag, clear "ae_chop" and remove the
5425 * letters from "ae_add". The condition must either
5426 * be empty or start with the same letter. */
5427 if (aff_entry->ae_chop != NULL
5428 && aff_entry->ae_add != NULL
5429#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005430 && aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar53805d12005-08-01 07:08:33 +00005431 aff_entry->ae_chop)] == NUL
5432#else
5433 && aff_entry->ae_chop[1] == NUL
5434#endif
5435 )
5436 {
5437 int c, c_up;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005438
Bram Moolenaar53805d12005-08-01 07:08:33 +00005439 c = PTR2CHAR(aff_entry->ae_chop);
5440 c_up = SPELL_TOUPPER(c);
5441 if (c_up != c
5442 && (aff_entry->ae_cond == NULL
5443 || PTR2CHAR(aff_entry->ae_cond) == c))
5444 {
5445 p = aff_entry->ae_add
5446 + STRLEN(aff_entry->ae_add);
5447 mb_ptr_back(aff_entry->ae_add, p);
5448 if (PTR2CHAR(p) == c_up)
5449 {
5450 upper = TRUE;
5451 aff_entry->ae_chop = NULL;
5452 *p = NUL;
5453
5454 /* The condition is matched with the
5455 * actual word, thus must check for the
5456 * upper-case letter. */
5457 if (aff_entry->ae_cond != NULL)
5458 {
5459 char_u buf[MAXLINELEN];
5460#ifdef FEAT_MBYTE
5461 if (has_mbyte)
5462 {
5463 onecap_copy(items[4], buf, TRUE);
5464 aff_entry->ae_cond = getroom_save(
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005465 spin, buf);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005466 }
5467 else
5468#endif
5469 *aff_entry->ae_cond = c_up;
5470 if (aff_entry->ae_cond != NULL)
5471 {
5472 sprintf((char *)buf, "^%s",
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005473 aff_entry->ae_cond);
Bram Moolenaar53805d12005-08-01 07:08:33 +00005474 vim_free(aff_entry->ae_prog);
5475 aff_entry->ae_prog = vim_regcomp(
5476 buf, RE_MAGIC + RE_STRING);
5477 }
5478 }
5479 }
5480 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005481 }
5482
Bram Moolenaar53805d12005-08-01 07:08:33 +00005483 if (aff_entry->ae_chop == NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005484 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00005485 int idx;
5486 char_u **pp;
5487 int n;
5488
Bram Moolenaar6de68532005-08-24 22:08:48 +00005489 /* Find a previously used condition. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00005490 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5491 --idx)
5492 {
5493 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5494 if (str_equal(p, aff_entry->ae_cond))
5495 break;
5496 }
5497 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5498 {
5499 /* Not found, add a new condition. */
5500 idx = spin->si_prefcond.ga_len++;
5501 pp = ((char_u **)spin->si_prefcond.ga_data)
5502 + idx;
5503 if (aff_entry->ae_cond == NULL)
5504 *pp = NULL;
5505 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005506 *pp = getroom_save(spin,
Bram Moolenaar53805d12005-08-01 07:08:33 +00005507 aff_entry->ae_cond);
5508 }
5509
5510 /* Add the prefix to the prefix tree. */
5511 if (aff_entry->ae_add == NULL)
5512 p = (char_u *)"";
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00005513 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00005514 p = aff_entry->ae_add;
5515 /* PFX_FLAGS is a negative number, so that
5516 * tree_add_word() knows this is the prefix tree. */
5517 n = PFX_FLAGS;
5518 if (rare)
5519 n |= WFP_RARE;
5520 if (!cur_aff->ah_combine)
5521 n |= WFP_NC;
5522 if (upper)
5523 n |= WFP_UP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00005524 tree_add_word(spin, p, spin->si_prefroot, n,
5525 idx, cur_aff->ah_newID);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005526 did_postpone_prefix = TRUE;
5527 }
5528
5529 /* Didn't actually use ah_newID, backup si_newprefID. */
5530 if (aff_todo == 0 && !did_postpone_prefix)
5531 {
5532 --spin->si_newprefID;
5533 cur_aff->ah_newID = 0;
Bram Moolenaar53805d12005-08-01 07:08:33 +00005534 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005535 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00005536 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005537 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005538 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2
5539 && fol == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005540 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005541 fol = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005542 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005543 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2
5544 && low == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005545 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005546 low = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005547 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005548 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2
5549 && upp == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005550 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005551 upp = vim_strsave(items[1]);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005552 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005553 else if ((STRCMP(items[0], "REP") == 0
5554 || STRCMP(items[0], "REPSAL") == 0)
5555 && itemcnt == 2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005556 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005557 /* Ignore REP/REPSAL count */;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005558 if (!isdigit(*items[1]))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005559 smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005560 fname, lnum);
5561 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005562 else if ((STRCMP(items[0], "REP") == 0
5563 || STRCMP(items[0], "REPSAL") == 0)
5564 && itemcnt >= 3)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005565 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005566 /* REP/REPSAL item */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00005567 /* Myspell ignores extra arguments, we require it starts with
5568 * # to detect mistakes. */
5569 if (itemcnt > 3 && items[3][0] != '#')
5570 smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005571 if (items[0][3] == 'S' ? do_repsal : do_rep)
Bram Moolenaar1e015462005-09-25 22:16:38 +00005572 {
5573 /* Replace underscore with space (can't include a space
5574 * directly). */
5575 for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5576 if (*p == '_')
5577 *p = ' ';
5578 for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5579 if (*p == '_')
5580 *p = ' ';
Bram Moolenaar4770d092006-01-12 23:22:24 +00005581 add_fromto(spin, items[0][3] == 'S'
5582 ? &spin->si_repsal
5583 : &spin->si_rep, items[1], items[2]);
Bram Moolenaar1e015462005-09-25 22:16:38 +00005584 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005585 }
5586 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
5587 {
5588 /* MAP item or count */
5589 if (!found_map)
5590 {
5591 /* First line contains the count. */
5592 found_map = TRUE;
5593 if (!isdigit(*items[1]))
5594 smsg((char_u *)_("Expected MAP count in %s line %d"),
5595 fname, lnum);
5596 }
5597 else if (do_map)
5598 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00005599 int c;
5600
5601 /* Check that every character appears only once. */
5602 for (p = items[1]; *p != NUL; )
5603 {
5604#ifdef FEAT_MBYTE
5605 c = mb_ptr2char_adv(&p);
5606#else
5607 c = *p++;
5608#endif
5609 if ((spin->si_map.ga_len > 0
5610 && vim_strchr(spin->si_map.ga_data, c)
5611 != NULL)
5612 || vim_strchr(p, c) != NULL)
5613 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5614 fname, lnum);
5615 }
5616
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005617 /* We simply concatenate all the MAP strings, separated by
5618 * slashes. */
5619 ga_concat(&spin->si_map, items[1]);
5620 ga_append(&spin->si_map, '/');
5621 }
5622 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00005623 /* Accept "SAL from to" and "SAL from to # comment". */
5624 else if (STRCMP(items[0], "SAL") == 0
5625 && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005626 {
5627 if (do_sal)
5628 {
5629 /* SAL item (sounds-a-like)
5630 * Either one of the known keys or a from-to pair. */
5631 if (STRCMP(items[1], "followup") == 0)
5632 spin->si_followup = sal_to_bool(items[2]);
5633 else if (STRCMP(items[1], "collapse_result") == 0)
5634 spin->si_collapse = sal_to_bool(items[2]);
5635 else if (STRCMP(items[1], "remove_accents") == 0)
5636 spin->si_rem_accents = sal_to_bool(items[2]);
5637 else
5638 /* when "to" is "_" it means empty */
5639 add_fromto(spin, &spin->si_sal, items[1],
5640 STRCMP(items[2], "_") == 0 ? (char_u *)""
5641 : items[2]);
5642 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005643 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005644 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005645 && sofofrom == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005646 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005647 sofofrom = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005648 }
5649 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
Bram Moolenaar6de68532005-08-24 22:08:48 +00005650 && sofoto == NULL)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005651 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005652 sofoto = getroom_save(spin, items[1]);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005653 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005654 else if (STRCMP(items[0], "COMMON") == 0)
5655 {
5656 int i;
5657
5658 for (i = 1; i < itemcnt; ++i)
5659 {
5660 if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
5661 items[i])))
5662 {
5663 p = vim_strsave(items[i]);
5664 if (p == NULL)
5665 break;
5666 hash_add(&spin->si_commonwords, p);
5667 }
5668 }
5669 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00005670 else
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005671 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005672 fname, lnum, items[0]);
5673 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005674 }
5675
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005676 if (fol != NULL || low != NULL || upp != NULL)
5677 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005678 if (spin->si_clear_chartab)
5679 {
5680 /* Clear the char type tables, don't want to use any of the
5681 * currently used spell properties. */
5682 init_spell_chartab();
5683 spin->si_clear_chartab = FALSE;
5684 }
5685
Bram Moolenaar3982c542005-06-08 21:56:31 +00005686 /*
5687 * Don't write a word table for an ASCII file, so that we don't check
5688 * for conflicts with a word table that matches 'encoding'.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005689 * Don't write one for utf-8 either, we use utf_*() and
Bram Moolenaar3982c542005-06-08 21:56:31 +00005690 * mb_get_class(), the list of chars in the file will be incomplete.
5691 */
5692 if (!spin->si_ascii
5693#ifdef FEAT_MBYTE
5694 && !enc_utf8
5695#endif
5696 )
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00005697 {
5698 if (fol == NULL || low == NULL || upp == NULL)
5699 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
5700 else
Bram Moolenaar3982c542005-06-08 21:56:31 +00005701 (void)set_spell_chartab(fol, low, upp);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00005702 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00005703
5704 vim_free(fol);
5705 vim_free(low);
5706 vim_free(upp);
5707 }
5708
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005709 /* Use compound specifications of the .aff file for the spell info. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005710 if (compmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005711 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005712 aff_check_number(spin->si_compmax, compmax, "COMPOUNDMAX");
5713 spin->si_compmax = compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005714 }
5715
Bram Moolenaar6de68532005-08-24 22:08:48 +00005716 if (compminlen != 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005717 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005718 aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
5719 spin->si_compminlen = compminlen;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005720 }
5721
Bram Moolenaar6de68532005-08-24 22:08:48 +00005722 if (compsylmax != 0)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005723 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005724 if (syllable == NULL)
5725 smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
5726 aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
5727 spin->si_compsylmax = compsylmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005728 }
5729
Bram Moolenaar6de68532005-08-24 22:08:48 +00005730 if (compflags != NULL)
5731 process_compflags(spin, aff, compflags);
5732
5733 /* Check that we didn't use too many renumbered flags. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005734 if (spin->si_newcompID < spin->si_newprefID)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005735 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005736 if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005737 MSG(_("Too many postponed prefixes"));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005738 else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005739 MSG(_("Too many compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005740 else
Bram Moolenaar6de68532005-08-24 22:08:48 +00005741 MSG(_("Too many posponed prefixes and/or compound flags"));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00005742 }
5743
Bram Moolenaar6de68532005-08-24 22:08:48 +00005744 if (syllable != NULL)
Bram Moolenaar5195e452005-08-19 20:32:47 +00005745 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00005746 aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
5747 spin->si_syllable = syllable;
5748 }
5749
5750 if (sofofrom != NULL || sofoto != NULL)
5751 {
5752 if (sofofrom == NULL || sofoto == NULL)
5753 smsg((char_u *)_("Missing SOFO%s line in %s"),
5754 sofofrom == NULL ? "FROM" : "TO", fname);
5755 else if (spin->si_sal.ga_len > 0)
5756 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar5195e452005-08-19 20:32:47 +00005757 else
Bram Moolenaar6de68532005-08-24 22:08:48 +00005758 {
5759 aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
5760 aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
5761 spin->si_sofofr = sofofrom;
5762 spin->si_sofoto = sofoto;
5763 }
5764 }
5765
5766 if (midword != NULL)
5767 {
5768 aff_check_string(spin->si_midword, midword, "MIDWORD");
5769 spin->si_midword = midword;
Bram Moolenaar5195e452005-08-19 20:32:47 +00005770 }
5771
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005772 vim_free(pc);
5773 fclose(fd);
5774 return aff;
5775}
5776
5777/*
Bram Moolenaar362e1a32006-03-06 23:29:24 +00005778 * Return TRUE if "s" is the name of an info item in the affix file.
5779 */
5780 static int
5781spell_info_item(s)
5782 char_u *s;
5783{
5784 return STRCMP(s, "NAME") == 0
5785 || STRCMP(s, "HOME") == 0
5786 || STRCMP(s, "VERSION") == 0
5787 || STRCMP(s, "AUTHOR") == 0
5788 || STRCMP(s, "EMAIL") == 0
5789 || STRCMP(s, "COPYRIGHT") == 0;
5790}
5791
5792/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00005793 * Turn an affix flag name into a number, according to the FLAG type.
5794 * returns zero for failure.
5795 */
5796 static unsigned
5797affitem2flag(flagtype, item, fname, lnum)
5798 int flagtype;
5799 char_u *item;
5800 char_u *fname;
5801 int lnum;
5802{
5803 unsigned res;
5804 char_u *p = item;
5805
5806 res = get_affitem(flagtype, &p);
5807 if (res == 0)
5808 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005809 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005810 smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
5811 fname, lnum, item);
5812 else
5813 smsg((char_u *)_("Illegal flag in %s line %d: %s"),
5814 fname, lnum, item);
5815 }
5816 if (*p != NUL)
5817 {
5818 smsg((char_u *)_(e_affname), fname, lnum, item);
5819 return 0;
5820 }
5821
5822 return res;
5823}
5824
5825/*
5826 * Get one affix name from "*pp" and advance the pointer.
5827 * Returns zero for an error, still advances the pointer then.
5828 */
5829 static unsigned
5830get_affitem(flagtype, pp)
5831 int flagtype;
5832 char_u **pp;
5833{
5834 int res;
5835
Bram Moolenaar95529562005-08-25 21:21:38 +00005836 if (flagtype == AFT_NUM)
Bram Moolenaar6de68532005-08-24 22:08:48 +00005837 {
5838 if (!VIM_ISDIGIT(**pp))
5839 {
Bram Moolenaar95529562005-08-25 21:21:38 +00005840 ++*pp; /* always advance, avoid getting stuck */
Bram Moolenaar6de68532005-08-24 22:08:48 +00005841 return 0;
5842 }
5843 res = getdigits(pp);
5844 }
5845 else
5846 {
5847#ifdef FEAT_MBYTE
5848 res = mb_ptr2char_adv(pp);
5849#else
5850 res = *(*pp)++;
5851#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00005852 if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
Bram Moolenaar6de68532005-08-24 22:08:48 +00005853 && res >= 'A' && res <= 'Z'))
5854 {
5855 if (**pp == NUL)
5856 return 0;
5857#ifdef FEAT_MBYTE
5858 res = mb_ptr2char_adv(pp) + (res << 16);
5859#else
5860 res = *(*pp)++ + (res << 16);
5861#endif
5862 }
5863 }
5864 return res;
5865}
5866
5867/*
5868 * Process the "compflags" string used in an affix file and append it to
5869 * spin->si_compflags.
5870 * The processing involves changing the affix names to ID numbers, so that
5871 * they fit in one byte.
5872 */
5873 static void
5874process_compflags(spin, aff, compflags)
5875 spellinfo_T *spin;
5876 afffile_T *aff;
5877 char_u *compflags;
5878{
5879 char_u *p;
5880 char_u *prevp;
5881 unsigned flag;
5882 compitem_T *ci;
5883 int id;
5884 int len;
5885 char_u *tp;
5886 char_u key[AH_KEY_LEN];
5887 hashitem_T *hi;
5888
5889 /* Make room for the old and the new compflags, concatenated with a / in
5890 * between. Processing it makes it shorter, but we don't know by how
5891 * much, thus allocate the maximum. */
5892 len = STRLEN(compflags) + 1;
5893 if (spin->si_compflags != NULL)
5894 len += STRLEN(spin->si_compflags) + 1;
5895 p = getroom(spin, len, FALSE);
5896 if (p == NULL)
5897 return;
5898 if (spin->si_compflags != NULL)
5899 {
5900 STRCPY(p, spin->si_compflags);
5901 STRCAT(p, "/");
5902 }
Bram Moolenaar6de68532005-08-24 22:08:48 +00005903 spin->si_compflags = p;
5904 tp = p + STRLEN(p);
5905
5906 for (p = compflags; *p != NUL; )
5907 {
5908 if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
5909 /* Copy non-flag characters directly. */
5910 *tp++ = *p++;
5911 else
5912 {
5913 /* First get the flag number, also checks validity. */
5914 prevp = p;
5915 flag = get_affitem(aff->af_flagtype, &p);
5916 if (flag != 0)
5917 {
5918 /* Find the flag in the hashtable. If it was used before, use
5919 * the existing ID. Otherwise add a new entry. */
5920 vim_strncpy(key, prevp, p - prevp);
5921 hi = hash_find(&aff->af_comp, key);
5922 if (!HASHITEM_EMPTY(hi))
5923 id = HI2CI(hi)->ci_newID;
5924 else
5925 {
5926 ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
5927 if (ci == NULL)
5928 break;
5929 STRCPY(ci->ci_key, key);
5930 ci->ci_flag = flag;
5931 /* Avoid using a flag ID that has a special meaning in a
5932 * regexp (also inside []). */
5933 do
5934 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005935 check_renumber(spin);
5936 id = spin->si_newcompID--;
5937 } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
Bram Moolenaar6de68532005-08-24 22:08:48 +00005938 ci->ci_newID = id;
5939 hash_add(&aff->af_comp, ci->ci_key);
5940 }
5941 *tp++ = id;
5942 }
Bram Moolenaar95529562005-08-25 21:21:38 +00005943 if (aff->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00005944 ++p;
5945 }
5946 }
5947
5948 *tp = NUL;
5949}
5950
5951/*
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00005952 * Check that the new IDs for postponed affixes and compounding don't overrun
5953 * each other. We have almost 255 available, but start at 0-127 to avoid
5954 * using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
5955 * When that is used up an error message is given.
5956 */
5957 static void
5958check_renumber(spin)
5959 spellinfo_T *spin;
5960{
5961 if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
5962 {
5963 spin->si_newprefID = 127;
5964 spin->si_newcompID = 255;
5965 }
5966}
5967
5968/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00005969 * Return TRUE if flag "flag" appears in affix list "afflist".
5970 */
5971 static int
5972flag_in_afflist(flagtype, afflist, flag)
5973 int flagtype;
5974 char_u *afflist;
5975 unsigned flag;
5976{
5977 char_u *p;
5978 unsigned n;
5979
5980 switch (flagtype)
5981 {
5982 case AFT_CHAR:
5983 return vim_strchr(afflist, flag) != NULL;
5984
Bram Moolenaar95529562005-08-25 21:21:38 +00005985 case AFT_CAPLONG:
5986 case AFT_LONG:
Bram Moolenaar6de68532005-08-24 22:08:48 +00005987 for (p = afflist; *p != NUL; )
5988 {
5989#ifdef FEAT_MBYTE
5990 n = mb_ptr2char_adv(&p);
5991#else
5992 n = *p++;
5993#endif
Bram Moolenaar95529562005-08-25 21:21:38 +00005994 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
Bram Moolenaar6de68532005-08-24 22:08:48 +00005995 && *p != NUL)
5996#ifdef FEAT_MBYTE
5997 n = mb_ptr2char_adv(&p) + (n << 16);
5998#else
5999 n = *p++ + (n << 16);
6000#endif
6001 if (n == flag)
6002 return TRUE;
6003 }
6004 break;
6005
Bram Moolenaar95529562005-08-25 21:21:38 +00006006 case AFT_NUM:
Bram Moolenaar6de68532005-08-24 22:08:48 +00006007 for (p = afflist; *p != NUL; )
6008 {
6009 n = getdigits(&p);
6010 if (n == flag)
6011 return TRUE;
6012 if (*p != NUL) /* skip over comma */
6013 ++p;
6014 }
6015 break;
6016 }
6017 return FALSE;
6018}
6019
6020/*
6021 * Give a warning when "spinval" and "affval" numbers are set and not the same.
6022 */
6023 static void
6024aff_check_number(spinval, affval, name)
6025 int spinval;
6026 int affval;
6027 char *name;
6028{
6029 if (spinval != 0 && spinval != affval)
6030 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6031}
6032
6033/*
6034 * Give a warning when "spinval" and "affval" strings are set and not the same.
6035 */
6036 static void
6037aff_check_string(spinval, affval, name)
6038 char_u *spinval;
6039 char_u *affval;
6040 char *name;
6041{
6042 if (spinval != NULL && STRCMP(spinval, affval) != 0)
6043 smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6044}
6045
6046/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006047 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
6048 * NULL as equal.
6049 */
6050 static int
6051str_equal(s1, s2)
6052 char_u *s1;
6053 char_u *s2;
6054{
6055 if (s1 == NULL || s2 == NULL)
6056 return s1 == s2;
6057 return STRCMP(s1, s2) == 0;
6058}
6059
6060/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006061 * Add a from-to item to "gap". Used for REP and SAL items.
6062 * They are stored case-folded.
6063 */
6064 static void
6065add_fromto(spin, gap, from, to)
6066 spellinfo_T *spin;
6067 garray_T *gap;
6068 char_u *from;
6069 char_u *to;
6070{
6071 fromto_T *ftp;
6072 char_u word[MAXWLEN];
6073
6074 if (ga_grow(gap, 1) == OK)
6075 {
6076 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
6077 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006078 ftp->ft_from = getroom_save(spin, word);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006079 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006080 ftp->ft_to = getroom_save(spin, word);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006081 ++gap->ga_len;
6082 }
6083}
6084
6085/*
6086 * Convert a boolean argument in a SAL line to TRUE or FALSE;
6087 */
6088 static int
6089sal_to_bool(s)
6090 char_u *s;
6091{
6092 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
6093}
6094
6095/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00006096 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
6097 * When "s" is NULL FALSE is returned.
6098 */
6099 static int
6100has_non_ascii(s)
6101 char_u *s;
6102{
6103 char_u *p;
6104
6105 if (s != NULL)
6106 for (p = s; *p != NUL; ++p)
6107 if (*p >= 128)
6108 return TRUE;
6109 return FALSE;
6110}
6111
6112/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006113 * Free the structure filled by spell_read_aff().
6114 */
6115 static void
6116spell_free_aff(aff)
6117 afffile_T *aff;
6118{
6119 hashtab_T *ht;
6120 hashitem_T *hi;
6121 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006122 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006123 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006124
6125 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006126
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006127 /* All this trouble to free the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006128 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
6129 {
6130 todo = ht->ht_used;
6131 for (hi = ht->ht_array; todo > 0; ++hi)
6132 {
6133 if (!HASHITEM_EMPTY(hi))
6134 {
6135 --todo;
6136 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006137 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6138 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006139 }
6140 }
6141 if (ht == &aff->af_suff)
6142 break;
6143 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006144
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006145 hash_clear(&aff->af_pref);
6146 hash_clear(&aff->af_suff);
Bram Moolenaar6de68532005-08-24 22:08:48 +00006147 hash_clear(&aff->af_comp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006148}
6149
6150/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006151 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006152 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006153 */
6154 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006155spell_read_dic(spin, fname, affile)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006156 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006157 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006158 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006159{
Bram Moolenaar51485f02005-06-04 21:55:20 +00006160 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006161 char_u line[MAXLINELEN];
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006162 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006163 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006164 char_u store_afflist[MAXWLEN];
6165 int pfxlen;
6166 int need_affix;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006167 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006168 char_u *pc;
6169 char_u *w;
6170 int l;
6171 hash_T hash;
6172 hashitem_T *hi;
6173 FILE *fd;
6174 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006175 int non_ascii = 0;
6176 int retval = OK;
6177 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006178 int flags;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006179 int duplicate = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006180
Bram Moolenaar51485f02005-06-04 21:55:20 +00006181 /*
6182 * Open the file.
6183 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006184 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006185 if (fd == NULL)
6186 {
6187 EMSG2(_(e_notopen), fname);
6188 return FAIL;
6189 }
6190
Bram Moolenaar51485f02005-06-04 21:55:20 +00006191 /* The hashtable is only used to detect duplicated words. */
6192 hash_init(&ht);
6193
Bram Moolenaar4770d092006-01-12 23:22:24 +00006194 vim_snprintf((char *)IObuff, IOSIZE,
6195 _("Reading dictionary file %s ..."), fname);
6196 spell_message(spin, IObuff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006197
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006198 /* start with a message for the first line */
6199 spin->si_msg_count = 999999;
6200
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006201 /* Read and ignore the first line: word count. */
6202 (void)vim_fgets(line, MAXLINELEN, fd);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006203 if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006204 EMSG2(_("E760: No word count in %s"), fname);
6205
6206 /*
6207 * Read all the lines in the file one by one.
6208 * The words are converted to 'encoding' here, before being added to
6209 * the hashtable.
6210 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006211 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006212 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006213 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006214 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +00006215 if (line[0] == '#' || line[0] == '/')
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006216 continue; /* comment line */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006217
Bram Moolenaar51485f02005-06-04 21:55:20 +00006218 /* Remove CR, LF and white space from the end. White space halfway
6219 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006220 l = STRLEN(line);
6221 while (l > 0 && line[l - 1] <= ' ')
6222 --l;
6223 if (l == 0)
6224 continue; /* empty line */
6225 line[l] = NUL;
6226
Bram Moolenaar66fa2712006-01-22 23:22:22 +00006227 /* Truncate the word at the "/", set "afflist" to what follows.
6228 * Replace "\/" by "/" and "\\" by "\". */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006229 afflist = NULL;
6230 for (p = line; *p != NUL; mb_ptr_adv(p))
6231 {
Bram Moolenaar66fa2712006-01-22 23:22:22 +00006232 if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
6233 mch_memmove(p, p + 1, STRLEN(p));
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006234 else if (*p == '/')
6235 {
6236 *p = NUL;
6237 afflist = p + 1;
6238 break;
6239 }
6240 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006241
6242 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6243 if (spin->si_ascii && has_non_ascii(line))
6244 {
6245 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00006246 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006247 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00006248
Bram Moolenaarb765d632005-06-07 21:00:02 +00006249#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006250 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006251 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006252 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006253 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006254 if (pc == NULL)
6255 {
6256 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6257 fname, lnum, line);
6258 continue;
6259 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006260 w = pc;
6261 }
6262 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00006263#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006264 {
6265 pc = NULL;
6266 w = line;
6267 }
6268
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006269 /* This takes time, print a message every 10000 words. */
6270 if (spin->si_verbose && spin->si_msg_count > 10000)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006271 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006272 spin->si_msg_count = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006273 vim_snprintf((char *)message, sizeof(message),
6274 _("line %6d, word %6d - %s"),
6275 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6276 msg_start();
6277 msg_puts_long_attr(message, 0);
6278 msg_clr_eos();
6279 msg_didout = FALSE;
6280 msg_col = 0;
6281 out_flush();
6282 }
6283
Bram Moolenaar51485f02005-06-04 21:55:20 +00006284 /* Store the word in the hashtable to be able to find duplicates. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006285 dw = (char_u *)getroom_save(spin, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006286 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006287 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006288 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006289 if (retval == FAIL)
6290 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006291
Bram Moolenaar51485f02005-06-04 21:55:20 +00006292 hash = hash_hash(dw);
6293 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006294 if (!HASHITEM_EMPTY(hi))
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006295 {
6296 if (p_verbose > 0)
6297 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006298 fname, lnum, dw);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006299 else if (duplicate == 0)
6300 smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6301 fname, lnum, dw);
6302 ++duplicate;
6303 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006304 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00006305 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006306
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006307 flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006308 store_afflist[0] = NUL;
6309 pfxlen = 0;
6310 need_affix = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006311 if (afflist != NULL)
6312 {
6313 /* Check for affix name that stands for keep-case word and stands
6314 * for rare word (if defined). */
Bram Moolenaar371baa92005-12-29 22:43:53 +00006315 if (affile->af_keepcase != 0 && flag_in_afflist(
6316 affile->af_flagtype, afflist, affile->af_keepcase))
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006317 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar371baa92005-12-29 22:43:53 +00006318 if (affile->af_rare != 0 && flag_in_afflist(
6319 affile->af_flagtype, afflist, affile->af_rare))
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006320 flags |= WF_RARE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006321 if (affile->af_bad != 0 && flag_in_afflist(
6322 affile->af_flagtype, afflist, affile->af_bad))
Bram Moolenaar0c405862005-06-22 22:26:26 +00006323 flags |= WF_BANNED;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006324 if (affile->af_needaffix != 0 && flag_in_afflist(
6325 affile->af_flagtype, afflist, affile->af_needaffix))
Bram Moolenaar5195e452005-08-19 20:32:47 +00006326 need_affix = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006327 if (affile->af_needcomp != 0 && flag_in_afflist(
6328 affile->af_flagtype, afflist, affile->af_needcomp))
6329 flags |= WF_NEEDCOMP;
Bram Moolenaare1438bb2006-03-01 22:01:55 +00006330 if (affile->af_nosuggest != 0 && flag_in_afflist(
6331 affile->af_flagtype, afflist, affile->af_nosuggest))
6332 flags |= WF_NOSUGGEST;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006333
6334 if (affile->af_pfxpostpone)
6335 /* Need to store the list of prefix IDs with the word. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006336 pfxlen = get_pfxlist(affile, afflist, store_afflist);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006337
Bram Moolenaar5195e452005-08-19 20:32:47 +00006338 if (spin->si_compflags != NULL)
6339 /* Need to store the list of compound flags with the word.
6340 * Concatenate them to the list of prefix IDs. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006341 get_compflags(affile, afflist, store_afflist + pfxlen);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006342 }
6343
Bram Moolenaar51485f02005-06-04 21:55:20 +00006344 /* Add the word to the word tree(s). */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006345 if (store_word(spin, dw, flags, spin->si_region,
6346 store_afflist, need_affix) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006347 retval = FAIL;
6348
6349 if (afflist != NULL)
6350 {
6351 /* Find all matching suffixes and add the resulting words.
6352 * Additionally do matching prefixes that combine. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006353 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006354 &affile->af_suff, &affile->af_pref,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006355 FALSE, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006356 retval = FAIL;
6357
6358 /* Find all matching prefixes and add the resulting words. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006359 if (store_aff_word(spin, dw, afflist, affile,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006360 &affile->af_pref, NULL,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006361 FALSE, flags, store_afflist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006362 retval = FAIL;
6363 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006364 }
6365
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006366 if (duplicate > 0)
6367 smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006368 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006369 smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6370 non_ascii, fname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006371 hash_clear(&ht);
6372
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006373 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006374 return retval;
6375}
6376
6377/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006378 * Get the list of prefix IDs from the affix list "afflist".
6379 * Used for PFXPOSTPONE.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006380 * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6381 * and return the number of affixes.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006382 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006383 static int
6384get_pfxlist(affile, afflist, store_afflist)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006385 afffile_T *affile;
6386 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006387 char_u *store_afflist;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006388{
6389 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006390 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006391 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006392 int id;
6393 char_u key[AH_KEY_LEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006394 hashitem_T *hi;
6395
Bram Moolenaar6de68532005-08-24 22:08:48 +00006396 for (p = afflist; *p != NUL; )
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006397 {
Bram Moolenaar6de68532005-08-24 22:08:48 +00006398 prevp = p;
6399 if (get_affitem(affile->af_flagtype, &p) != 0)
6400 {
6401 /* A flag is a postponed prefix flag if it appears in "af_pref"
6402 * and it's ID is not zero. */
6403 vim_strncpy(key, prevp, p - prevp);
6404 hi = hash_find(&affile->af_pref, key);
6405 if (!HASHITEM_EMPTY(hi))
6406 {
6407 id = HI2AH(hi)->ah_newID;
6408 if (id != 0)
6409 store_afflist[cnt++] = id;
6410 }
6411 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006412 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006413 ++p;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006414 }
6415
Bram Moolenaar5195e452005-08-19 20:32:47 +00006416 store_afflist[cnt] = NUL;
6417 return cnt;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006418}
6419
6420/*
Bram Moolenaar6de68532005-08-24 22:08:48 +00006421 * Get the list of compound IDs from the affix list "afflist" that are used
6422 * for compound words.
Bram Moolenaar5195e452005-08-19 20:32:47 +00006423 * Puts the flags in "store_afflist[]".
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006424 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006425 static void
Bram Moolenaar6de68532005-08-24 22:08:48 +00006426get_compflags(affile, afflist, store_afflist)
6427 afffile_T *affile;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006428 char_u *afflist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006429 char_u *store_afflist;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006430{
6431 char_u *p;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006432 char_u *prevp;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006433 int cnt = 0;
Bram Moolenaar6de68532005-08-24 22:08:48 +00006434 char_u key[AH_KEY_LEN];
6435 hashitem_T *hi;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006436
Bram Moolenaar6de68532005-08-24 22:08:48 +00006437 for (p = afflist; *p != NUL; )
6438 {
6439 prevp = p;
6440 if (get_affitem(affile->af_flagtype, &p) != 0)
6441 {
6442 /* A flag is a compound flag if it appears in "af_comp". */
6443 vim_strncpy(key, prevp, p - prevp);
6444 hi = hash_find(&affile->af_comp, key);
6445 if (!HASHITEM_EMPTY(hi))
6446 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6447 }
Bram Moolenaar95529562005-08-25 21:21:38 +00006448 if (affile->af_flagtype == AFT_NUM && *p == ',')
Bram Moolenaar6de68532005-08-24 22:08:48 +00006449 ++p;
6450 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006451
Bram Moolenaar5195e452005-08-19 20:32:47 +00006452 store_afflist[cnt] = NUL;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006453}
6454
6455/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006456 * Apply affixes to a word and store the resulting words.
6457 * "ht" is the hashtable with affentry_T that need to be applied, either
6458 * prefixes or suffixes.
6459 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6460 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006461 *
6462 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006463 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006464 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00006465store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags,
6466 pfxlist, pfxlen)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006467 spellinfo_T *spin; /* spell info */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006468 char_u *word; /* basic word start */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006469 char_u *afflist; /* list of names of supported affixes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006470 afffile_T *affile;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006471 hashtab_T *ht;
6472 hashtab_T *xht;
6473 int comb; /* only use affixes that combine */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006474 int flags; /* flags for the word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006475 char_u *pfxlist; /* list of prefix IDs */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006476 int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest
6477 * is compound flags */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006478{
6479 int todo;
6480 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006481 affheader_T *ah;
6482 affentry_T *ae;
6483 regmatch_T regmatch;
6484 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006485 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006486 int i;
6487 char_u *p;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006488 int use_flags;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006489 char_u *use_pfxlist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006490 char_u pfx_pfxlist[MAXWLEN];
Bram Moolenaar5195e452005-08-19 20:32:47 +00006491 size_t wordlen = STRLEN(word);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006492
Bram Moolenaar51485f02005-06-04 21:55:20 +00006493 todo = ht->ht_used;
6494 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006495 {
6496 if (!HASHITEM_EMPTY(hi))
6497 {
6498 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006499 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00006500
Bram Moolenaar51485f02005-06-04 21:55:20 +00006501 /* Check that the affix combines, if required, and that the word
6502 * supports this affix. */
Bram Moolenaar6de68532005-08-24 22:08:48 +00006503 if ((!comb || ah->ah_combine) && flag_in_afflist(
6504 affile->af_flagtype, afflist, ah->ah_flag))
Bram Moolenaar5482f332005-04-17 20:18:43 +00006505 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006506 /* Loop over all affix entries with this name. */
6507 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006508 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006509 /* Check the condition. It's not logical to match case
6510 * here, but it is required for compatibility with
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006511 * Myspell.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006512 * Another requirement from Myspell is that the chop
6513 * string is shorter than the word itself.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006514 * For prefixes, when "PFXPOSTPONE" was used, only do
6515 * prefixes with a chop string. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006516 regmatch.regprog = ae->ae_prog;
6517 regmatch.rm_ic = FALSE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006518 if ((xht != NULL || !affile->af_pfxpostpone
6519 || ae->ae_chop != NULL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006520 && (ae->ae_chop == NULL
6521 || STRLEN(ae->ae_chop) < wordlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006522 && (ae->ae_prog == NULL
6523 || vim_regexec(&regmatch, word, (colnr_T)0)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006524 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006525 /* Match. Remove the chop and add the affix. */
6526 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006527 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006528 /* prefix: chop/add at the start of the word */
6529 if (ae->ae_add == NULL)
6530 *newword = NUL;
6531 else
6532 STRCPY(newword, ae->ae_add);
6533 p = word;
6534 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00006535 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00006536 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006537#ifdef FEAT_MBYTE
6538 if (has_mbyte)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006539 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00006540 i = mb_charlen(ae->ae_chop);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006541 for ( ; i > 0; --i)
6542 mb_ptr_adv(p);
6543 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00006544 else
6545#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006546 p += STRLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006547 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006548 STRCAT(newword, p);
6549 }
6550 else
6551 {
6552 /* suffix: chop/add at the end of the word */
6553 STRCPY(newword, word);
6554 if (ae->ae_chop != NULL)
6555 {
6556 /* Remove chop string. */
6557 p = newword + STRLEN(newword);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00006558 i = MB_CHARLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006559 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006560 mb_ptr_back(newword, p);
6561 *p = NUL;
6562 }
6563 if (ae->ae_add != NULL)
6564 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006565 }
6566
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006567 /* Obey the "rare" flag of the affix. */
6568 if (ae->ae_rare)
6569 use_flags = flags | WF_RARE;
6570 else
6571 use_flags = flags;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006572
6573 /* Obey the "nocomp" flag of the affix: don't use the
6574 * compound flags. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006575 use_pfxlist = pfxlist;
Bram Moolenaar5195e452005-08-19 20:32:47 +00006576 if (ae->ae_nocomp && pfxlist != NULL)
6577 {
6578 vim_strncpy(pfx_pfxlist, pfxlist, pfxlen);
6579 use_pfxlist = pfx_pfxlist;
6580 }
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006581
6582 /* When there are postponed prefixes... */
Bram Moolenaar551f84f2005-07-06 22:29:20 +00006583 if (spin->si_prefroot != NULL
6584 && spin->si_prefroot->wn_sibling != NULL)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006585 {
6586 /* ... add a flag to indicate an affix was used. */
6587 use_flags |= WF_HAS_AFF;
6588
6589 /* ... don't use a prefix list if combining
Bram Moolenaar5195e452005-08-19 20:32:47 +00006590 * affixes is not allowed. But do use the
6591 * compound flags after them. */
6592 if ((!ah->ah_combine || comb) && pfxlist != NULL)
6593 use_pfxlist += pfxlen;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006594 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006595
Bram Moolenaar51485f02005-06-04 21:55:20 +00006596 /* Store the modified word. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006597 if (store_word(spin, newword, use_flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006598 spin->si_region, use_pfxlist, FALSE) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006599 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006600
Bram Moolenaar51485f02005-06-04 21:55:20 +00006601 /* When added a suffix and combining is allowed also
6602 * try adding prefixes additionally. */
6603 if (xht != NULL && ah->ah_combine)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006604 if (store_aff_word(spin, newword, afflist, affile,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006605 xht, NULL, TRUE,
Bram Moolenaar5195e452005-08-19 20:32:47 +00006606 use_flags, use_pfxlist, pfxlen) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006607 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006608 }
6609 }
6610 }
6611 }
6612 }
6613
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006614 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006615}
6616
6617/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00006618 * Read a file with a list of words.
6619 */
6620 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006621spell_read_wordfile(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006622 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006623 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006624{
6625 FILE *fd;
6626 long lnum = 0;
6627 char_u rline[MAXLINELEN];
6628 char_u *line;
6629 char_u *pc = NULL;
Bram Moolenaar7887d882005-07-01 22:33:52 +00006630 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006631 int l;
6632 int retval = OK;
6633 int did_word = FALSE;
6634 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006635 int flags;
Bram Moolenaar3982c542005-06-08 21:56:31 +00006636 int regionmask;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006637
6638 /*
6639 * Open the file.
6640 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00006641 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00006642 if (fd == NULL)
6643 {
6644 EMSG2(_(e_notopen), fname);
6645 return FAIL;
6646 }
6647
Bram Moolenaar4770d092006-01-12 23:22:24 +00006648 vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
6649 spell_message(spin, IObuff);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006650
6651 /*
6652 * Read all the lines in the file one by one.
6653 */
6654 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
6655 {
6656 line_breakcheck();
6657 ++lnum;
6658
6659 /* Skip comment lines. */
6660 if (*rline == '#')
6661 continue;
6662
6663 /* Remove CR, LF and white space from the end. */
6664 l = STRLEN(rline);
6665 while (l > 0 && rline[l - 1] <= ' ')
6666 --l;
6667 if (l == 0)
6668 continue; /* empty or blank line */
6669 rline[l] = NUL;
6670
6671 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
6672 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006673#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00006674 if (spin->si_conv.vc_type != CONV_NONE)
6675 {
6676 pc = string_convert(&spin->si_conv, rline, NULL);
6677 if (pc == NULL)
6678 {
6679 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6680 fname, lnum, rline);
6681 continue;
6682 }
6683 line = pc;
6684 }
6685 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00006686#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00006687 {
6688 pc = NULL;
6689 line = rline;
6690 }
6691
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006692 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00006693 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006694 ++line;
6695 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006696 {
6697 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006698 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
6699 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006700 else if (did_word)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006701 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
6702 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006703 else
6704 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00006705#ifdef FEAT_MBYTE
6706 char_u *enc;
6707
Bram Moolenaar51485f02005-06-04 21:55:20 +00006708 /* Setup for conversion to 'encoding'. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006709 line += 10;
6710 enc = enc_canonize(line);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006711 if (enc != NULL && !spin->si_ascii
6712 && convert_setup(&spin->si_conv, enc,
6713 p_enc) == FAIL)
6714 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar3982c542005-06-08 21:56:31 +00006715 fname, line, p_enc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006716 vim_free(enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006717 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00006718#else
6719 smsg((char_u *)_("Conversion in %s not supported"), fname);
6720#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00006721 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006722 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006723 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006724
Bram Moolenaar3982c542005-06-08 21:56:31 +00006725 if (STRNCMP(line, "regions=", 8) == 0)
6726 {
6727 if (spin->si_region_count > 1)
6728 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
6729 fname, lnum, line);
6730 else
6731 {
6732 line += 8;
6733 if (STRLEN(line) > 16)
6734 smsg((char_u *)_("Too many regions in %s line %d: %s"),
6735 fname, lnum, line);
6736 else
6737 {
6738 spin->si_region_count = STRLEN(line) / 2;
6739 STRCPY(spin->si_region_name, line);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006740
6741 /* Adjust the mask for a word valid in all regions. */
6742 spin->si_region = (1 << spin->si_region_count) - 1;
Bram Moolenaar3982c542005-06-08 21:56:31 +00006743 }
6744 }
6745 continue;
6746 }
6747
Bram Moolenaar7887d882005-07-01 22:33:52 +00006748 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
6749 fname, lnum, line - 1);
6750 continue;
6751 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006752
Bram Moolenaar7887d882005-07-01 22:33:52 +00006753 flags = 0;
6754 regionmask = spin->si_region;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006755
Bram Moolenaar7887d882005-07-01 22:33:52 +00006756 /* Check for flags and region after a slash. */
6757 p = vim_strchr(line, '/');
6758 if (p != NULL)
6759 {
6760 *p++ = NUL;
6761 while (*p != NUL)
Bram Moolenaar3982c542005-06-08 21:56:31 +00006762 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00006763 if (*p == '=') /* keep-case word */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00006764 flags |= WF_KEEPCAP | WF_FIXCAP;
Bram Moolenaar7887d882005-07-01 22:33:52 +00006765 else if (*p == '!') /* Bad, bad, wicked word. */
6766 flags |= WF_BANNED;
6767 else if (*p == '?') /* Rare word. */
6768 flags |= WF_RARE;
6769 else if (VIM_ISDIGIT(*p)) /* region number(s) */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006770 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00006771 if ((flags & WF_REGION) == 0) /* first one */
6772 regionmask = 0;
6773 flags |= WF_REGION;
6774
6775 l = *p - '0';
Bram Moolenaar3982c542005-06-08 21:56:31 +00006776 if (l > spin->si_region_count)
6777 {
6778 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
Bram Moolenaar7887d882005-07-01 22:33:52 +00006779 fname, lnum, p);
Bram Moolenaar3982c542005-06-08 21:56:31 +00006780 break;
6781 }
6782 regionmask |= 1 << (l - 1);
Bram Moolenaar3982c542005-06-08 21:56:31 +00006783 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00006784 else
6785 {
6786 smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
6787 fname, lnum, p);
6788 break;
6789 }
6790 ++p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006791 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006792 }
6793
6794 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6795 if (spin->si_ascii && has_non_ascii(line))
6796 {
6797 ++non_ascii;
6798 continue;
6799 }
6800
6801 /* Normal word: store it. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006802 if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006803 {
6804 retval = FAIL;
6805 break;
6806 }
6807 did_word = TRUE;
6808 }
6809
6810 vim_free(pc);
6811 fclose(fd);
6812
Bram Moolenaar4770d092006-01-12 23:22:24 +00006813 if (spin->si_ascii && non_ascii > 0)
Bram Moolenaarb765d632005-06-07 21:00:02 +00006814 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006815 vim_snprintf((char *)IObuff, IOSIZE,
6816 _("Ignored %d words with non-ASCII characters"), non_ascii);
6817 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00006818 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006819
Bram Moolenaar51485f02005-06-04 21:55:20 +00006820 return retval;
6821}
6822
6823/*
6824 * Get part of an sblock_T, "len" bytes long.
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006825 * This avoids calling free() for every little struct we use (and keeping
6826 * track of them).
Bram Moolenaar51485f02005-06-04 21:55:20 +00006827 * The memory is cleared to all zeros.
6828 * Returns NULL when out of memory.
6829 */
6830 static void *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006831getroom(spin, len, align)
6832 spellinfo_T *spin;
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00006833 size_t len; /* length needed */
6834 int align; /* align for pointer */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006835{
6836 char_u *p;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006837 sblock_T *bl = spin->si_blocks;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006838
Bram Moolenaarcfc7d632005-07-28 22:28:16 +00006839 if (align && bl != NULL)
6840 /* Round size up for alignment. On some systems structures need to be
6841 * aligned to the size of a pointer (e.g., SPARC). */
6842 bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
6843 & ~(sizeof(char *) - 1);
6844
Bram Moolenaar51485f02005-06-04 21:55:20 +00006845 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
6846 {
6847 /* Allocate a block of memory. This is not freed until much later. */
6848 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
6849 if (bl == NULL)
6850 return NULL;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006851 bl->sb_next = spin->si_blocks;
6852 spin->si_blocks = bl;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006853 bl->sb_used = 0;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006854 ++spin->si_blocks_cnt;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006855 }
6856
6857 p = bl->sb_data + bl->sb_used;
6858 bl->sb_used += len;
6859
6860 return p;
6861}
6862
6863/*
6864 * Make a copy of a string into memory allocated with getroom().
6865 */
6866 static char_u *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006867getroom_save(spin, s)
6868 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006869 char_u *s;
6870{
6871 char_u *sc;
6872
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006873 sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006874 if (sc != NULL)
6875 STRCPY(sc, s);
6876 return sc;
6877}
6878
6879
6880/*
6881 * Free the list of allocated sblock_T.
6882 */
6883 static void
6884free_blocks(bl)
6885 sblock_T *bl;
6886{
6887 sblock_T *next;
6888
6889 while (bl != NULL)
6890 {
6891 next = bl->sb_next;
6892 vim_free(bl);
6893 bl = next;
6894 }
6895}
6896
6897/*
6898 * Allocate the root of a word tree.
6899 */
6900 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006901wordtree_alloc(spin)
6902 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006903{
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006904 return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00006905}
6906
6907/*
6908 * Store a word in the tree(s).
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00006909 * Always store it in the case-folded tree. For a keep-case word this is
6910 * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
6911 * used to find suggestions.
Bram Moolenaar51485f02005-06-04 21:55:20 +00006912 * For a keep-case word also store it in the keep-case tree.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00006913 * When "pfxlist" is not NULL store the word for each postponed prefix ID and
6914 * compound flag.
Bram Moolenaar51485f02005-06-04 21:55:20 +00006915 */
6916 static int
Bram Moolenaar5195e452005-08-19 20:32:47 +00006917store_word(spin, word, flags, region, pfxlist, need_affix)
Bram Moolenaar51485f02005-06-04 21:55:20 +00006918 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006919 char_u *word;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006920 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar3982c542005-06-08 21:56:31 +00006921 int region; /* supported region(s) */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006922 char_u *pfxlist; /* list of prefix IDs or NULL */
Bram Moolenaar5195e452005-08-19 20:32:47 +00006923 int need_affix; /* only store word with affix ID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00006924{
6925 int len = STRLEN(word);
6926 int ct = captype(word, word + len);
6927 char_u foldword[MAXWLEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006928 int res = OK;
6929 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006930
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006931 (void)spell_casefold(word, len, foldword, MAXWLEN);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006932 for (p = pfxlist; res == OK; ++p)
6933 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00006934 if (!need_affix || (p != NULL && *p != NUL))
6935 res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006936 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006937 if (p == NULL || *p == NUL)
6938 break;
6939 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00006940 ++spin->si_foldwcount;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006941
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006942 if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
Bram Moolenaar8db73182005-06-17 21:51:16 +00006943 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006944 for (p = pfxlist; res == OK; ++p)
6945 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00006946 if (!need_affix || (p != NULL && *p != NUL))
6947 res = tree_add_word(spin, word, spin->si_keeproot, flags,
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006948 region, p == NULL ? 0 : *p);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00006949 if (p == NULL || *p == NUL)
6950 break;
6951 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00006952 ++spin->si_keepwcount;
6953 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00006954 return res;
6955}
6956
6957/*
6958 * Add word "word" to a word tree at "root".
Bram Moolenaar4770d092006-01-12 23:22:24 +00006959 * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006960 * "rare" and "region" is the condition nr.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006961 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006962 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00006963 static int
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006964tree_add_word(spin, word, root, flags, region, affixID)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006965 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006966 char_u *word;
6967 wordnode_T *root;
6968 int flags;
6969 int region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00006970 int affixID;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006971{
Bram Moolenaar51485f02005-06-04 21:55:20 +00006972 wordnode_T *node = root;
6973 wordnode_T *np;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006974 wordnode_T *copyp, **copyprev;
Bram Moolenaar51485f02005-06-04 21:55:20 +00006975 wordnode_T **prev = NULL;
6976 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006977
Bram Moolenaar51485f02005-06-04 21:55:20 +00006978 /* Add each byte of the word to the tree, including the NUL at the end. */
6979 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006980 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006981 /* When there is more than one reference to this node we need to make
6982 * a copy, so that we can modify it. Copy the whole list of siblings
6983 * (we don't optimize for a partly shared list of siblings). */
6984 if (node != NULL && node->wn_refs > 1)
6985 {
6986 --node->wn_refs;
6987 copyprev = prev;
6988 for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
6989 {
6990 /* Allocate a new node and copy the info. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00006991 np = get_wordnode(spin);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00006992 if (np == NULL)
6993 return FAIL;
6994 np->wn_child = copyp->wn_child;
6995 if (np->wn_child != NULL)
6996 ++np->wn_child->wn_refs; /* child gets extra ref */
6997 np->wn_byte = copyp->wn_byte;
6998 if (np->wn_byte == NUL)
6999 {
7000 np->wn_flags = copyp->wn_flags;
7001 np->wn_region = copyp->wn_region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007002 np->wn_affixID = copyp->wn_affixID;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007003 }
7004
7005 /* Link the new node in the list, there will be one ref. */
7006 np->wn_refs = 1;
7007 *copyprev = np;
7008 copyprev = &np->wn_sibling;
7009
7010 /* Let "node" point to the head of the copied list. */
7011 if (copyp == node)
7012 node = np;
7013 }
7014 }
7015
Bram Moolenaar51485f02005-06-04 21:55:20 +00007016 /* Look for the sibling that has the same character. They are sorted
7017 * on byte value, thus stop searching when a sibling is found with a
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007018 * higher byte value. For zero bytes (end of word) the sorting is
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007019 * done on flags and then on affixID. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007020 while (node != NULL
7021 && (node->wn_byte < word[i]
7022 || (node->wn_byte == NUL
7023 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00007024 ? node->wn_affixID < (unsigned)affixID
7025 : (node->wn_flags < (unsigned)(flags & WN_MASK)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007026 || (node->wn_flags == (flags & WN_MASK)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007027 && (spin->si_sugtree
7028 ? (node->wn_region & 0xffff) < region
7029 : node->wn_affixID
7030 < (unsigned)affixID)))))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007031 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007032 prev = &node->wn_sibling;
7033 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007034 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007035 if (node == NULL
7036 || node->wn_byte != word[i]
7037 || (word[i] == NUL
7038 && (flags < 0
Bram Moolenaar4770d092006-01-12 23:22:24 +00007039 || spin->si_sugtree
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007040 || node->wn_flags != (flags & WN_MASK)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007041 || node->wn_affixID != affixID)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007042 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007043 /* Allocate a new node. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007044 np = get_wordnode(spin);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007045 if (np == NULL)
7046 return FAIL;
7047 np->wn_byte = word[i];
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007048
7049 /* If "node" is NULL this is a new child or the end of the sibling
7050 * list: ref count is one. Otherwise use ref count of sibling and
7051 * make ref count of sibling one (matters when inserting in front
7052 * of the list of siblings). */
7053 if (node == NULL)
7054 np->wn_refs = 1;
7055 else
7056 {
7057 np->wn_refs = node->wn_refs;
7058 node->wn_refs = 1;
7059 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007060 *prev = np;
7061 np->wn_sibling = node;
7062 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007063 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007064
Bram Moolenaar51485f02005-06-04 21:55:20 +00007065 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007066 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007067 node->wn_flags = flags;
7068 node->wn_region |= region;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007069 node->wn_affixID = affixID;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007070 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00007071 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007072 prev = &node->wn_child;
7073 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007074 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007075#ifdef SPELL_PRINTTREE
7076 smsg("Added \"%s\"", word);
7077 spell_print_tree(root->wn_sibling);
7078#endif
7079
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007080 /* count nr of words added since last message */
7081 ++spin->si_msg_count;
7082
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007083 if (spin->si_compress_cnt > 1)
7084 {
7085 if (--spin->si_compress_cnt == 1)
7086 /* Did enough words to lower the block count limit. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007087 spin->si_blocks_cnt += compress_inc;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007088 }
7089
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007090 /*
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007091 * When we have allocated lots of memory we need to compress the word tree
7092 * to free up some room. But compression is slow, and we might actually
7093 * need that room, thus only compress in the following situations:
7094 * 1. When not compressed before (si_compress_cnt == 0): when using
Bram Moolenaar5195e452005-08-19 20:32:47 +00007095 * "compress_start" blocks.
7096 * 2. When compressed before and used "compress_inc" blocks before
7097 * adding "compress_added" words (si_compress_cnt > 1).
7098 * 3. When compressed before, added "compress_added" words
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007099 * (si_compress_cnt == 1) and the number of free nodes drops below the
7100 * maximum word length.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007101 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007102#ifndef SPELL_PRINTTREE
7103 if (spin->si_compress_cnt == 1
7104 ? spin->si_free_count < MAXWLEN
Bram Moolenaar5195e452005-08-19 20:32:47 +00007105 : spin->si_blocks_cnt >= compress_start)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007106#endif
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007107 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007108 /* Decrement the block counter. The effect is that we compress again
Bram Moolenaar5195e452005-08-19 20:32:47 +00007109 * when the freed up room has been used and another "compress_inc"
7110 * blocks have been allocated. Unless "compress_added" words have
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007111 * been added, then the limit is put back again. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007112 spin->si_blocks_cnt -= compress_inc;
7113 spin->si_compress_cnt = compress_added;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007114
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007115 if (spin->si_verbose)
7116 {
7117 msg_start();
7118 msg_puts((char_u *)_(msg_compressing));
7119 msg_clr_eos();
7120 msg_didout = FALSE;
7121 msg_col = 0;
7122 out_flush();
7123 }
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007124
7125 /* Compress both trees. Either they both have many nodes, which makes
7126 * compression useful, or one of them is small, which means
Bram Moolenaar4770d092006-01-12 23:22:24 +00007127 * compression goes fast. But when filling the souldfold word tree
7128 * there is no keep-case tree. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007129 wordtree_compress(spin, spin->si_foldroot);
Bram Moolenaar4770d092006-01-12 23:22:24 +00007130 if (affixID >= 0)
7131 wordtree_compress(spin, spin->si_keeproot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007132 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007133
7134 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007135}
7136
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007137/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00007138 * Check the 'mkspellmem' option. Return FAIL if it's wrong.
7139 * Sets "sps_flags".
7140 */
7141 int
7142spell_check_msm()
7143{
7144 char_u *p = p_msm;
7145 long start = 0;
7146 long inc = 0;
7147 long added = 0;
7148
7149 if (!VIM_ISDIGIT(*p))
7150 return FAIL;
7151 /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
7152 start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
7153 if (*p != ',')
7154 return FAIL;
7155 ++p;
7156 if (!VIM_ISDIGIT(*p))
7157 return FAIL;
7158 inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
7159 if (*p != ',')
7160 return FAIL;
7161 ++p;
7162 if (!VIM_ISDIGIT(*p))
7163 return FAIL;
7164 added = getdigits(&p) * 1024;
7165 if (*p != NUL)
7166 return FAIL;
7167
7168 if (start == 0 || inc == 0 || added == 0 || inc > start)
7169 return FAIL;
7170
7171 compress_start = start;
7172 compress_inc = inc;
7173 compress_added = added;
7174 return OK;
7175}
7176
7177
7178/*
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007179 * Get a wordnode_T, either from the list of previously freed nodes or
7180 * allocate a new one.
7181 */
7182 static wordnode_T *
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007183get_wordnode(spin)
7184 spellinfo_T *spin;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007185{
7186 wordnode_T *n;
7187
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007188 if (spin->si_first_free == NULL)
7189 n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007190 else
7191 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007192 n = spin->si_first_free;
7193 spin->si_first_free = n->wn_child;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007194 vim_memset(n, 0, sizeof(wordnode_T));
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007195 --spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007196 }
7197#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007198 n->wn_nr = ++spin->si_wordnode_nr;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007199#endif
7200 return n;
7201}
7202
7203/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007204 * Decrement the reference count on a node (which is the head of a list of
7205 * siblings). If the reference count becomes zero free the node and its
7206 * siblings.
Bram Moolenaar4770d092006-01-12 23:22:24 +00007207 * Returns the number of nodes actually freed.
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007208 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00007209 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007210deref_wordnode(spin, node)
7211 spellinfo_T *spin;
7212 wordnode_T *node;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007213{
Bram Moolenaar4770d092006-01-12 23:22:24 +00007214 wordnode_T *np;
7215 int cnt = 0;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007216
7217 if (--node->wn_refs == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007218 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007219 for (np = node; np != NULL; np = np->wn_sibling)
7220 {
7221 if (np->wn_child != NULL)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007222 cnt += deref_wordnode(spin, np->wn_child);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007223 free_wordnode(spin, np);
Bram Moolenaar4770d092006-01-12 23:22:24 +00007224 ++cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007225 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007226 ++cnt; /* length field */
7227 }
7228 return cnt;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007229}
7230
7231/*
7232 * Free a wordnode_T for re-use later.
7233 * Only the "wn_child" field becomes invalid.
7234 */
7235 static void
7236free_wordnode(spin, n)
7237 spellinfo_T *spin;
7238 wordnode_T *n;
7239{
7240 n->wn_child = spin->si_first_free;
7241 spin->si_first_free = n;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007242 ++spin->si_free_count;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007243}
7244
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007245/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007246 * Compress a tree: find tails that are identical and can be shared.
7247 */
7248 static void
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007249wordtree_compress(spin, root)
Bram Moolenaarb765d632005-06-07 21:00:02 +00007250 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007251 wordnode_T *root;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007252{
7253 hashtab_T ht;
7254 int n;
7255 int tot = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007256 int perc;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007257
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007258 /* Skip the root itself, it's not actually used. The first sibling is the
7259 * start of the tree. */
7260 if (root->wn_sibling != NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007261 {
7262 hash_init(&ht);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007263 n = node_compress(spin, root->wn_sibling, &ht, &tot);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007264
7265#ifndef SPELL_PRINTTREE
Bram Moolenaarb765d632005-06-07 21:00:02 +00007266 if (spin->si_verbose || p_verbose > 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007267#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00007268 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007269 if (tot > 1000000)
7270 perc = (tot - n) / (tot / 100);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007271 else if (tot == 0)
7272 perc = 0;
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007273 else
7274 perc = (tot - n) * 100 / tot;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007275 vim_snprintf((char *)IObuff, IOSIZE,
7276 _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7277 n, tot, tot - n, perc);
7278 spell_message(spin, IObuff);
Bram Moolenaarb765d632005-06-07 21:00:02 +00007279 }
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007280#ifdef SPELL_PRINTTREE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007281 spell_print_tree(root->wn_sibling);
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007282#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00007283 hash_clear(&ht);
7284 }
7285}
7286
7287/*
7288 * Compress a node, its siblings and its children, depth first.
7289 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007290 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007291 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007292node_compress(spin, node, ht, tot)
7293 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007294 wordnode_T *node;
7295 hashtab_T *ht;
7296 int *tot; /* total count of nodes before compressing,
7297 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007298{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007299 wordnode_T *np;
7300 wordnode_T *tp;
7301 wordnode_T *child;
7302 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007303 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007304 int len = 0;
7305 unsigned nr, n;
7306 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007307
Bram Moolenaar51485f02005-06-04 21:55:20 +00007308 /*
7309 * Go through the list of siblings. Compress each child and then try
7310 * finding an identical child to replace it.
7311 * Note that with "child" we mean not just the node that is pointed to,
Bram Moolenaar4770d092006-01-12 23:22:24 +00007312 * but the whole list of siblings of which the child node is the first.
Bram Moolenaar51485f02005-06-04 21:55:20 +00007313 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007314 for (np = node; np != NULL && !got_int; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007315 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007316 ++len;
7317 if ((child = np->wn_child) != NULL)
7318 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007319 /* Compress the child first. This fills hashkey. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007320 compressed += node_compress(spin, child, ht, tot);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007321
7322 /* Try to find an identical child. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007323 hash = hash_hash(child->wn_u1.hashkey);
7324 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007325 if (!HASHITEM_EMPTY(hi))
7326 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007327 /* There are children we encountered before with a hash value
7328 * identical to the current child. Now check if there is one
7329 * that is really identical. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007330 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007331 if (node_equal(child, tp))
7332 {
7333 /* Found one! Now use that child in place of the
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007334 * current one. This means the current child and all
7335 * its siblings is unlinked from the tree. */
7336 ++tp->wn_refs;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007337 compressed += deref_wordnode(spin, child);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007338 np->wn_child = tp;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007339 break;
7340 }
7341 if (tp == NULL)
7342 {
7343 /* No other child with this hash value equals the child of
7344 * the node, add it to the linked list after the first
7345 * item. */
7346 tp = HI2WN(hi);
Bram Moolenaar0c405862005-06-22 22:26:26 +00007347 child->wn_u2.next = tp->wn_u2.next;
7348 tp->wn_u2.next = child;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007349 }
7350 }
7351 else
7352 /* No other child has this hash value, add it to the
7353 * hashtable. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007354 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007355 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007356 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00007357 *tot += len + 1; /* add one for the node that stores the length */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007358
7359 /*
7360 * Make a hash key for the node and its siblings, so that we can quickly
7361 * find a lookalike node. This must be done after compressing the sibling
7362 * list, otherwise the hash key would become invalid by the compression.
7363 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007364 node->wn_u1.hashkey[0] = len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007365 nr = 0;
7366 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007367 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007368 if (np->wn_byte == NUL)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007369 /* end node: use wn_flags, wn_region and wn_affixID */
7370 n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
Bram Moolenaar51485f02005-06-04 21:55:20 +00007371 else
7372 /* byte node: use the byte value and the child pointer */
7373 n = np->wn_byte + ((long_u)np->wn_child << 8);
7374 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007375 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00007376
7377 /* Avoid NUL bytes, it terminates the hash key. */
7378 n = nr & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007379 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007380 n = (nr >> 8) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007381 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007382 n = (nr >> 16) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007383 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007384 n = (nr >> 24) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007385 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
7386 node->wn_u1.hashkey[5] = NUL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007387
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00007388 /* Check for CTRL-C pressed now and then. */
7389 fast_breakcheck();
7390
Bram Moolenaar51485f02005-06-04 21:55:20 +00007391 return compressed;
7392}
7393
7394/*
7395 * Return TRUE when two nodes have identical siblings and children.
7396 */
7397 static int
7398node_equal(n1, n2)
7399 wordnode_T *n1;
7400 wordnode_T *n2;
7401{
7402 wordnode_T *p1;
7403 wordnode_T *p2;
7404
7405 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
7406 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
7407 if (p1->wn_byte != p2->wn_byte
7408 || (p1->wn_byte == NUL
7409 ? (p1->wn_flags != p2->wn_flags
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007410 || p1->wn_region != p2->wn_region
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007411 || p1->wn_affixID != p2->wn_affixID)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007412 : (p1->wn_child != p2->wn_child)))
7413 break;
7414
7415 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007416}
7417
7418/*
7419 * Write a number to file "fd", MSB first, in "len" bytes.
7420 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007421 void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007422put_bytes(fd, nr, len)
7423 FILE *fd;
7424 long_u nr;
7425 int len;
7426{
7427 int i;
7428
7429 for (i = len - 1; i >= 0; --i)
7430 putc((int)(nr >> (i * 8)), fd);
7431}
7432
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007433#ifdef _MSC_VER
7434# if (_MSC_VER <= 1200)
7435/* This line is required for VC6 without the service pack. Also see the
7436 * matching #pragma below. */
7437/* # pragma optimize("", off) */
7438# endif
7439#endif
7440
Bram Moolenaar4770d092006-01-12 23:22:24 +00007441/*
7442 * Write spin->si_sugtime to file "fd".
7443 */
7444 static void
7445put_sugtime(spin, fd)
7446 spellinfo_T *spin;
7447 FILE *fd;
7448{
7449 int c;
7450 int i;
7451
7452 /* time_t can be up to 8 bytes in size, more than long_u, thus we
7453 * can't use put_bytes() here. */
7454 for (i = 7; i >= 0; --i)
7455 if (i + 1 > sizeof(time_t))
7456 /* ">>" doesn't work well when shifting more bits than avail */
7457 putc(0, fd);
7458 else
7459 {
7460 c = (unsigned)spin->si_sugtime >> (i * 8);
7461 putc(c, fd);
7462 }
7463}
7464
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007465#ifdef _MSC_VER
7466# if (_MSC_VER <= 1200)
7467/* # pragma optimize("", on) */
7468# endif
7469#endif
7470
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007471static int
7472#ifdef __BORLANDC__
7473_RTLENTRYF
7474#endif
7475rep_compare __ARGS((const void *s1, const void *s2));
7476
7477/*
7478 * Function given to qsort() to sort the REP items on "from" string.
7479 */
7480 static int
7481#ifdef __BORLANDC__
7482_RTLENTRYF
7483#endif
7484rep_compare(s1, s2)
7485 const void *s1;
7486 const void *s2;
7487{
7488 fromto_T *p1 = (fromto_T *)s1;
7489 fromto_T *p2 = (fromto_T *)s2;
7490
7491 return STRCMP(p1->ft_from, p2->ft_from);
7492}
7493
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007494/*
Bram Moolenaar5195e452005-08-19 20:32:47 +00007495 * Write the Vim .spl file "fname".
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007496 * Return FAIL or OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007497 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007498 static int
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007499write_vim_spell(spin, fname)
Bram Moolenaar51485f02005-06-04 21:55:20 +00007500 spellinfo_T *spin;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007501 char_u *fname;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007502{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007503 FILE *fd;
7504 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007505 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007506 wordnode_T *tree;
7507 int nodecount;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007508 int i;
7509 int l;
7510 garray_T *gap;
7511 fromto_T *ftp;
7512 char_u *p;
7513 int rr;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007514 int retval = OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007515
Bram Moolenaarb765d632005-06-07 21:00:02 +00007516 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00007517 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007518 {
7519 EMSG2(_(e_notopen), fname);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007520 return FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007521 }
7522
Bram Moolenaar5195e452005-08-19 20:32:47 +00007523 /* <HEADER>: <fileID> <versionnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007524 /* <fileID> */
7525 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007526 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007527 EMSG(_(e_write));
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007528 retval = FAIL;
7529 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00007530 putc(VIMSPELLVERSION, fd); /* <versionnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007531
Bram Moolenaar5195e452005-08-19 20:32:47 +00007532 /*
7533 * <SECTIONS>: <section> ... <sectionend>
7534 */
7535
Bram Moolenaar362e1a32006-03-06 23:29:24 +00007536 /* SN_INFO: <infotext> */
7537 if (spin->si_info != NULL)
7538 {
7539 putc(SN_INFO, fd); /* <sectionID> */
7540 putc(0, fd); /* <sectionflags> */
7541
7542 i = STRLEN(spin->si_info);
7543 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
7544 fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
7545 }
7546
Bram Moolenaar5195e452005-08-19 20:32:47 +00007547 /* SN_REGION: <regionname> ...
7548 * Write the region names only if there is more than one. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007549 if (spin->si_region_count > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007550 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007551 putc(SN_REGION, fd); /* <sectionID> */
7552 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7553 l = spin->si_region_count * 2;
7554 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7555 fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
7556 /* <regionname> ... */
Bram Moolenaar3982c542005-06-08 21:56:31 +00007557 regionmask = (1 << spin->si_region_count) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007558 }
7559 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00007560 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007561
Bram Moolenaar5195e452005-08-19 20:32:47 +00007562 /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
7563 *
7564 * The table with character flags and the table for case folding.
7565 * This makes sure the same characters are recognized as word characters
7566 * when generating an when using a spell file.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007567 * Skip this for ASCII, the table may conflict with the one used for
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007568 * 'encoding'.
7569 * Also skip this for an .add.spl file, the main spell file must contain
7570 * the table (avoids that it conflicts). File is shorter too.
7571 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007572 if (!spin->si_ascii && !spin->si_add)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007573 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007574 char_u folchars[128 * 8];
7575 int flags;
7576
Bram Moolenaard12a1322005-08-21 22:08:24 +00007577 putc(SN_CHARFLAGS, fd); /* <sectionID> */
Bram Moolenaar5195e452005-08-19 20:32:47 +00007578 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7579
7580 /* Form the <folchars> string first, we need to know its length. */
7581 l = 0;
7582 for (i = 128; i < 256; ++i)
7583 {
7584#ifdef FEAT_MBYTE
7585 if (has_mbyte)
7586 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
7587 else
7588#endif
7589 folchars[l++] = spelltab.st_fold[i];
7590 }
7591 put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
7592
7593 fputc(128, fd); /* <charflagslen> */
7594 for (i = 128; i < 256; ++i)
7595 {
7596 flags = 0;
7597 if (spelltab.st_isw[i])
7598 flags |= CF_WORD;
7599 if (spelltab.st_isu[i])
7600 flags |= CF_UPPER;
7601 fputc(flags, fd); /* <charflags> */
7602 }
7603
7604 put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
7605 fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00007606 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00007607
Bram Moolenaar5195e452005-08-19 20:32:47 +00007608 /* SN_MIDWORD: <midword> */
7609 if (spin->si_midword != NULL)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007610 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007611 putc(SN_MIDWORD, fd); /* <sectionID> */
7612 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7613
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007614 i = STRLEN(spin->si_midword);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007615 put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007616 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
7617 }
7618
Bram Moolenaar5195e452005-08-19 20:32:47 +00007619 /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
7620 if (spin->si_prefcond.ga_len > 0)
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007621 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007622 putc(SN_PREFCOND, fd); /* <sectionID> */
7623 putc(SNF_REQUIRED, fd); /* <sectionflags> */
7624
7625 l = write_spell_prefcond(NULL, &spin->si_prefcond);
7626 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7627
7628 write_spell_prefcond(fd, &spin->si_prefcond);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007629 }
7630
Bram Moolenaar5195e452005-08-19 20:32:47 +00007631 /* SN_REP: <repcount> <rep> ...
Bram Moolenaar4770d092006-01-12 23:22:24 +00007632 * SN_SAL: <salflags> <salcount> <sal> ...
7633 * SN_REPSAL: <repcount> <rep> ... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007634
Bram Moolenaar5195e452005-08-19 20:32:47 +00007635 /* round 1: SN_REP section
Bram Moolenaar4770d092006-01-12 23:22:24 +00007636 * round 2: SN_SAL section (unless SN_SOFO is used)
7637 * round 3: SN_REPSAL section */
7638 for (round = 1; round <= 3; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007639 {
7640 if (round == 1)
7641 gap = &spin->si_rep;
Bram Moolenaar4770d092006-01-12 23:22:24 +00007642 else if (round == 2)
7643 {
7644 /* Don't write SN_SAL when using a SN_SOFO section */
7645 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7646 continue;
7647 gap = &spin->si_sal;
Bram Moolenaar5195e452005-08-19 20:32:47 +00007648 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007649 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00007650 gap = &spin->si_repsal;
7651
7652 /* Don't write the section if there are no items. */
7653 if (gap->ga_len == 0)
7654 continue;
7655
7656 /* Sort the REP/REPSAL items. */
7657 if (round != 2)
7658 qsort(gap->ga_data, (size_t)gap->ga_len,
7659 sizeof(fromto_T), rep_compare);
7660
7661 i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
7662 putc(i, fd); /* <sectionID> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007663
Bram Moolenaar5195e452005-08-19 20:32:47 +00007664 /* This is for making suggestions, section is not required. */
7665 putc(0, fd); /* <sectionflags> */
7666
7667 /* Compute the length of what follows. */
7668 l = 2; /* count <repcount> or <salcount> */
7669 for (i = 0; i < gap->ga_len; ++i)
7670 {
7671 ftp = &((fromto_T *)gap->ga_data)[i];
7672 l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */
7673 l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */
7674 }
7675 if (round == 2)
7676 ++l; /* count <salflags> */
7677 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7678
7679 if (round == 2)
7680 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007681 i = 0;
7682 if (spin->si_followup)
7683 i |= SAL_F0LLOWUP;
7684 if (spin->si_collapse)
7685 i |= SAL_COLLAPSE;
7686 if (spin->si_rem_accents)
7687 i |= SAL_REM_ACCENTS;
7688 putc(i, fd); /* <salflags> */
7689 }
7690
7691 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
7692 for (i = 0; i < gap->ga_len; ++i)
7693 {
7694 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
7695 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
7696 ftp = &((fromto_T *)gap->ga_data)[i];
7697 for (rr = 1; rr <= 2; ++rr)
7698 {
7699 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
7700 l = STRLEN(p);
7701 putc(l, fd);
7702 fwrite(p, l, (size_t)1, fd);
7703 }
7704 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00007705
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007706 }
7707
Bram Moolenaar5195e452005-08-19 20:32:47 +00007708 /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
7709 * This is for making suggestions, section is not required. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007710 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
7711 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00007712 putc(SN_SOFO, fd); /* <sectionID> */
7713 putc(0, fd); /* <sectionflags> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007714
7715 l = STRLEN(spin->si_sofofr);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007716 put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
7717 /* <sectionlen> */
7718
7719 put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
7720 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007721
7722 l = STRLEN(spin->si_sofoto);
Bram Moolenaar5195e452005-08-19 20:32:47 +00007723 put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
7724 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007725 }
7726
Bram Moolenaar4770d092006-01-12 23:22:24 +00007727 /* SN_WORDS: <word> ...
7728 * This is for making suggestions, section is not required. */
7729 if (spin->si_commonwords.ht_used > 0)
7730 {
7731 putc(SN_WORDS, fd); /* <sectionID> */
7732 putc(0, fd); /* <sectionflags> */
7733
7734 /* round 1: count the bytes
7735 * round 2: write the bytes */
7736 for (round = 1; round <= 2; ++round)
7737 {
7738 int todo;
7739 int len = 0;
7740 hashitem_T *hi;
7741
7742 todo = spin->si_commonwords.ht_used;
7743 for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
7744 if (!HASHITEM_EMPTY(hi))
7745 {
7746 l = STRLEN(hi->hi_key) + 1;
7747 len += l;
7748 if (round == 2) /* <word> */
7749 fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
7750 --todo;
7751 }
7752 if (round == 1)
7753 put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
7754 }
7755 }
7756
Bram Moolenaar5195e452005-08-19 20:32:47 +00007757 /* SN_MAP: <mapstr>
7758 * This is for making suggestions, section is not required. */
7759 if (spin->si_map.ga_len > 0)
7760 {
7761 putc(SN_MAP, fd); /* <sectionID> */
7762 putc(0, fd); /* <sectionflags> */
7763 l = spin->si_map.ga_len;
7764 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7765 fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
7766 /* <mapstr> */
7767 }
7768
Bram Moolenaar4770d092006-01-12 23:22:24 +00007769 /* SN_SUGFILE: <timestamp>
7770 * This is used to notify that a .sug file may be available and at the
7771 * same time allows for checking that a .sug file that is found matches
7772 * with this .spl file. That's because the word numbers must be exactly
7773 * right. */
7774 if (!spin->si_nosugfile
7775 && (spin->si_sal.ga_len > 0
7776 || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
7777 {
7778 putc(SN_SUGFILE, fd); /* <sectionID> */
7779 putc(0, fd); /* <sectionflags> */
7780 put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
7781
7782 /* Set si_sugtime and write it to the file. */
7783 spin->si_sugtime = time(NULL);
7784 put_sugtime(spin, fd); /* <timestamp> */
7785 }
7786
Bram Moolenaare1438bb2006-03-01 22:01:55 +00007787 /* SN_NOSPLITSUGS: nothing
7788 * This is used to notify that no suggestions with word splits are to be
7789 * made. */
7790 if (spin->si_nosplitsugs)
7791 {
7792 putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
7793 putc(0, fd); /* <sectionflags> */
7794 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
7795 }
7796
Bram Moolenaar5195e452005-08-19 20:32:47 +00007797 /* SN_COMPOUND: compound info.
7798 * We don't mark it required, when not supported all compound words will
7799 * be bad words. */
7800 if (spin->si_compflags != NULL)
7801 {
7802 putc(SN_COMPOUND, fd); /* <sectionID> */
7803 putc(0, fd); /* <sectionflags> */
7804
7805 l = STRLEN(spin->si_compflags);
7806 put_bytes(fd, (long_u)(l + 3), 4); /* <sectionlen> */
7807 putc(spin->si_compmax, fd); /* <compmax> */
7808 putc(spin->si_compminlen, fd); /* <compminlen> */
7809 putc(spin->si_compsylmax, fd); /* <compsylmax> */
7810 /* <compflags> */
7811 fwrite(spin->si_compflags, (size_t)l, (size_t)1, fd);
7812 }
7813
Bram Moolenaar78622822005-08-23 21:00:13 +00007814 /* SN_NOBREAK: NOBREAK flag */
7815 if (spin->si_nobreak)
7816 {
7817 putc(SN_NOBREAK, fd); /* <sectionID> */
7818 putc(0, fd); /* <sectionflags> */
7819
7820 /* It's empty, the precense of the section flags the feature. */
7821 put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
7822 }
7823
Bram Moolenaar5195e452005-08-19 20:32:47 +00007824 /* SN_SYLLABLE: syllable info.
7825 * We don't mark it required, when not supported syllables will not be
7826 * counted. */
7827 if (spin->si_syllable != NULL)
7828 {
7829 putc(SN_SYLLABLE, fd); /* <sectionID> */
7830 putc(0, fd); /* <sectionflags> */
7831
7832 l = STRLEN(spin->si_syllable);
7833 put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
7834 fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */
7835 }
7836
7837 /* end of <SECTIONS> */
7838 putc(SN_END, fd); /* <sectionend> */
7839
Bram Moolenaar50cde822005-06-05 21:54:54 +00007840
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007841 /*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007842 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007843 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007844 spin->si_memtot = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007845 for (round = 1; round <= 3; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007846 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007847 if (round == 1)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007848 tree = spin->si_foldroot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007849 else if (round == 2)
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007850 tree = spin->si_keeproot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007851 else
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00007852 tree = spin->si_prefroot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007853
Bram Moolenaar0c405862005-06-22 22:26:26 +00007854 /* Clear the index and wnode fields in the tree. */
7855 clear_node(tree);
7856
Bram Moolenaar51485f02005-06-04 21:55:20 +00007857 /* Count the number of nodes. Needed to be able to allocate the
Bram Moolenaar0c405862005-06-22 22:26:26 +00007858 * memory when reading the nodes. Also fills in index for shared
Bram Moolenaar51485f02005-06-04 21:55:20 +00007859 * nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007860 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007861
Bram Moolenaar51485f02005-06-04 21:55:20 +00007862 /* number of nodes in 4 bytes */
7863 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00007864 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007865
Bram Moolenaar51485f02005-06-04 21:55:20 +00007866 /* Write the nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007867 (void)put_node(fd, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007868 }
7869
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00007870 /* Write another byte to check for errors. */
7871 if (putc(0, fd) == EOF)
7872 retval = FAIL;
7873
7874 if (fclose(fd) == EOF)
7875 retval = FAIL;
7876
7877 return retval;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007878}
7879
7880/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00007881 * Clear the index and wnode fields of "node", it siblings and its
7882 * children. This is needed because they are a union with other items to save
7883 * space.
7884 */
7885 static void
7886clear_node(node)
7887 wordnode_T *node;
7888{
7889 wordnode_T *np;
7890
7891 if (node != NULL)
7892 for (np = node; np != NULL; np = np->wn_sibling)
7893 {
7894 np->wn_u1.index = 0;
7895 np->wn_u2.wnode = NULL;
7896
7897 if (np->wn_byte != NUL)
7898 clear_node(np->wn_child);
7899 }
7900}
7901
7902
7903/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00007904 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007905 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00007906 * This first writes the list of possible bytes (siblings). Then for each
7907 * byte recursively write the children.
7908 *
Bram Moolenaar4770d092006-01-12 23:22:24 +00007909 * NOTE: The code here must match the code in read_tree_node(), since
7910 * assumptions are made about the indexes (so that we don't have to write them
7911 * in the file).
Bram Moolenaar51485f02005-06-04 21:55:20 +00007912 *
7913 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007914 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007915 static int
Bram Moolenaar0c405862005-06-22 22:26:26 +00007916put_node(fd, node, index, regionmask, prefixtree)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007917 FILE *fd; /* NULL when only counting */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007918 wordnode_T *node;
7919 int index;
7920 int regionmask;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007921 int prefixtree; /* TRUE for PREFIXTREE */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007922{
Bram Moolenaar51485f02005-06-04 21:55:20 +00007923 int newindex = index;
7924 int siblingcount = 0;
7925 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007926 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007927
Bram Moolenaar51485f02005-06-04 21:55:20 +00007928 /* If "node" is zero the tree is empty. */
7929 if (node == NULL)
7930 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007931
Bram Moolenaar51485f02005-06-04 21:55:20 +00007932 /* Store the index where this node is written. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00007933 node->wn_u1.index = index;
Bram Moolenaar51485f02005-06-04 21:55:20 +00007934
7935 /* Count the number of siblings. */
7936 for (np = node; np != NULL; np = np->wn_sibling)
7937 ++siblingcount;
7938
7939 /* Write the sibling count. */
7940 if (fd != NULL)
7941 putc(siblingcount, fd); /* <siblingcount> */
7942
7943 /* Write each sibling byte and optionally extra info. */
7944 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007945 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007946 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007947 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00007948 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00007949 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007950 /* For a NUL byte (end of word) write the flags etc. */
7951 if (prefixtree)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007952 {
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007953 /* In PREFIXTREE write the required affixID and the
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007954 * associated condition nr (stored in wn_region). The
7955 * byte value is misused to store the "rare" and "not
7956 * combining" flags */
Bram Moolenaar53805d12005-08-01 07:08:33 +00007957 if (np->wn_flags == (short_u)PFX_FLAGS)
7958 putc(BY_NOFLAGS, fd); /* <byte> */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007959 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00007960 {
7961 putc(BY_FLAGS, fd); /* <byte> */
7962 putc(np->wn_flags, fd); /* <pflags> */
7963 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007964 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007965 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00007966 }
7967 else
7968 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007969 /* For word trees we write the flag/region items. */
7970 flags = np->wn_flags;
7971 if (regionmask != 0 && np->wn_region != regionmask)
7972 flags |= WF_REGION;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007973 if (np->wn_affixID != 0)
7974 flags |= WF_AFX;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007975 if (flags == 0)
7976 {
7977 /* word without flags or region */
7978 putc(BY_NOFLAGS, fd); /* <byte> */
7979 }
7980 else
7981 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00007982 if (np->wn_flags >= 0x100)
7983 {
7984 putc(BY_FLAGS2, fd); /* <byte> */
7985 putc(flags, fd); /* <flags> */
7986 putc((unsigned)flags >> 8, fd); /* <flags2> */
7987 }
7988 else
7989 {
7990 putc(BY_FLAGS, fd); /* <byte> */
7991 putc(flags, fd); /* <flags> */
7992 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007993 if (flags & WF_REGION)
7994 putc(np->wn_region, fd); /* <region> */
Bram Moolenaarae5bce12005-08-15 21:41:48 +00007995 if (flags & WF_AFX)
7996 putc(np->wn_affixID, fd); /* <affixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00007997 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00007998 }
7999 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00008000 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00008001 else
8002 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00008003 if (np->wn_child->wn_u1.index != 0
8004 && np->wn_child->wn_u2.wnode != node)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008005 {
8006 /* The child is written elsewhere, write the reference. */
8007 if (fd != NULL)
8008 {
8009 putc(BY_INDEX, fd); /* <byte> */
8010 /* <nodeidx> */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008011 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008012 }
8013 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00008014 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008015 /* We will write the child below and give it an index. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00008016 np->wn_child->wn_u2.wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00008017
Bram Moolenaar51485f02005-06-04 21:55:20 +00008018 if (fd != NULL)
8019 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
8020 {
8021 EMSG(_(e_write));
8022 return 0;
8023 }
8024 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008025 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00008026
8027 /* Space used in the array when reading: one for each sibling and one for
8028 * the count. */
8029 newindex += siblingcount + 1;
8030
8031 /* Recursively dump the children of each sibling. */
8032 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar0c405862005-06-22 22:26:26 +00008033 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
8034 newindex = put_node(fd, np->wn_child, newindex, regionmask,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008035 prefixtree);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008036
8037 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008038}
8039
8040
8041/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00008042 * ":mkspell [-ascii] outfile infile ..."
8043 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008044 */
8045 void
8046ex_mkspell(eap)
8047 exarg_T *eap;
8048{
8049 int fcount;
8050 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008051 char_u *arg = eap->arg;
8052 int ascii = FALSE;
8053
8054 if (STRNCMP(arg, "-ascii", 6) == 0)
8055 {
8056 ascii = TRUE;
8057 arg = skipwhite(arg + 6);
8058 }
8059
8060 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
8061 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
8062 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008063 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008064 FreeWild(fcount, fnames);
8065 }
8066}
8067
8068/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00008069 * Create the .sug file.
8070 * Uses the soundfold info in "spin".
8071 * Writes the file with the name "wfname", with ".spl" changed to ".sug".
8072 */
8073 static void
8074spell_make_sugfile(spin, wfname)
8075 spellinfo_T *spin;
8076 char_u *wfname;
8077{
8078 char_u fname[MAXPATHL];
8079 int len;
8080 slang_T *slang;
8081 int free_slang = FALSE;
8082
8083 /*
8084 * Read back the .spl file that was written. This fills the required
8085 * info for soundfolding. This also uses less memory than the
8086 * pointer-linked version of the trie. And it avoids having two versions
8087 * of the code for the soundfolding stuff.
8088 * It might have been done already by spell_reload_one().
8089 */
8090 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
8091 if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
8092 break;
8093 if (slang == NULL)
8094 {
8095 spell_message(spin, (char_u *)_("Reading back spell file..."));
8096 slang = spell_load_file(wfname, NULL, NULL, FALSE);
8097 if (slang == NULL)
8098 return;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008099 free_slang = TRUE;
8100 }
8101
8102 /*
8103 * Clear the info in "spin" that is used.
8104 */
8105 spin->si_blocks = NULL;
8106 spin->si_blocks_cnt = 0;
8107 spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
8108 spin->si_free_count = 0;
8109 spin->si_first_free = NULL;
8110 spin->si_foldwcount = 0;
8111
8112 /*
8113 * Go through the trie of good words, soundfold each word and add it to
8114 * the soundfold trie.
8115 */
8116 spell_message(spin, (char_u *)_("Performing soundfolding..."));
8117 if (sug_filltree(spin, slang) == FAIL)
8118 goto theend;
8119
8120 /*
8121 * Create the table which links each soundfold word with a list of the
8122 * good words it may come from. Creates buffer "spin->si_spellbuf".
8123 * This also removes the wordnr from the NUL byte entries to make
8124 * compression possible.
8125 */
8126 if (sug_maketable(spin) == FAIL)
8127 goto theend;
8128
8129 smsg((char_u *)_("Number of words after soundfolding: %ld"),
8130 (long)spin->si_spellbuf->b_ml.ml_line_count);
8131
8132 /*
8133 * Compress the soundfold trie.
8134 */
8135 spell_message(spin, (char_u *)_(msg_compressing));
8136 wordtree_compress(spin, spin->si_foldroot);
8137
8138 /*
8139 * Write the .sug file.
8140 * Make the file name by changing ".spl" to ".sug".
8141 */
8142 STRCPY(fname, wfname);
8143 len = STRLEN(fname);
8144 fname[len - 2] = 'u';
8145 fname[len - 1] = 'g';
8146 sug_write(spin, fname);
8147
8148theend:
8149 if (free_slang)
8150 slang_free(slang);
8151 free_blocks(spin->si_blocks);
8152 close_spellbuf(spin->si_spellbuf);
8153}
8154
8155/*
8156 * Build the soundfold trie for language "slang".
8157 */
8158 static int
8159sug_filltree(spin, slang)
8160 spellinfo_T *spin;
8161 slang_T *slang;
8162{
8163 char_u *byts;
8164 idx_T *idxs;
8165 int depth;
8166 idx_T arridx[MAXWLEN];
8167 int curi[MAXWLEN];
8168 char_u tword[MAXWLEN];
8169 char_u tsalword[MAXWLEN];
8170 int c;
8171 idx_T n;
8172 unsigned words_done = 0;
8173 int wordcount[MAXWLEN];
8174
8175 /* We use si_foldroot for the souldfolded trie. */
8176 spin->si_foldroot = wordtree_alloc(spin);
8177 if (spin->si_foldroot == NULL)
8178 return FAIL;
8179
8180 /* let tree_add_word() know we're adding to the soundfolded tree */
8181 spin->si_sugtree = TRUE;
8182
8183 /*
8184 * Go through the whole case-folded tree, soundfold each word and put it
8185 * in the trie.
8186 */
8187 byts = slang->sl_fbyts;
8188 idxs = slang->sl_fidxs;
8189
8190 arridx[0] = 0;
8191 curi[0] = 1;
8192 wordcount[0] = 0;
8193
8194 depth = 0;
8195 while (depth >= 0 && !got_int)
8196 {
8197 if (curi[depth] > byts[arridx[depth]])
8198 {
8199 /* Done all bytes at this node, go up one level. */
8200 idxs[arridx[depth]] = wordcount[depth];
8201 if (depth > 0)
8202 wordcount[depth - 1] += wordcount[depth];
8203
8204 --depth;
8205 line_breakcheck();
8206 }
8207 else
8208 {
8209
8210 /* Do one more byte at this node. */
8211 n = arridx[depth] + curi[depth];
8212 ++curi[depth];
8213
8214 c = byts[n];
8215 if (c == 0)
8216 {
8217 /* Sound-fold the word. */
8218 tword[depth] = NUL;
8219 spell_soundfold(slang, tword, TRUE, tsalword);
8220
8221 /* We use the "flags" field for the MSB of the wordnr,
8222 * "region" for the LSB of the wordnr. */
8223 if (tree_add_word(spin, tsalword, spin->si_foldroot,
8224 words_done >> 16, words_done & 0xffff,
8225 0) == FAIL)
8226 return FAIL;
8227
8228 ++words_done;
8229 ++wordcount[depth];
8230
8231 /* Reset the block count each time to avoid compression
8232 * kicking in. */
8233 spin->si_blocks_cnt = 0;
8234
8235 /* Skip over any other NUL bytes (same word with different
8236 * flags). */
8237 while (byts[n + 1] == 0)
8238 {
8239 ++n;
8240 ++curi[depth];
8241 }
8242 }
8243 else
8244 {
8245 /* Normal char, go one level deeper. */
8246 tword[depth++] = c;
8247 arridx[depth] = idxs[n];
8248 curi[depth] = 1;
8249 wordcount[depth] = 0;
8250 }
8251 }
8252 }
8253
8254 smsg((char_u *)_("Total number of words: %d"), words_done);
8255
8256 return OK;
8257}
8258
8259/*
8260 * Make the table that links each word in the soundfold trie to the words it
8261 * can be produced from.
8262 * This is not unlike lines in a file, thus use a memfile to be able to access
8263 * the table efficiently.
8264 * Returns FAIL when out of memory.
8265 */
8266 static int
8267sug_maketable(spin)
8268 spellinfo_T *spin;
8269{
8270 garray_T ga;
8271 int res = OK;
8272
8273 /* Allocate a buffer, open a memline for it and create the swap file
8274 * (uses a temp file, not a .swp file). */
8275 spin->si_spellbuf = open_spellbuf();
8276 if (spin->si_spellbuf == NULL)
8277 return FAIL;
8278
8279 /* Use a buffer to store the line info, avoids allocating many small
8280 * pieces of memory. */
8281 ga_init2(&ga, 1, 100);
8282
8283 /* recursively go through the tree */
8284 if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8285 res = FAIL;
8286
8287 ga_clear(&ga);
8288 return res;
8289}
8290
8291/*
8292 * Fill the table for one node and its children.
8293 * Returns the wordnr at the start of the node.
8294 * Returns -1 when out of memory.
8295 */
8296 static int
8297sug_filltable(spin, node, startwordnr, gap)
8298 spellinfo_T *spin;
8299 wordnode_T *node;
8300 int startwordnr;
8301 garray_T *gap; /* place to store line of numbers */
8302{
8303 wordnode_T *p, *np;
8304 int wordnr = startwordnr;
8305 int nr;
8306 int prev_nr;
8307
8308 for (p = node; p != NULL; p = p->wn_sibling)
8309 {
8310 if (p->wn_byte == NUL)
8311 {
8312 gap->ga_len = 0;
8313 prev_nr = 0;
8314 for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8315 {
8316 if (ga_grow(gap, 10) == FAIL)
8317 return -1;
8318
8319 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8320 /* Compute the offset from the previous nr and store the
8321 * offset in a way that it takes a minimum number of bytes.
8322 * It's a bit like utf-8, but without the need to mark
8323 * following bytes. */
8324 nr -= prev_nr;
8325 prev_nr += nr;
8326 gap->ga_len += offset2bytes(nr,
8327 (char_u *)gap->ga_data + gap->ga_len);
8328 }
8329
8330 /* add the NUL byte */
8331 ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8332
8333 if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8334 gap->ga_data, gap->ga_len, TRUE) == FAIL)
8335 return -1;
8336 ++wordnr;
8337
8338 /* Remove extra NUL entries, we no longer need them. We don't
8339 * bother freeing the nodes, the won't be reused anyway. */
8340 while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8341 p->wn_sibling = p->wn_sibling->wn_sibling;
8342
8343 /* Clear the flags on the remaining NUL node, so that compression
8344 * works a lot better. */
8345 p->wn_flags = 0;
8346 p->wn_region = 0;
8347 }
8348 else
8349 {
8350 wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8351 if (wordnr == -1)
8352 return -1;
8353 }
8354 }
8355 return wordnr;
8356}
8357
/*
 * Convert the offset "nr" into a minimal number of bytes, stored in "buf".
 * Similar to utf_char2bytes, but use 8 bits in followup bytes and avoid NUL
 * bytes.
 * The top bits of the first byte tell how many bytes are used:
 *      0xxxxxxx    1 byte
 *      10xxxxxx    2 bytes
 *      110xxxxx    3 bytes
 *      111xxxxx    4 bytes
 * Returns the number of bytes stored (1 to 4).
 */
    static int
offset2bytes(nr, buf)
    int         nr;
    char_u      *buf;
{
    static int  lead[5] = {0, 0x00, 0x80, 0xc0, 0xe0};
    int         digit[4];       /* least significant first */
    int         rest = nr;
    int         len;
    int         i;

    /* Split the number in parts of base 255.  One is added to each part,
     * because we need to avoid NUL bytes. */
    for (i = 0; i < 3; ++i)
    {
        digit[i] = rest % 255 + 1;
        rest = rest / 255;
    }
    digit[3] = rest + 1;

    /* Find out how many bytes are needed: the most significant part must
     * fit in the bits that the lead byte has available. */
    if (digit[3] > 1 || digit[2] > 0x1f)
        len = 4;
    else if (digit[2] > 1 || digit[1] > 0x3f)
        len = 3;
    else if (digit[1] > 1 || digit[0] > 0x7f)
        len = 2;
    else
        len = 1;

    /* Most significant part goes first, combined with the length marker. */
    buf[0] = lead[len] + digit[len - 1];
    for (i = 1; i < len; ++i)
        buf[i] = digit[len - 1 - i];

    return len;
}
8404
/*
 * Opposite of offset2bytes().
 * "pp" points to the bytes and is advanced over them.
 * The top bits of the first byte tell how many bytes follow; every byte
 * holds a base 255 part of the number plus one.
 * Returns the offset.
 */
    static int
bytes2offset(pp)
    char_u      **pp;
{
    char_u      *p = *pp;
    int         lead;
    int         mask;
    int         follow;         /* number of bytes after the lead byte */
    int         nr;

    lead = *p++;
    if ((lead & 0x80) == 0x00)          /* 1 byte */
    {
        mask = 0x7f;
        follow = 0;
    }
    else if ((lead & 0xc0) == 0x80)     /* 2 bytes */
    {
        mask = 0x3f;
        follow = 1;
    }
    else if ((lead & 0xe0) == 0xc0)     /* 3 bytes */
    {
        mask = 0x1f;
        follow = 2;
    }
    else                                /* 4 bytes */
    {
        mask = 0x0f;
        follow = 3;
    }

    /* The lead byte has the most significant part, the following bytes each
     * add one base 255 part. */
    nr = (lead & mask) - 1;
    while (follow-- > 0)
        nr = nr * 255 + (*p++ - 1);

    *pp = p;
    return nr;
}
8445
8446/*
8447 * Write the .sug file in "fname".
8448 */
8449 static void
8450sug_write(spin, fname)
8451 spellinfo_T *spin;
8452 char_u *fname;
8453{
8454 FILE *fd;
8455 wordnode_T *tree;
8456 int nodecount;
8457 int wcount;
8458 char_u *line;
8459 linenr_T lnum;
8460 int len;
8461
8462 /* Create the file. Note that an existing file is silently overwritten! */
8463 fd = mch_fopen((char *)fname, "w");
8464 if (fd == NULL)
8465 {
8466 EMSG2(_(e_notopen), fname);
8467 return;
8468 }
8469
8470 vim_snprintf((char *)IObuff, IOSIZE,
8471 _("Writing suggestion file %s ..."), fname);
8472 spell_message(spin, IObuff);
8473
8474 /*
8475 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
8476 */
8477 if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
8478 {
8479 EMSG(_(e_write));
8480 goto theend;
8481 }
8482 putc(VIMSUGVERSION, fd); /* <versionnr> */
8483
8484 /* Write si_sugtime to the file. */
8485 put_sugtime(spin, fd); /* <timestamp> */
8486
8487 /*
8488 * <SUGWORDTREE>
8489 */
8490 spin->si_memtot = 0;
8491 tree = spin->si_foldroot->wn_sibling;
8492
8493 /* Clear the index and wnode fields in the tree. */
8494 clear_node(tree);
8495
8496 /* Count the number of nodes. Needed to be able to allocate the
8497 * memory when reading the nodes. Also fills in index for shared
8498 * nodes. */
8499 nodecount = put_node(NULL, tree, 0, 0, FALSE);
8500
8501 /* number of nodes in 4 bytes */
8502 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
8503 spin->si_memtot += nodecount + nodecount * sizeof(int);
8504
8505 /* Write the nodes. */
8506 (void)put_node(fd, tree, 0, 0, FALSE);
8507
8508 /*
8509 * <SUGTABLE>: <sugwcount> <sugline> ...
8510 */
8511 wcount = spin->si_spellbuf->b_ml.ml_line_count;
8512 put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
8513
8514 for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
8515 {
8516 /* <sugline>: <sugnr> ... NUL */
8517 line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
8518 len = STRLEN(line) + 1;
8519 if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
8520 {
8521 EMSG(_(e_write));
8522 goto theend;
8523 }
8524 spin->si_memtot += len;
8525 }
8526
8527 /* Write another byte to check for errors. */
8528 if (putc(0, fd) == EOF)
8529 EMSG(_(e_write));
8530
8531 vim_snprintf((char *)IObuff, IOSIZE,
8532 _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
8533 spell_message(spin, IObuff);
8534
8535theend:
8536 /* close the file */
8537 fclose(fd);
8538}
8539
8540/*
8541 * Open a spell buffer. This is a nameless buffer that is not in the buffer
8542 * list and only contains text lines. Can use a swapfile to reduce memory
8543 * use.
8544 * Most other fields are invalid! Esp. watch out for string options being
8545 * NULL and there is no undo info.
8546 * Returns NULL when out of memory.
8547 */
8548 static buf_T *
8549open_spellbuf()
8550{
8551 buf_T *buf;
8552
8553 buf = (buf_T *)alloc_clear(sizeof(buf_T));
8554 if (buf != NULL)
8555 {
8556 buf->b_spell = TRUE;
8557 buf->b_p_swf = TRUE; /* may create a swap file */
8558 ml_open(buf);
8559 ml_open_file(buf); /* create swap file now */
8560 }
8561 return buf;
8562}
8563
8564/*
8565 * Close the buffer used for spell info.
8566 */
8567 static void
8568close_spellbuf(buf)
8569 buf_T *buf;
8570{
8571 if (buf != NULL)
8572 {
8573 ml_close(buf, TRUE);
8574 vim_free(buf);
8575 }
8576}
8577
8578
8579/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00008580 * Create a Vim spell file from one or more word lists.
8581 * "fnames[0]" is the output file name.
8582 * "fnames[fcount - 1]" is the last input file name.
8583 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
8584 * and ".spl" is appended to make the output file name.
8585 */
8586 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008587mkspell(fcount, fnames, ascii, overwrite, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00008588 int fcount;
8589 char_u **fnames;
8590 int ascii; /* -ascii argument given */
8591 int overwrite; /* overwrite existing output file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008592 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008593{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008594 char_u fname[MAXPATHL];
8595 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00008596 char_u **innames;
8597 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008598 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008599 int i;
8600 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008601 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00008602 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008603 spellinfo_T spin;
8604
8605 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008606 spin.si_verbose = !added_word;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008607 spin.si_ascii = ascii;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008608 spin.si_followup = TRUE;
8609 spin.si_rem_accents = TRUE;
8610 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008611 ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008612 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
8613 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008614 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008615 hash_init(&spin.si_commonwords);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008616 spin.si_newcompID = 127; /* start compound ID at first maximum */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008617
Bram Moolenaarb765d632005-06-07 21:00:02 +00008618 /* default: fnames[0] is output file, following are input files */
8619 innames = &fnames[1];
8620 incount = fcount - 1;
8621
8622 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00008623 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008624 len = STRLEN(fnames[0]);
8625 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
8626 {
8627 /* For ":mkspell path/en.latin1.add" output file is
8628 * "path/en.latin1.add.spl". */
8629 innames = &fnames[0];
8630 incount = 1;
8631 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
8632 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00008633 else if (fcount == 1)
8634 {
8635 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
8636 innames = &fnames[0];
8637 incount = 1;
8638 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8639 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8640 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008641 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
8642 {
8643 /* Name ends in ".spl", use as the file name. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008644 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008645 }
8646 else
8647 /* Name should be language, make the file name from it. */
8648 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
8649 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
8650
8651 /* Check for .ascii.spl. */
8652 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
8653 spin.si_ascii = TRUE;
8654
8655 /* Check for .add.spl. */
8656 if (strstr((char *)gettail(wfname), ".add.") != NULL)
8657 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00008658 }
8659
Bram Moolenaarb765d632005-06-07 21:00:02 +00008660 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008661 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008662 else if (vim_strchr(gettail(wfname), '_') != NULL)
8663 EMSG(_("E751: Output file name must not have region name"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00008664 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008665 EMSG(_("E754: Only up to 8 regions supported"));
8666 else
8667 {
8668 /* Check for overwriting before doing things that may take a lot of
8669 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008670 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008671 {
8672 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00008673 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008674 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008675 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008676 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008677 EMSG2(_(e_isadir2), wfname);
8678 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008679 }
8680
8681 /*
8682 * Init the aff and dic pointers.
8683 * Get the region names if there are more than 2 arguments.
8684 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008685 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008686 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008687 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008688
Bram Moolenaar3982c542005-06-08 21:56:31 +00008689 if (incount > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008690 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008691 len = STRLEN(innames[i]);
8692 if (STRLEN(gettail(innames[i])) < 5
8693 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008694 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00008695 EMSG2(_("E755: Invalid region in %s"), innames[i]);
8696 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008697 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00008698 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
8699 spin.si_region_name[i * 2 + 1] =
8700 TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008701 }
8702 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00008703 spin.si_region_count = incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008704
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008705 spin.si_foldroot = wordtree_alloc(&spin);
8706 spin.si_keeproot = wordtree_alloc(&spin);
8707 spin.si_prefroot = wordtree_alloc(&spin);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008708 if (spin.si_foldroot == NULL
8709 || spin.si_keeproot == NULL
8710 || spin.si_prefroot == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008711 {
Bram Moolenaar329cc7e2005-08-10 07:51:35 +00008712 free_blocks(spin.si_blocks);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008713 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00008714 }
8715
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008716 /* When not producing a .add.spl file clear the character table when
8717 * we encounter one in the .aff file. This means we dump the current
8718 * one in the .spl file if the .aff file doesn't define one. That's
8719 * better than guessing the contents, the table will match a
8720 * previously loaded spell file. */
8721 if (!spin.si_add)
8722 spin.si_clear_chartab = TRUE;
8723
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008724 /*
8725 * Read all the .aff and .dic files.
8726 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00008727 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008728 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008729 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008730 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008731 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008732 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008733
Bram Moolenaarb765d632005-06-07 21:00:02 +00008734 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008735 if (mch_stat((char *)fname, &st) >= 0)
8736 {
8737 /* Read the .aff file. Will init "spin->si_conv" based on the
8738 * "SET" line. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008739 afile[i] = spell_read_aff(&spin, fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008740 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008741 error = TRUE;
8742 else
8743 {
8744 /* Read the .dic file and store the words in the trees. */
8745 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00008746 innames[i]);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008747 if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008748 error = TRUE;
8749 }
8750 }
8751 else
8752 {
8753 /* No .aff file, try reading the file as a word list. Store
8754 * the words in the trees. */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008755 if (spell_read_wordfile(&spin, innames[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008756 error = TRUE;
8757 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008758
Bram Moolenaarb765d632005-06-07 21:00:02 +00008759#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008760 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00008761 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008762#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008763 }
8764
Bram Moolenaar78622822005-08-23 21:00:13 +00008765 if (spin.si_compflags != NULL && spin.si_nobreak)
8766 MSG(_("Warning: both compounding and NOBREAK specified"));
8767
Bram Moolenaar4770d092006-01-12 23:22:24 +00008768 if (!error && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008769 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00008770 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00008771 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008772 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00008773 spell_message(&spin, (char_u *)_(msg_compressing));
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00008774 wordtree_compress(&spin, spin.si_foldroot);
8775 wordtree_compress(&spin, spin.si_keeproot);
8776 wordtree_compress(&spin, spin.si_prefroot);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008777 }
8778
Bram Moolenaar4770d092006-01-12 23:22:24 +00008779 if (!error && !got_int)
Bram Moolenaar51485f02005-06-04 21:55:20 +00008780 {
8781 /*
8782 * Write the info in the spell file.
8783 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00008784 vim_snprintf((char *)IObuff, IOSIZE,
8785 _("Writing spell file %s ..."), wfname);
8786 spell_message(&spin, IObuff);
Bram Moolenaar50cde822005-06-05 21:54:54 +00008787
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008788 error = write_vim_spell(&spin, wfname) == FAIL;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008789
Bram Moolenaar4770d092006-01-12 23:22:24 +00008790 spell_message(&spin, (char_u *)_("Done!"));
8791 vim_snprintf((char *)IObuff, IOSIZE,
8792 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
8793 spell_message(&spin, IObuff);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008794
Bram Moolenaar4770d092006-01-12 23:22:24 +00008795 /*
8796 * If the file is loaded need to reload it.
8797 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008798 if (!error)
8799 spell_reload_one(wfname, added_word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008800 }
8801
8802 /* Free the allocated memory. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008803 ga_clear(&spin.si_rep);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008804 ga_clear(&spin.si_repsal);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008805 ga_clear(&spin.si_sal);
8806 ga_clear(&spin.si_map);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008807 ga_clear(&spin.si_prefcond);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008808 hash_clear_all(&spin.si_commonwords, 0);
Bram Moolenaar51485f02005-06-04 21:55:20 +00008809
8810 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008811 for (i = 0; i < incount; ++i)
8812 if (afile[i] != NULL)
8813 spell_free_aff(afile[i]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00008814
8815 /* Free all the bits and pieces at once. */
8816 free_blocks(spin.si_blocks);
Bram Moolenaar4770d092006-01-12 23:22:24 +00008817
8818 /*
8819 * If there is soundfolding info and no NOSUGFILE item create the
8820 * .sug file with the soundfolded word trie.
8821 */
8822 if (spin.si_sugtime != 0 && !error && !got_int)
8823 spell_make_sugfile(&spin, wfname);
8824
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008825 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00008826}
8827
Bram Moolenaar4770d092006-01-12 23:22:24 +00008828/*
8829 * Display a message for spell file processing when 'verbose' is set or using
8830 * ":mkspell". "str" can be IObuff.
8831 */
8832 static void
8833spell_message(spin, str)
8834 spellinfo_T *spin;
8835 char_u *str;
8836{
8837 if (spin->si_verbose || p_verbose > 2)
8838 {
8839 if (!spin->si_verbose)
8840 verbose_enter();
8841 MSG(str);
8842 out_flush();
8843 if (!spin->si_verbose)
8844 verbose_leave();
8845 }
8846}
Bram Moolenaarb765d632005-06-07 21:00:02 +00008847
8848/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008849 * ":[count]spellgood {word}"
8850 * ":[count]spellwrong {word}"
Bram Moolenaard0131a82006-03-04 21:46:13 +00008851 * ":[count]spellundo {word}"
Bram Moolenaarb765d632005-06-07 21:00:02 +00008852 */
8853 void
8854ex_spell(eap)
8855 exarg_T *eap;
8856{
Bram Moolenaar7887d882005-07-01 22:33:52 +00008857 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
Bram Moolenaard0131a82006-03-04 21:46:13 +00008858 eap->forceit ? 0 : (int)eap->line2,
8859 eap->cmdidx == CMD_spellundo);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008860}
8861
8862/*
8863 * Add "word[len]" to 'spellfile' as a good or bad word.
8864 */
8865 void
Bram Moolenaard0131a82006-03-04 21:46:13 +00008866spell_add_word(word, len, bad, index, undo)
Bram Moolenaarb765d632005-06-07 21:00:02 +00008867 char_u *word;
8868 int len;
8869 int bad;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008870 int index; /* "zG" and "zW": zero, otherwise index in
8871 'spellfile' */
Bram Moolenaard0131a82006-03-04 21:46:13 +00008872 int undo; /* TRUE for "zug", "zuG", "zuw" and "zuW" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00008873{
8874 FILE *fd;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008875 buf_T *buf = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008876 int new_spf = FALSE;
Bram Moolenaar7887d882005-07-01 22:33:52 +00008877 char_u *fname;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008878 char_u fnamebuf[MAXPATHL];
8879 char_u line[MAXWLEN * 2];
8880 long fpos, fpos_next = 0;
8881 int i;
8882 char_u *spf;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008883
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008884 if (index == 0) /* use internal wordlist */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008885 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008886 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008887 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008888 int_wordlist = vim_tempname('s');
8889 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008890 return;
8891 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008892 fname = int_wordlist;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008893 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00008894 else
8895 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00008896 /* If 'spellfile' isn't set figure out a good default value. */
8897 if (*curbuf->b_p_spf == NUL)
8898 {
8899 init_spellfile();
8900 new_spf = TRUE;
8901 }
8902
8903 if (*curbuf->b_p_spf == NUL)
8904 {
Bram Moolenaarf75a9632005-09-13 21:20:47 +00008905 EMSG2(_(e_notset), "spellfile");
Bram Moolenaar7887d882005-07-01 22:33:52 +00008906 return;
8907 }
8908
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008909 for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
8910 {
8911 copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
8912 if (i == index)
8913 break;
8914 if (*spf == NUL)
8915 {
Bram Moolenaare344bea2005-09-01 20:46:49 +00008916 EMSGN(_("E765: 'spellfile' does not have %ld entries"), index);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008917 return;
8918 }
8919 }
8920
Bram Moolenaarb765d632005-06-07 21:00:02 +00008921 /* Check that the user isn't editing the .add file somewhere. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008922 buf = buflist_findname_exp(fnamebuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00008923 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
8924 buf = NULL;
8925 if (buf != NULL && bufIsChanged(buf))
Bram Moolenaarb765d632005-06-07 21:00:02 +00008926 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00008927 EMSG(_(e_bufloaded));
8928 return;
Bram Moolenaarb765d632005-06-07 21:00:02 +00008929 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00008930
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008931 fname = fnamebuf;
8932 }
8933
Bram Moolenaard0131a82006-03-04 21:46:13 +00008934 if (bad || undo)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008935 {
Bram Moolenaard0131a82006-03-04 21:46:13 +00008936 /* When the word appears as good word we need to remove that one,
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008937 * since its flags sort before the one with WF_BANNED. */
8938 fd = mch_fopen((char *)fname, "r");
8939 if (fd != NULL)
8940 {
8941 while (!vim_fgets(line, MAXWLEN * 2, fd))
8942 {
8943 fpos = fpos_next;
8944 fpos_next = ftell(fd);
8945 if (STRNCMP(word, line, len) == 0
8946 && (line[len] == '/' || line[len] < ' '))
8947 {
8948 /* Found duplicate word. Remove it by writing a '#' at
8949 * the start of the line. Mixing reading and writing
8950 * doesn't work for all systems, close the file first. */
8951 fclose(fd);
8952 fd = mch_fopen((char *)fname, "r+");
8953 if (fd == NULL)
8954 break;
8955 if (fseek(fd, fpos, SEEK_SET) == 0)
Bram Moolenaard0131a82006-03-04 21:46:13 +00008956 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008957 fputc('#', fd);
Bram Moolenaard0131a82006-03-04 21:46:13 +00008958 if (undo)
8959 smsg((char_u *)_("Word removed from %s"), NameBuff);
8960 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00008961 fseek(fd, fpos_next, SEEK_SET);
8962 }
8963 }
8964 fclose(fd);
8965 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00008966 }
8967
Bram Moolenaard0131a82006-03-04 21:46:13 +00008968 if (!undo)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008969 {
Bram Moolenaard0131a82006-03-04 21:46:13 +00008970 fd = mch_fopen((char *)fname, "a");
8971 if (fd == NULL && new_spf)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008972 {
Bram Moolenaard0131a82006-03-04 21:46:13 +00008973 /* We just initialized the 'spellfile' option and can't open the
8974 * file. We may need to create the "spell" directory first. We
8975 * already checked the runtime directory is writable in
8976 * init_spellfile(). */
8977 if (!dir_of_file_exists(fname))
8978 {
8979 /* The directory doesn't exist. Try creating it and opening
8980 * the file again. */
8981 vim_mkdir(NameBuff, 0755);
8982 fd = mch_fopen((char *)fname, "a");
8983 }
8984 }
8985
8986 if (fd == NULL)
8987 EMSG2(_(e_notopen), fname);
8988 else
8989 {
8990 if (bad)
8991 fprintf(fd, "%.*s/!\n", len, word);
8992 else
8993 fprintf(fd, "%.*s\n", len, word);
8994 fclose(fd);
8995
8996 home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
8997 smsg((char_u *)_("Word added to %s"), NameBuff);
Bram Moolenaar7887d882005-07-01 22:33:52 +00008998 }
8999 }
9000
Bram Moolenaard0131a82006-03-04 21:46:13 +00009001 if (fd != NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00009002 {
Bram Moolenaar7887d882005-07-01 22:33:52 +00009003 /* Update the .add.spl file. */
9004 mkspell(1, &fname, FALSE, TRUE, TRUE);
9005
9006 /* If the .add file is edited somewhere, reload it. */
9007 if (buf != NULL)
Bram Moolenaarea8bd732006-01-14 21:15:59 +00009008 buf_reload(buf, buf->b_orig_mode);
Bram Moolenaar7887d882005-07-01 22:33:52 +00009009
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00009010 redraw_all_later(SOME_VALID);
Bram Moolenaarb765d632005-06-07 21:00:02 +00009011 }
9012}
9013
9014/*
9015 * Initialize 'spellfile' for the current buffer.
9016 */
9017 static void
9018init_spellfile()
9019{
9020 char_u buf[MAXPATHL];
9021 int l;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00009022 char_u *fname;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009023 char_u *rtp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009024 char_u *lend;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009025 int aspath = FALSE;
9026 char_u *lstart = curbuf->b_p_spl;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009027
9028 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
9029 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009030 /* Find the end of the language name. Exclude the region. If there
9031 * is a path separator remember the start of the tail. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009032 for (lend = curbuf->b_p_spl; *lend != NUL
9033 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009034 if (vim_ispathsep(*lend))
9035 {
9036 aspath = TRUE;
9037 lstart = lend + 1;
9038 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009039
9040 /* Loop over all entries in 'runtimepath'. Use the first one where we
9041 * are allowed to write. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00009042 rtp = p_rtp;
9043 while (*rtp != NUL)
9044 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009045 if (aspath)
9046 /* Use directory of an entry with path, e.g., for
9047 * "/dir/lg.utf-8.spl" use "/dir". */
9048 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
9049 else
9050 /* Copy the path from 'runtimepath' to buf[]. */
9051 copy_option_part(&rtp, buf, MAXPATHL, ",");
Bram Moolenaarb765d632005-06-07 21:00:02 +00009052 if (filewritable(buf) == 2)
9053 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00009054 /* Use the first language name from 'spelllang' and the
9055 * encoding used in the first loaded .spl file. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009056 if (aspath)
9057 vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
9058 else
9059 {
9060 l = STRLEN(buf);
9061 vim_snprintf((char *)buf + l, MAXPATHL - l,
9062 "/spell/%.*s", (int)(lend - lstart), lstart);
9063 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00009064 l = STRLEN(buf);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009065 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
9066 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
9067 fname != NULL
9068 && strstr((char *)gettail(fname), ".ascii.") != NULL
9069 ? (char_u *)"ascii" : spell_enc());
Bram Moolenaarb765d632005-06-07 21:00:02 +00009070 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
9071 break;
9072 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00009073 aspath = FALSE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00009074 }
9075 }
9076}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009077
Bram Moolenaar51485f02005-06-04 21:55:20 +00009078
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009079/*
9080 * Init the chartab used for spelling for ASCII.
9081 * EBCDIC is not supported!
9082 */
9083 static void
9084clear_spell_chartab(sp)
9085 spelltab_T *sp;
9086{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009087 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009088
9089 /* Init everything to FALSE. */
9090 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
9091 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
9092 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009093 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009094 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009095 sp->st_upper[i] = i;
9096 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009097
9098 /* We include digits. A word shouldn't start with a digit, but handling
9099 * that is done separately. */
9100 for (i = '0'; i <= '9'; ++i)
9101 sp->st_isw[i] = TRUE;
9102 for (i = 'A'; i <= 'Z'; ++i)
9103 {
9104 sp->st_isw[i] = TRUE;
9105 sp->st_isu[i] = TRUE;
9106 sp->st_fold[i] = i + 0x20;
9107 }
9108 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009109 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009110 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009111 sp->st_upper[i] = i - 0x20;
9112 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009113}
9114
9115/*
9116 * Init the chartab used for spelling. Only depends on 'encoding'.
9117 * Called once while starting up and when 'encoding' changes.
9118 * The default is to use isalpha(), but the spell file should define the word
9119 * characters to make it possible that 'encoding' differs from the current
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009120 * locale. For utf-8 we don't use isalpha() but our own functions.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009121 */
9122 void
9123init_spell_chartab()
9124{
9125 int i;
9126
9127 did_set_spelltab = FALSE;
9128 clear_spell_chartab(&spelltab);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009129#ifdef FEAT_MBYTE
9130 if (enc_dbcs)
9131 {
9132 /* DBCS: assume double-wide characters are word characters. */
9133 for (i = 128; i <= 255; ++i)
9134 if (MB_BYTE2LEN(i) == 2)
9135 spelltab.st_isw[i] = TRUE;
9136 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009137 else if (enc_utf8)
9138 {
9139 for (i = 128; i < 256; ++i)
9140 {
9141 spelltab.st_isu[i] = utf_isupper(i);
9142 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
9143 spelltab.st_fold[i] = utf_fold(i);
9144 spelltab.st_upper[i] = utf_toupper(i);
9145 }
9146 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009147 else
9148#endif
9149 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009150 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009151 for (i = 128; i < 256; ++i)
9152 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009153 if (MB_ISUPPER(i))
9154 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009155 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009156 spelltab.st_isu[i] = TRUE;
9157 spelltab.st_fold[i] = MB_TOLOWER(i);
9158 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009159 else if (MB_ISLOWER(i))
9160 {
9161 spelltab.st_isw[i] = TRUE;
9162 spelltab.st_upper[i] = MB_TOUPPER(i);
9163 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009164 }
9165 }
9166}
9167
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009168/*
9169 * Set the spell character tables from strings in the affix file.
9170 */
9171 static int
9172set_spell_chartab(fol, low, upp)
9173 char_u *fol;
9174 char_u *low;
9175 char_u *upp;
9176{
9177 /* We build the new tables here first, so that we can compare with the
9178 * previous one. */
9179 spelltab_T new_st;
9180 char_u *pf = fol, *pl = low, *pu = upp;
9181 int f, l, u;
9182
9183 clear_spell_chartab(&new_st);
9184
9185 while (*pf != NUL)
9186 {
9187 if (*pl == NUL || *pu == NUL)
9188 {
9189 EMSG(_(e_affform));
9190 return FAIL;
9191 }
9192#ifdef FEAT_MBYTE
9193 f = mb_ptr2char_adv(&pf);
9194 l = mb_ptr2char_adv(&pl);
9195 u = mb_ptr2char_adv(&pu);
9196#else
9197 f = *pf++;
9198 l = *pl++;
9199 u = *pu++;
9200#endif
9201 /* Every character that appears is a word character. */
9202 if (f < 256)
9203 new_st.st_isw[f] = TRUE;
9204 if (l < 256)
9205 new_st.st_isw[l] = TRUE;
9206 if (u < 256)
9207 new_st.st_isw[u] = TRUE;
9208
9209 /* if "LOW" and "FOL" are not the same the "LOW" char needs
9210 * case-folding */
9211 if (l < 256 && l != f)
9212 {
9213 if (f >= 256)
9214 {
9215 EMSG(_(e_affrange));
9216 return FAIL;
9217 }
9218 new_st.st_fold[l] = f;
9219 }
9220
9221 /* if "UPP" and "FOL" are not the same the "UPP" char needs
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009222 * case-folding, it's upper case and the "UPP" is the upper case of
9223 * "FOL" . */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009224 if (u < 256 && u != f)
9225 {
9226 if (f >= 256)
9227 {
9228 EMSG(_(e_affrange));
9229 return FAIL;
9230 }
9231 new_st.st_fold[u] = f;
9232 new_st.st_isu[u] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009233 new_st.st_upper[f] = u;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009234 }
9235 }
9236
9237 if (*pl != NUL || *pu != NUL)
9238 {
9239 EMSG(_(e_affform));
9240 return FAIL;
9241 }
9242
9243 return set_spell_finish(&new_st);
9244}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009245
9246/*
9247 * Set the spell character tables from strings in the .spl file.
9248 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00009249 static void
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009250set_spell_charflags(flags, cnt, fol)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009251 char_u *flags;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009252 int cnt; /* length of "flags" */
9253 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009254{
9255 /* We build the new tables here first, so that we can compare with the
9256 * previous one. */
9257 spelltab_T new_st;
9258 int i;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009259 char_u *p = fol;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009260 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009261
9262 clear_spell_chartab(&new_st);
9263
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009264 for (i = 0; i < 128; ++i)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009265 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009266 if (i < cnt)
9267 {
9268 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9269 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9270 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009271
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009272 if (*p != NUL)
9273 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009274#ifdef FEAT_MBYTE
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009275 c = mb_ptr2char_adv(&p);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009276#else
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009277 c = *p++;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009278#endif
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00009279 new_st.st_fold[i + 128] = c;
9280 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9281 new_st.st_upper[c] = i + 128;
9282 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009283 }
9284
Bram Moolenaar5195e452005-08-19 20:32:47 +00009285 (void)set_spell_finish(&new_st);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009286}
9287
9288 static int
9289set_spell_finish(new_st)
9290 spelltab_T *new_st;
9291{
9292 int i;
9293
9294 if (did_set_spelltab)
9295 {
9296 /* check that it's the same table */
9297 for (i = 0; i < 256; ++i)
9298 {
9299 if (spelltab.st_isw[i] != new_st->st_isw[i]
9300 || spelltab.st_isu[i] != new_st->st_isu[i]
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009301 || spelltab.st_fold[i] != new_st->st_fold[i]
9302 || spelltab.st_upper[i] != new_st->st_upper[i])
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009303 {
9304 EMSG(_("E763: Word characters differ between spell files"));
9305 return FAIL;
9306 }
9307 }
9308 }
9309 else
9310 {
9311 /* copy the new spelltab into the one being used */
9312 spelltab = *new_st;
9313 did_set_spelltab = TRUE;
9314 }
9315
9316 return OK;
9317}
9318
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009319/*
Bram Moolenaarea408852005-06-25 22:49:46 +00009320 * Return TRUE if "p" points to a word character.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009321 * As a special case we see "midword" characters as word character when it is
Bram Moolenaarea408852005-06-25 22:49:46 +00009322 * followed by a word character. This finds they'there but not 'they there'.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009323 * Thus this only works properly when past the first character of the word.
Bram Moolenaarea408852005-06-25 22:49:46 +00009324 */
9325 static int
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009326spell_iswordp(p, buf)
Bram Moolenaarea408852005-06-25 22:49:46 +00009327 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009328 buf_T *buf; /* buffer used */
Bram Moolenaarea408852005-06-25 22:49:46 +00009329{
Bram Moolenaarea408852005-06-25 22:49:46 +00009330#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009331 char_u *s;
9332 int l;
9333 int c;
9334
9335 if (has_mbyte)
9336 {
9337 l = MB_BYTE2LEN(*p);
9338 s = p;
9339 if (l == 1)
9340 {
9341 /* be quick for ASCII */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009342 if (buf->b_spell_ismw[*p])
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009343 {
9344 s = p + 1; /* skip a mid-word character */
9345 l = MB_BYTE2LEN(*s);
9346 }
9347 }
9348 else
9349 {
9350 c = mb_ptr2char(p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009351 if (c < 256 ? buf->b_spell_ismw[c]
9352 : (buf->b_spell_ismw_mb != NULL
9353 && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009354 {
9355 s = p + l;
9356 l = MB_BYTE2LEN(*s);
9357 }
9358 }
9359
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009360 c = mb_ptr2char(s);
9361 if (c > 255)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009362 return mb_get_class(s) >= 2;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009363 return spelltab.st_isw[c];
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009364 }
Bram Moolenaarea408852005-06-25 22:49:46 +00009365#endif
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009366
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009367 return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
9368}
9369
9370/*
9371 * Return TRUE if "p" points to a word character.
9372 * Unlike spell_iswordp() this doesn't check for "midword" characters.
9373 */
9374 static int
9375spell_iswordp_nmw(p)
9376 char_u *p;
9377{
9378#ifdef FEAT_MBYTE
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009379 int c;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009380
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009381 if (has_mbyte)
9382 {
9383 c = mb_ptr2char(p);
9384 if (c > 255)
9385 return mb_get_class(p) >= 2;
9386 return spelltab.st_isw[c];
9387 }
9388#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +00009389 return spelltab.st_isw[*p];
Bram Moolenaarea408852005-06-25 22:49:46 +00009390}
9391
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array of characters
 * instead of bytes.
 */
    static int
spell_iswordp_w(p, buf)
    int         *p;
    buf_T       *buf;
{
    int         *s = p;
    int         midword;

    if (*p < 256)
        midword = buf->b_spell_ismw[*p];
    else
        midword = (buf->b_spell_ismw_mb != NULL
                        && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL);

    /* When on a mid-word character look at the character after it. */
    if (midword)
        ++s;

    if (*s <= 255)
        return spelltab.st_isw[*s];

    /* Not in the table: decide by the character class. */
    if (enc_utf8)
        return utf_class(*s) >= 2;
    if (enc_dbcs)
        return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
    return 0;
}
#endif
9422
Bram Moolenaarea408852005-06-25 22:49:46 +00009423/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009424 * Write the table with prefix conditions to the .spl file.
Bram Moolenaar5195e452005-08-19 20:32:47 +00009425 * When "fd" is NULL only count the length of what is written.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009426 */
Bram Moolenaar5195e452005-08-19 20:32:47 +00009427 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009428write_spell_prefcond(fd, gap)
9429 FILE *fd;
9430 garray_T *gap;
9431{
9432 int i;
9433 char_u *p;
9434 int len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009435 int totlen;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009436
Bram Moolenaar5195e452005-08-19 20:32:47 +00009437 if (fd != NULL)
9438 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
9439
9440 totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009441
9442 for (i = 0; i < gap->ga_len; ++i)
9443 {
9444 /* <prefcond> : <condlen> <condstr> */
9445 p = ((char_u **)gap->ga_data)[i];
Bram Moolenaar5195e452005-08-19 20:32:47 +00009446 if (p != NULL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009447 {
9448 len = STRLEN(p);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009449 if (fd != NULL)
9450 {
9451 fputc(len, fd);
9452 fwrite(p, (size_t)len, (size_t)1, fd);
9453 }
9454 totlen += len;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00009455 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00009456 else if (fd != NULL)
9457 fputc(0, fd);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009458 }
9459
Bram Moolenaar5195e452005-08-19 20:32:47 +00009460 return totlen;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009461}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009462
9463/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009464 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
9465 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009466 * When using a multi-byte 'encoding' the length may change!
9467 * Returns FAIL when something wrong.
9468 */
9469 static int
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009470spell_casefold(str, len, buf, buflen)
9471 char_u *str;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009472 int len;
9473 char_u *buf;
9474 int buflen;
9475{
9476 int i;
9477
9478 if (len >= buflen)
9479 {
9480 buf[0] = NUL;
9481 return FAIL; /* result will not fit */
9482 }
9483
9484#ifdef FEAT_MBYTE
9485 if (has_mbyte)
9486 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009487 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009488 char_u *p;
9489 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009490
9491 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009492 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009493 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009494 if (outi + MB_MAXBYTES > buflen)
9495 {
9496 buf[outi] = NUL;
9497 return FAIL;
9498 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009499 c = mb_cptr2char_adv(&p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009500 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009501 }
9502 buf[outi] = NUL;
9503 }
9504 else
9505#endif
9506 {
9507 /* Be quick for non-multibyte encodings. */
9508 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009509 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009510 buf[i] = NUL;
9511 }
9512
9513 return OK;
9514}
9515
/* Values for "sps_flags": the suggestion method given in 'spellsuggest'.
 * spell_check_sps() stores at most one of them and falls back to SPS_BEST
 * when the option names none. */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

static int sps_flags = SPS_BEST;    /* flags from 'spellsuggest' */
static int sps_limit = 9999;        /* max nr of suggestions given */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009523
9524/*
9525 * Check the 'spellsuggest' option. Return FAIL if it's wrong.
Bram Moolenaar5195e452005-08-19 20:32:47 +00009526 * Sets "sps_flags" and "sps_limit".
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009527 */
9528 int
9529spell_check_sps()
9530{
9531 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009532 char_u *s;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009533 char_u buf[MAXPATHL];
9534 int f;
9535
9536 sps_flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009537 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009538
9539 for (p = p_sps; *p != NUL; )
9540 {
9541 copy_option_part(&p, buf, MAXPATHL, ",");
9542
9543 f = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009544 if (VIM_ISDIGIT(*buf))
9545 {
9546 s = buf;
9547 sps_limit = getdigits(&s);
9548 if (*s != NUL && !VIM_ISDIGIT(*s))
9549 f = -1;
9550 }
9551 else if (STRCMP(buf, "best") == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009552 f = SPS_BEST;
9553 else if (STRCMP(buf, "fast") == 0)
9554 f = SPS_FAST;
9555 else if (STRCMP(buf, "double") == 0)
9556 f = SPS_DOUBLE;
9557 else if (STRNCMP(buf, "expr:", 5) != 0
9558 && STRNCMP(buf, "file:", 5) != 0)
9559 f = -1;
9560
9561 if (f == -1 || (sps_flags != 0 && f != 0))
9562 {
9563 sps_flags = SPS_BEST;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009564 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009565 return FAIL;
9566 }
9567 if (f != 0)
9568 sps_flags = f;
9569 }
9570
9571 if (sps_flags == 0)
9572 sps_flags = SPS_BEST;
9573
9574 return OK;
9575}
9576
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009577/*
9578 * "z?": Find badly spelled word under or after the cursor.
9579 * Give suggestions for the properly spelled word.
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009580 * In Visual mode use the highlighted word as the bad word.
Bram Moolenaard12a1322005-08-21 22:08:24 +00009581 * When "count" is non-zero use that suggestion.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009582 */
9583 void
Bram Moolenaard12a1322005-08-21 22:08:24 +00009584spell_suggest(count)
9585 int count;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009586{
9587 char_u *line;
9588 pos_T prev_cursor = curwin->w_cursor;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009589 char_u wcopy[MAXWLEN + 2];
9590 char_u *p;
9591 int i;
9592 int c;
9593 suginfo_T sug;
9594 suggest_T *stp;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009595 int mouse_used;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009596 int need_cap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009597 int limit;
Bram Moolenaard12a1322005-08-21 22:08:24 +00009598 int selected = count;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009599 int badlen = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009600
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009601 if (no_spell_checking(curwin))
9602 return;
9603
9604#ifdef FEAT_VISUAL
9605 if (VIsual_active)
9606 {
9607 /* Use the Visually selected text as the bad word. But reject
9608 * a multi-line selection. */
9609 if (curwin->w_cursor.lnum != VIsual.lnum)
9610 {
9611 vim_beep();
9612 return;
9613 }
9614 badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
9615 if (badlen < 0)
9616 badlen = -badlen;
9617 else
9618 curwin->w_cursor.col = VIsual.col;
9619 ++badlen;
9620 end_visual_mode();
9621 }
9622 else
9623#endif
9624 /* Find the start of the badly spelled word. */
9625 if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
Bram Moolenaar0c405862005-06-22 22:26:26 +00009626 || curwin->w_cursor.col > prev_cursor.col)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009627 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00009628 /* No bad word or it starts after the cursor: use the word under the
9629 * cursor. */
9630 curwin->w_cursor = prev_cursor;
9631 line = ml_get_curline();
9632 p = line + curwin->w_cursor.col;
9633 /* Backup to before start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009634 while (p > line && spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +00009635 mb_ptr_back(line, p);
9636 /* Forward to start of word. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009637 while (*p != NUL && !spell_iswordp_nmw(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +00009638 mb_ptr_adv(p);
9639
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00009640 if (!spell_iswordp_nmw(p)) /* No word found. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00009641 {
9642 beep_flush();
9643 return;
9644 }
9645 curwin->w_cursor.col = p - line;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009646 }
9647
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009648 /* Get the word and its length. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009649
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009650 /* Figure out if the word should be capitalised. */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009651 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009652
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009653 line = ml_get_curline();
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00009654
Bram Moolenaar5195e452005-08-19 20:32:47 +00009655 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in
9656 * 'spellsuggest', whatever is smaller. */
9657 if (sps_limit > (int)Rows - 2)
9658 limit = (int)Rows - 2;
9659 else
9660 limit = sps_limit;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009661 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
Bram Moolenaar4770d092006-01-12 23:22:24 +00009662 TRUE, need_cap, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009663
9664 if (sug.su_ga.ga_len == 0)
9665 MSG(_("Sorry, no suggestions"));
Bram Moolenaard12a1322005-08-21 22:08:24 +00009666 else if (count > 0)
9667 {
9668 if (count > sug.su_ga.ga_len)
9669 smsg((char_u *)_("Sorry, only %ld suggestions"),
9670 (long)sug.su_ga.ga_len);
9671 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009672 else
9673 {
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009674 vim_free(repl_from);
9675 repl_from = NULL;
9676 vim_free(repl_to);
9677 repl_to = NULL;
9678
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009679#ifdef FEAT_RIGHTLEFT
9680 /* When 'rightleft' is set the list is drawn right-left. */
9681 cmdmsg_rl = curwin->w_p_rl;
9682 if (cmdmsg_rl)
9683 msg_col = Columns - 1;
9684#endif
9685
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009686 /* List the suggestions. */
9687 msg_start();
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009688 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009689 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
9690 sug.su_badlen, sug.su_badptr);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009691#ifdef FEAT_RIGHTLEFT
9692 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
9693 {
9694 /* And now the rabbit from the high hat: Avoid showing the
9695 * untranslated message rightleft. */
9696 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
9697 sug.su_badlen, sug.su_badptr);
9698 }
9699#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009700 msg_puts(IObuff);
9701 msg_clr_eos();
9702 msg_putchar('\n');
Bram Moolenaar0c405862005-06-22 22:26:26 +00009703
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009704 msg_scroll = TRUE;
9705 for (i = 0; i < sug.su_ga.ga_len; ++i)
9706 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009707 stp = &SUG(sug.su_ga, i);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009708
9709 /* The suggested word may replace only part of the bad word, add
9710 * the not replaced part. */
9711 STRCPY(wcopy, stp->st_word);
9712 if (sug.su_badlen > stp->st_orglen)
Bram Moolenaar4770d092006-01-12 23:22:24 +00009713 vim_strncpy(wcopy + stp->st_wordlen,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009714 sug.su_badptr + stp->st_orglen,
9715 sug.su_badlen - stp->st_orglen);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009716 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
9717#ifdef FEAT_RIGHTLEFT
9718 if (cmdmsg_rl)
9719 rl_mirror(IObuff);
9720#endif
9721 msg_puts(IObuff);
9722
9723 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
Bram Moolenaar0c405862005-06-22 22:26:26 +00009724 msg_puts(IObuff);
9725
9726 /* The word may replace more than "su_badlen". */
9727 if (sug.su_badlen < stp->st_orglen)
9728 {
9729 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
9730 stp->st_orglen, sug.su_badptr);
9731 msg_puts(IObuff);
9732 }
9733
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009734 if (p_verbose > 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009735 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00009736 /* Add the score. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009737 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009738 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009739 stp->st_salscore ? "s " : "",
9740 stp->st_score, stp->st_altscore);
9741 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009742 vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
Bram Moolenaar0c405862005-06-22 22:26:26 +00009743 stp->st_score);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009744#ifdef FEAT_RIGHTLEFT
9745 if (cmdmsg_rl)
9746 /* Mirror the numbers, but keep the leading space. */
9747 rl_mirror(IObuff + 1);
9748#endif
Bram Moolenaar0c405862005-06-22 22:26:26 +00009749 msg_advance(30);
9750 msg_puts(IObuff);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009751 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009752 msg_putchar('\n');
9753 }
9754
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00009755#ifdef FEAT_RIGHTLEFT
9756 cmdmsg_rl = FALSE;
9757 msg_col = 0;
9758#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009759 /* Ask for choice. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00009760 selected = prompt_for_number(&mouse_used);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009761 if (mouse_used)
Bram Moolenaard12a1322005-08-21 22:08:24 +00009762 selected -= lines_left;
Bram Moolenaar0fd92892006-03-09 22:27:48 +00009763 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009764 }
9765
Bram Moolenaard12a1322005-08-21 22:08:24 +00009766 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
9767 {
9768 /* Save the from and to text for :spellrepall. */
9769 stp = &SUG(sug.su_ga, selected - 1);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00009770 if (sug.su_badlen > stp->st_orglen)
9771 {
9772 /* Replacing less than "su_badlen", append the remainder to
9773 * repl_to. */
9774 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
9775 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
9776 sug.su_badlen - stp->st_orglen,
9777 sug.su_badptr + stp->st_orglen);
9778 repl_to = vim_strsave(IObuff);
9779 }
9780 else
9781 {
9782 /* Replacing su_badlen or more, use the whole word. */
9783 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
9784 repl_to = vim_strsave(stp->st_word);
9785 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00009786
9787 /* Replace the word. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009788 p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
Bram Moolenaard12a1322005-08-21 22:08:24 +00009789 if (p != NULL)
9790 {
9791 c = sug.su_badptr - line;
9792 mch_memmove(p, line, c);
9793 STRCPY(p + c, stp->st_word);
9794 STRCAT(p, sug.su_badptr + stp->st_orglen);
9795 ml_replace(curwin->w_cursor.lnum, p, FALSE);
9796 curwin->w_cursor.col = c;
9797 changed_bytes(curwin->w_cursor.lnum, c);
9798
9799 /* For redo we use a change-word command. */
9800 ResetRedobuff();
9801 AppendToRedobuff((char_u *)"ciw");
Bram Moolenaarebefac62005-12-28 22:39:57 +00009802 AppendToRedobuffLit(p + c,
Bram Moolenaar4770d092006-01-12 23:22:24 +00009803 stp->st_wordlen + sug.su_badlen - stp->st_orglen);
Bram Moolenaard12a1322005-08-21 22:08:24 +00009804 AppendCharToRedobuff(ESC);
9805 }
9806 }
9807 else
9808 curwin->w_cursor = prev_cursor;
9809
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009810 spell_find_cleanup(&sug);
9811}
9812
9813/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009814 * Check if the word at line "lnum" column "col" is required to start with a
9815 * capital. This uses 'spellcapcheck' of the current buffer.
9816 */
9817 static int
9818check_need_cap(lnum, col)
9819 linenr_T lnum;
9820 colnr_T col;
9821{
9822 int need_cap = FALSE;
9823 char_u *line;
9824 char_u *line_copy = NULL;
9825 char_u *p;
9826 colnr_T endcol;
9827 regmatch_T regmatch;
9828
9829 if (curbuf->b_cap_prog == NULL)
9830 return FALSE;
9831
9832 line = ml_get_curline();
9833 endcol = 0;
9834 if ((int)(skipwhite(line) - line) >= (int)col)
9835 {
9836 /* At start of line, check if previous line is empty or sentence
9837 * ends there. */
9838 if (lnum == 1)
9839 need_cap = TRUE;
9840 else
9841 {
9842 line = ml_get(lnum - 1);
9843 if (*skipwhite(line) == NUL)
9844 need_cap = TRUE;
9845 else
9846 {
9847 /* Append a space in place of the line break. */
9848 line_copy = concat_str(line, (char_u *)" ");
9849 line = line_copy;
9850 endcol = STRLEN(line);
9851 }
9852 }
9853 }
9854 else
9855 endcol = col;
9856
9857 if (endcol > 0)
9858 {
9859 /* Check if sentence ends before the bad word. */
9860 regmatch.regprog = curbuf->b_cap_prog;
9861 regmatch.rm_ic = FALSE;
9862 p = line + endcol;
9863 for (;;)
9864 {
9865 mb_ptr_back(line, p);
9866 if (p == line || spell_iswordp_nmw(p))
9867 break;
9868 if (vim_regexec(&regmatch, p, 0)
9869 && regmatch.endp[0] == line + endcol)
9870 {
9871 need_cap = TRUE;
9872 break;
9873 }
9874 }
9875 }
9876
9877 vim_free(line_copy);
9878
9879 return need_cap;
9880}
9881
9882
9883/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009884 * ":spellrepall"
9885 */
9886/*ARGSUSED*/
9887 void
9888ex_spellrepall(eap)
9889 exarg_T *eap;
9890{
9891 pos_T pos = curwin->w_cursor;
9892 char_u *frompat;
9893 int addlen;
9894 char_u *line;
9895 char_u *p;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009896 int save_ws = p_ws;
Bram Moolenaar5195e452005-08-19 20:32:47 +00009897 linenr_T prev_lnum = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009898
9899 if (repl_from == NULL || repl_to == NULL)
9900 {
9901 EMSG(_("E752: No previous spell replacement"));
9902 return;
9903 }
9904 addlen = STRLEN(repl_to) - STRLEN(repl_from);
9905
9906 frompat = alloc(STRLEN(repl_from) + 7);
9907 if (frompat == NULL)
9908 return;
9909 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
9910 p_ws = FALSE;
9911
Bram Moolenaar5195e452005-08-19 20:32:47 +00009912 sub_nsubs = 0;
9913 sub_nlines = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009914 curwin->w_cursor.lnum = 0;
9915 while (!got_int)
9916 {
9917 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0
9918 || u_save_cursor() == FAIL)
9919 break;
9920
9921 /* Only replace when the right word isn't there yet. This happens
9922 * when changing "etc" to "etc.". */
9923 line = ml_get_curline();
9924 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
9925 repl_to, STRLEN(repl_to)) != 0)
9926 {
9927 p = alloc(STRLEN(line) + addlen + 1);
9928 if (p == NULL)
9929 break;
9930 mch_memmove(p, line, curwin->w_cursor.col);
9931 STRCPY(p + curwin->w_cursor.col, repl_to);
9932 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
9933 ml_replace(curwin->w_cursor.lnum, p, FALSE);
9934 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009935
9936 if (curwin->w_cursor.lnum != prev_lnum)
9937 {
9938 ++sub_nlines;
9939 prev_lnum = curwin->w_cursor.lnum;
9940 }
9941 ++sub_nsubs;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009942 }
9943 curwin->w_cursor.col += STRLEN(repl_to);
9944 }
9945
9946 p_ws = save_ws;
9947 curwin->w_cursor = pos;
9948 vim_free(frompat);
9949
Bram Moolenaar5195e452005-08-19 20:32:47 +00009950 if (sub_nsubs == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009951 EMSG2(_("E753: Not found: %s"), repl_from);
Bram Moolenaar5195e452005-08-19 20:32:47 +00009952 else
9953 do_sub_msg(FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009954}
9955
9956/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009957 * Find spell suggestions for "word". Return them in the growarray "*gap" as
9958 * a list of allocated strings.
9959 */
9960 void
Bram Moolenaar4770d092006-01-12 23:22:24 +00009961spell_suggest_list(gap, word, maxcount, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009962 garray_T *gap;
9963 char_u *word;
9964 int maxcount; /* maximum nr of suggestions */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009965 int need_cap; /* 'spellcapcheck' matched */
Bram Moolenaar4770d092006-01-12 23:22:24 +00009966 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009967{
9968 suginfo_T sug;
9969 int i;
9970 suggest_T *stp;
9971 char_u *wcopy;
9972
Bram Moolenaar66fa2712006-01-22 23:22:22 +00009973 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009974
9975 /* Make room in "gap". */
9976 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009977 if (ga_grow(gap, sug.su_ga.ga_len) == OK)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009978 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009979 for (i = 0; i < sug.su_ga.ga_len; ++i)
9980 {
9981 stp = &SUG(sug.su_ga, i);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009982
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009983 /* The suggested word may replace only part of "word", add the not
9984 * replaced part. */
9985 wcopy = alloc(stp->st_wordlen
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009986 + STRLEN(sug.su_badptr + stp->st_orglen) + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00009987 if (wcopy == NULL)
9988 break;
9989 STRCPY(wcopy, stp->st_word);
9990 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
9991 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
9992 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009993 }
9994
9995 spell_find_cleanup(&sug);
9996}
9997
9998/*
9999 * Find spell suggestions for the word at the start of "badptr".
10000 * Return the suggestions in "su->su_ga".
10001 * The maximum number of suggestions is "maxcount".
10002 * Note: does use info for the current window.
10003 * This is based on the mechanisms of Aspell, but completely reimplemented.
10004 */
10005 static void
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010006spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010007 char_u *badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010008 int badlen; /* length of bad word or 0 if unknown */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010009 suginfo_T *su;
10010 int maxcount;
Bram Moolenaarea408852005-06-25 22:49:46 +000010011 int banbadword; /* don't include badword in suggestions */
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010012 int need_cap; /* word should start with capital */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010013 int interactive;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010014{
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010015 hlf_T attr = HLF_COUNT;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010016 char_u buf[MAXPATHL];
10017 char_u *p;
10018 int do_combine = FALSE;
10019 char_u *sps_copy;
10020#ifdef FEAT_EVAL
10021 static int expr_busy = FALSE;
10022#endif
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010023 int c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010024 int i;
10025 langp_T *lp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010026
10027 /*
10028 * Set the info in "*su".
10029 */
10030 vim_memset(su, 0, sizeof(suginfo_T));
10031 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
10032 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000010033 if (*badptr == NUL)
10034 return;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010035 hash_init(&su->su_banned);
10036
10037 su->su_badptr = badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +000010038 if (badlen != 0)
10039 su->su_badlen = badlen;
10040 else
10041 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010042 su->su_maxcount = maxcount;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010043 su->su_maxscore = SCORE_MAXINIT;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010044
10045 if (su->su_badlen >= MAXWLEN)
10046 su->su_badlen = MAXWLEN - 1; /* just in case */
10047 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
10048 (void)spell_casefold(su->su_badptr, su->su_badlen,
10049 su->su_fbadword, MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010050 /* get caps flags for bad word */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010051 su->su_badflags = badword_captype(su->su_badptr,
10052 su->su_badptr + su->su_badlen);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +000010053 if (need_cap)
10054 su->su_badflags |= WF_ONECAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010055
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010056 /* Find the default language for sound folding. We simply use the first
10057 * one in 'spelllang' that supports sound folding. That's good for when
10058 * using multiple files for one language, it's not that bad when mixing
10059 * languages (e.g., "pl,en"). */
10060 for (i = 0; i < curbuf->b_langp.ga_len; ++i)
10061 {
10062 lp = LANGP_ENTRY(curbuf->b_langp, i);
10063 if (lp->lp_sallang != NULL)
10064 {
10065 su->su_sallang = lp->lp_sallang;
10066 break;
10067 }
10068 }
10069
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010070 /* Soundfold the bad word with the default sound folding, so that we don't
10071 * have to do this many times. */
10072 if (su->su_sallang != NULL)
10073 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
10074 su->su_sal_badword);
10075
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010076 /* If the word is not capitalised and spell_check() doesn't consider the
10077 * word to be bad then it might need to be capitalised. Add a suggestion
10078 * for that. */
Bram Moolenaar53805d12005-08-01 07:08:33 +000010079 c = PTR2CHAR(su->su_badptr);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010080 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010081 {
10082 make_case_word(su->su_badword, buf, WF_ONECAP);
10083 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010084 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaarf9184a12005-07-02 23:10:47 +000010085 }
10086
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010087 /* Ban the bad word itself. It may appear in another region. */
Bram Moolenaarea408852005-06-25 22:49:46 +000010088 if (banbadword)
10089 add_banned(su, su->su_badword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010090
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010091 /* Make a copy of 'spellsuggest', because the expression may change it. */
10092 sps_copy = vim_strsave(p_sps);
10093 if (sps_copy == NULL)
10094 return;
10095
10096 /* Loop over the items in 'spellsuggest'. */
10097 for (p = sps_copy; *p != NUL; )
10098 {
10099 copy_option_part(&p, buf, MAXPATHL, ",");
10100
10101 if (STRNCMP(buf, "expr:", 5) == 0)
10102 {
10103#ifdef FEAT_EVAL
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010104 /* Evaluate an expression. Skip this when called recursively,
10105 * when using spellsuggest() in the expression. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010106 if (!expr_busy)
10107 {
10108 expr_busy = TRUE;
10109 spell_suggest_expr(su, buf + 5);
10110 expr_busy = FALSE;
10111 }
10112#endif
10113 }
10114 else if (STRNCMP(buf, "file:", 5) == 0)
10115 /* Use list of suggestions in a file. */
10116 spell_suggest_file(su, buf + 5);
10117 else
10118 {
10119 /* Use internal method. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010120 spell_suggest_intern(su, interactive);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010121 if (sps_flags & SPS_DOUBLE)
10122 do_combine = TRUE;
10123 }
10124 }
10125
10126 vim_free(sps_copy);
10127
10128 if (do_combine)
10129 /* Combine the two list of suggestions. This must be done last,
10130 * because sorting changes the order again. */
10131 score_combine(su);
10132}
10133
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * Every item of the resulting list that is itself a list provides a word and
 * a score; it is added to "su->su_ga" when the score is in the range zero to
 * "su->su_maxscore".
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T   *su;
    char_u      *expr;
{
    list_T      *result;
    listitem_T  *item;
    char_u      *word;
    int         score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
        /* Loop over the items in the list, ignoring non-list items. */
        for (item = result->lv_first; item != NULL; item = item->li_next)
        {
            if (item->li_tv.v_type != VAR_LIST)
                continue;

            /* Get the word and the score from the items. */
            score = get_spellword(item->li_tv.vval.v_list, &word);
            if (score < 0 || score > su->su_maxscore)
                continue;

            add_suggestion(su, &su->su_ga, word, su->su_badlen,
                                          score, 0, TRUE, su->su_sallang, FALSE);
        }
        list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
10172
10173/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010174 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'.
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010175 */
10176 static void
10177spell_suggest_file(su, fname)
10178 suginfo_T *su;
10179 char_u *fname;
10180{
10181 FILE *fd;
10182 char_u line[MAXWLEN * 2];
10183 char_u *p;
10184 int len;
10185 char_u cword[MAXWLEN];
10186
10187 /* Open the file. */
10188 fd = mch_fopen((char *)fname, "r");
10189 if (fd == NULL)
10190 {
10191 EMSG2(_(e_notopen), fname);
10192 return;
10193 }
10194
10195 /* Read it line by line. */
10196 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
10197 {
10198 line_breakcheck();
10199
10200 p = vim_strchr(line, '/');
10201 if (p == NULL)
10202 continue; /* No Tab found, just skip the line. */
10203 *p++ = NUL;
10204 if (STRICMP(su->su_badword, line) == 0)
10205 {
10206 /* Match! Isolate the good word, until CR or NL. */
10207 for (len = 0; p[len] >= ' '; ++len)
10208 ;
10209 p[len] = NUL;
10210
10211 /* If the suggestion doesn't have specific case duplicate the case
10212 * of the bad word. */
10213 if (captype(p, NULL) == 0)
10214 {
10215 make_case_word(p, cword, su->su_badflags);
10216 p = cword;
10217 }
10218
10219 add_suggestion(su, &su->su_ga, p, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010220 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010221 }
10222 }
10223
10224 fclose(fd);
10225
Bram Moolenaar4770d092006-01-12 23:22:24 +000010226 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10227 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010228 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10229}
10230
10231/*
10232 * Find suggestions for the internal method indicated by "sps_flags".
10233 */
10234 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000010235spell_suggest_intern(su, interactive)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010236 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010237 int interactive;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010238{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010239 /*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010240 * Load the .sug file(s) that are available and not done yet.
10241 */
10242 suggest_load_files();
10243
10244 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010245 * 1. Try special cases, such as repeating a word: "the the" -> "the".
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010246 *
10247 * Set a maximum score to limit the combination of operations that is
10248 * tried.
10249 */
Bram Moolenaar0c405862005-06-22 22:26:26 +000010250 suggest_try_special(su);
10251
10252 /*
10253 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
10254 * from the .aff file and inserting a space (split the word).
10255 */
10256 suggest_try_change(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010257
10258 /* For the resulting top-scorers compute the sound-a-like score. */
10259 if (sps_flags & SPS_DOUBLE)
10260 score_comp_sal(su);
10261
10262 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010263 * 3. Try finding sound-a-like words.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010264 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010265 if ((sps_flags & SPS_FAST) == 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010266 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010267 if (sps_flags & SPS_BEST)
10268 /* Adjust the word score for the suggestions found so far for how
10269 * they sounds like. */
10270 rescore_suggestions(su);
10271
10272 /*
10273 * While going throught the soundfold tree "su_maxscore" is the score
10274 * for the soundfold word, limits the changes that are being tried,
10275 * and "su_sfmaxscore" the rescored score, which is set by
10276 * cleanup_suggestions().
10277 * First find words with a small edit distance, because this is much
10278 * faster and often already finds the top-N suggestions. If we didn't
10279 * find many suggestions try again with a higher edit distance.
10280 * "sl_sounddone" is used to avoid doing the same word twice.
10281 */
10282 suggest_try_soundalike_prep();
10283 su->su_maxscore = SCORE_SFMAX1;
10284 su->su_sfmaxscore = SCORE_MAXINIT * 3;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010285 suggest_try_soundalike(su);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010286 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10287 {
10288 /* We didn't find enough matches, try again, allowing more
10289 * changes to the soundfold word. */
10290 su->su_maxscore = SCORE_SFMAX2;
10291 suggest_try_soundalike(su);
10292 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10293 {
10294 /* Still didn't find enough matches, try again, allowing even
10295 * more changes to the soundfold word. */
10296 su->su_maxscore = SCORE_SFMAX3;
10297 suggest_try_soundalike(su);
10298 }
10299 }
10300 su->su_maxscore = su->su_sfmaxscore;
10301 suggest_try_soundalike_finish();
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010302 }
10303
Bram Moolenaar4770d092006-01-12 23:22:24 +000010304 /* When CTRL-C was hit while searching do show the results. Only clear
10305 * got_int when using a command, not for spellsuggest(). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010306 ui_breakcheck();
Bram Moolenaar4770d092006-01-12 23:22:24 +000010307 if (interactive && got_int)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010308 {
10309 (void)vgetc();
10310 got_int = FALSE;
10311 }
10312
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010313 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010314 {
10315 if (sps_flags & SPS_BEST)
10316 /* Adjust the word score for how it sounds like. */
10317 rescore_suggestions(su);
10318
Bram Moolenaar4770d092006-01-12 23:22:24 +000010319 /* Remove bogus suggestions, sort and truncate at "maxcount". */
10320 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000010321 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010322 }
10323}
10324
10325/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000010326 * Load the .sug files for languages that have one and weren't loaded yet.
10327 */
10328 static void
10329suggest_load_files()
10330{
10331 langp_T *lp;
10332 int lpi;
10333 slang_T *slang;
10334 char_u *dotp;
10335 FILE *fd;
10336 char_u buf[MAXWLEN];
10337 int i;
10338 time_t timestamp;
10339 int wcount;
10340 int wordnr;
10341 garray_T ga;
10342 int c;
10343
10344 /* Do this for all languages that support sound folding. */
10345 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10346 {
10347 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10348 slang = lp->lp_slang;
10349 if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
10350 {
10351 /* Change ".spl" to ".sug" and open the file. When the file isn't
10352 * found silently skip it. Do set "sl_sugloaded" so that we
10353 * don't try again and again. */
10354 slang->sl_sugloaded = TRUE;
10355
10356 dotp = vim_strrchr(slang->sl_fname, '.');
10357 if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
10358 continue;
10359 STRCPY(dotp, ".sug");
10360 fd = fopen((char *)slang->sl_fname, "r");
10361 if (fd == NULL)
10362 goto nextone;
10363
10364 /*
10365 * <SUGHEADER>: <fileID> <versionnr> <timestamp>
10366 */
10367 for (i = 0; i < VIMSUGMAGICL; ++i)
10368 buf[i] = getc(fd); /* <fileID> */
10369 if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
10370 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010371 EMSG2(_("E778: This does not look like a .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010372 slang->sl_fname);
10373 goto nextone;
10374 }
10375 c = getc(fd); /* <versionnr> */
10376 if (c < VIMSUGVERSION)
10377 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010378 EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010379 slang->sl_fname);
10380 goto nextone;
10381 }
10382 else if (c > VIMSUGVERSION)
10383 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010384 EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010385 slang->sl_fname);
10386 goto nextone;
10387 }
10388
10389 /* Check the timestamp, it must be exactly the same as the one in
10390 * the .spl file. Otherwise the word numbers won't match. */
Bram Moolenaarb388adb2006-02-28 23:50:17 +000010391 timestamp = get8c(fd); /* <timestamp> */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010392 if (timestamp != slang->sl_sugtime)
10393 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010394 EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010395 slang->sl_fname);
10396 goto nextone;
10397 }
10398
10399 /*
10400 * <SUGWORDTREE>: <wordtree>
10401 * Read the trie with the soundfolded words.
10402 */
10403 if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
10404 FALSE, 0) != 0)
10405 {
10406someerror:
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000010407 EMSG2(_("E782: error while reading .sug file: %s"),
Bram Moolenaar4770d092006-01-12 23:22:24 +000010408 slang->sl_fname);
10409 slang_clear_sug(slang);
10410 goto nextone;
10411 }
10412
10413 /*
10414 * <SUGTABLE>: <sugwcount> <sugline> ...
10415 *
10416 * Read the table with word numbers. We use a file buffer for
10417 * this, because it's so much like a file with lines. Makes it
10418 * possible to swap the info and save on memory use.
10419 */
10420 slang->sl_sugbuf = open_spellbuf();
10421 if (slang->sl_sugbuf == NULL)
10422 goto someerror;
10423 /* <sugwcount> */
Bram Moolenaarb388adb2006-02-28 23:50:17 +000010424 wcount = get4c(fd);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010425 if (wcount < 0)
10426 goto someerror;
10427
10428 /* Read all the wordnr lists into the buffer, one NUL terminated
10429 * list per line. */
10430 ga_init2(&ga, 1, 100);
10431 for (wordnr = 0; wordnr < wcount; ++wordnr)
10432 {
10433 ga.ga_len = 0;
10434 for (;;)
10435 {
10436 c = getc(fd); /* <sugline> */
10437 if (c < 0 || ga_grow(&ga, 1) == FAIL)
10438 goto someerror;
10439 ((char_u *)ga.ga_data)[ga.ga_len++] = c;
10440 if (c == NUL)
10441 break;
10442 }
10443 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
10444 ga.ga_data, ga.ga_len, TRUE) == FAIL)
10445 goto someerror;
10446 }
10447 ga_clear(&ga);
10448
10449 /*
10450 * Need to put word counts in the word tries, so that we can find
10451 * a word by its number.
10452 */
10453 tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
10454 tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
10455
10456nextone:
10457 if (fd != NULL)
10458 fclose(fd);
10459 STRCPY(dotp, ".spl");
10460 }
10461 }
10462}
10463
10464
10465/*
10466 * Fill in the wordcount fields for a trie.
10467 * Returns the total number of words.
10468 */
10469 static void
10470tree_count_words(byts, idxs)
10471 char_u *byts;
10472 idx_T *idxs;
10473{
10474 int depth;
10475 idx_T arridx[MAXWLEN];
10476 int curi[MAXWLEN];
10477 int c;
10478 idx_T n;
10479 int wordcount[MAXWLEN];
10480
10481 arridx[0] = 0;
10482 curi[0] = 1;
10483 wordcount[0] = 0;
10484 depth = 0;
10485 while (depth >= 0 && !got_int)
10486 {
10487 if (curi[depth] > byts[arridx[depth]])
10488 {
10489 /* Done all bytes at this node, go up one level. */
10490 idxs[arridx[depth]] = wordcount[depth];
10491 if (depth > 0)
10492 wordcount[depth - 1] += wordcount[depth];
10493
10494 --depth;
10495 fast_breakcheck();
10496 }
10497 else
10498 {
10499 /* Do one more byte at this node. */
10500 n = arridx[depth] + curi[depth];
10501 ++curi[depth];
10502
10503 c = byts[n];
10504 if (c == 0)
10505 {
10506 /* End of word, count it. */
10507 ++wordcount[depth];
10508
10509 /* Skip over any other NUL bytes (same word with different
10510 * flags). */
10511 while (byts[n + 1] == 0)
10512 {
10513 ++n;
10514 ++curi[depth];
10515 }
10516 }
10517 else
10518 {
10519 /* Normal char, go one level deeper to count the words. */
10520 ++depth;
10521 arridx[depth] = idxs[n];
10522 curi[depth] = 1;
10523 wordcount[depth] = 0;
10524 }
10525 }
10526 }
10527}
10528
10529/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000010530 * Free the info put in "*su" by spell_find_suggest().
10531 */
10532 static void
10533spell_find_cleanup(su)
10534 suginfo_T *su;
10535{
10536 int i;
10537
10538 /* Free the suggestions. */
10539 for (i = 0; i < su->su_ga.ga_len; ++i)
10540 vim_free(SUG(su->su_ga, i).st_word);
10541 ga_clear(&su->su_ga);
10542 for (i = 0; i < su->su_sga.ga_len; ++i)
10543 vim_free(SUG(su->su_sga, i).st_word);
10544 ga_clear(&su->su_sga);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010545
10546 /* Free the banned words. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010547 hash_clear_all(&su->su_banned, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010548}
10549
10550/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010551 * Make a copy of "word", with the first letter upper or lower cased, to
10552 * "wcopy[MAXWLEN]". "word" must not be empty.
10553 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010554 */
10555 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010556onecap_copy(word, wcopy, upper)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010557 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010558 char_u *wcopy;
10559 int upper; /* TRUE: first letter made upper case */
10560{
10561 char_u *p;
10562 int c;
10563 int l;
10564
10565 p = word;
10566#ifdef FEAT_MBYTE
10567 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010568 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010569 else
10570#endif
10571 c = *p++;
10572 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010573 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010574 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010575 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010576#ifdef FEAT_MBYTE
10577 if (has_mbyte)
10578 l = mb_char2bytes(c, wcopy);
10579 else
10580#endif
10581 {
10582 l = 1;
10583 wcopy[0] = c;
10584 }
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010585 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010586}
10587
10588/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000010589 * Make a copy of "word" with all the letters upper cased into
10590 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010591 */
10592 static void
10593allcap_copy(word, wcopy)
10594 char_u *word;
10595 char_u *wcopy;
10596{
10597 char_u *s;
10598 char_u *d;
10599 int c;
10600
10601 d = wcopy;
10602 for (s = word; *s != NUL; )
10603 {
10604#ifdef FEAT_MBYTE
10605 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000010606 c = mb_cptr2char_adv(&s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010607 else
10608#endif
10609 c = *s++;
Bram Moolenaar78622822005-08-23 21:00:13 +000010610
10611#ifdef FEAT_MBYTE
10612 /* We only change ß to SS when we are certain latin1 is used. It
10613 * would cause weird errors in other 8-bit encodings. */
10614 if (enc_latin1like && c == 0xdf)
10615 {
10616 c = 'S';
10617 if (d - wcopy >= MAXWLEN - 1)
10618 break;
10619 *d++ = c;
10620 }
10621 else
10622#endif
10623 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010624
10625#ifdef FEAT_MBYTE
10626 if (has_mbyte)
10627 {
10628 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
10629 break;
10630 d += mb_char2bytes(c, d);
10631 }
10632 else
10633#endif
10634 {
10635 if (d - wcopy >= MAXWLEN - 1)
10636 break;
10637 *d++ = c;
10638 }
10639 }
10640 *d = NUL;
10641}
10642
10643/*
Bram Moolenaar0c405862005-06-22 22:26:26 +000010644 * Try finding suggestions by recognizing specific situations.
10645 */
10646 static void
10647suggest_try_special(su)
10648 suginfo_T *su;
10649{
10650 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000010651 size_t len;
Bram Moolenaar0c405862005-06-22 22:26:26 +000010652 int c;
10653 char_u word[MAXWLEN];
10654
10655 /*
10656 * Recognize a word that is repeated: "the the".
10657 */
10658 p = skiptowhite(su->su_fbadword);
10659 len = p - su->su_fbadword;
10660 p = skipwhite(p);
10661 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
10662 {
10663 /* Include badflags: if the badword is onecap or allcap
10664 * use that for the goodword too: "The the" -> "The". */
10665 c = su->su_fbadword[len];
10666 su->su_fbadword[len] = NUL;
10667 make_case_word(su->su_fbadword, word, su->su_badflags);
10668 su->su_fbadword[len] = c;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000010669
10670 /* Give a soundalike score of 0, compute the score as if deleting one
10671 * character. */
10672 add_suggestion(su, &su->su_ga, word, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010673 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010674 }
10675}
10676
10677/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010678 * Try finding suggestions by adding/removing/swapping letters.
10679 */
10680 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000010681suggest_try_change(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010682 suginfo_T *su;
10683{
10684 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000010685 int n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010686 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010687 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000010688 langp_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010689
10690 /* We make a copy of the case-folded bad word, so that we can modify it
Bram Moolenaar0c405862005-06-22 22:26:26 +000010691 * to find matches (esp. REP items). Append some more text, changing
10692 * chars after the bad word may help. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010693 STRCPY(fword, su->su_fbadword);
Bram Moolenaar0c405862005-06-22 22:26:26 +000010694 n = STRLEN(fword);
10695 p = su->su_badptr + su->su_badlen;
10696 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010697
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010698 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010699 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010700 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010701
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010702 /* If reloading a spell file fails it's still in the list but
10703 * everything has been cleared. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000010704 if (lp->lp_slang->sl_fbyts == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010705 continue;
10706
Bram Moolenaar4770d092006-01-12 23:22:24 +000010707 /* Try it for this language. Will add possible suggestions. */
10708 suggest_trie_walk(su, lp, fword, FALSE);
10709 }
10710}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010711
/*
 * Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score stored in "stack[depth]" plus "add"
 * is still below "su->su_maxscore".
 * "su" and "stack" are parenthesized in the expansion, so that expressions
 * such as "&info" or "base + 1" can be passed without precedence surprises.
 */
#define TRY_DEEPER(su, stack, depth, add) \
        ((stack)[depth].ts_score + (add) < (su)->su_maxscore)
10715
10716/*
10717 * Try finding suggestions by adding/removing/swapping letters.
10718 *
10719 * This uses a state machine. At each node in the tree we try various
10720 * operations. When trying if an operation works "depth" is increased and the
10721 * stack[] is used to store info. This allows combinations, thus insert one
10722 * character, replace one and delete another. The number of changes is
10723 * limited by su->su_maxscore.
10724 *
10725 * After implementing this I noticed an article by Kemal Oflazer that
10726 * describes something similar: "Error-tolerant Finite State Recognition with
10727 * Applications to Morphological Analysis and Spelling Correction" (1996).
10728 * The implementation in the article is simplified and requires a stack of
10729 * unknown depth. The implementation here only needs a stack depth equal to
10730 * the length of the word.
10731 *
10732 * This is also used for the sound-folded word, "soundfold" is TRUE then.
10733 * The mechanism is the same, but we find a match with a sound-folded word
10734 * that comes from one or more original words. Each of these words may be
10735 * added, this is done by add_sound_suggest().
10736 * Don't use:
10737 * the prefix tree or the keep-case tree
10738 * "su->su_badlen"
10739 * anything to do with upper and lower case
10740 * anything to do with word or non-word characters ("spell_iswordp()")
10741 * banned words
10742 * word flags (rare, region, compounding)
10743 * word splitting for now
10744 * "similar_chars()"
10745 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
10746 */
10747 static void
10748suggest_trie_walk(su, lp, fword, soundfold)
10749 suginfo_T *su;
10750 langp_T *lp;
10751 char_u *fword;
10752 int soundfold;
10753{
10754 char_u tword[MAXWLEN]; /* good word collected so far */
10755 trystate_T stack[MAXWLEN];
10756 char_u preword[MAXWLEN * 3]; /* word found with proper case;
10757 * concatenation of prefix compound
10758 * words and split word. NUL terminated
10759 * when going deeper but not when coming
10760 * back. */
10761 char_u compflags[MAXWLEN]; /* compound flags, one for each word */
10762 trystate_T *sp;
10763 int newscore;
10764 int score;
10765 char_u *byts, *fbyts, *pbyts;
10766 idx_T *idxs, *fidxs, *pidxs;
10767 int depth;
10768 int c, c2, c3;
10769 int n = 0;
10770 int flags;
10771 garray_T *gap;
10772 idx_T arridx;
10773 int len;
10774 char_u *p;
10775 fromto_T *ftp;
10776 int fl = 0, tl;
10777 int repextra = 0; /* extra bytes in fword[] from REP item */
10778 slang_T *slang = lp->lp_slang;
10779 int fword_ends;
10780 int goodword_ends;
10781#ifdef DEBUG_TRIEWALK
10782 /* Stores the name of the change made at each level. */
10783 char_u changename[MAXWLEN][80];
10784#endif
10785 int breakcheckcount = 1000;
10786 int compound_ok;
10787
10788 /*
10789 * Go through the whole case-fold tree, try changes at each node.
10790 * "tword[]" contains the word collected from nodes in the tree.
10791 * "fword[]" the word we are trying to match with (initially the bad
10792 * word).
10793 */
10794 depth = 0;
10795 sp = &stack[0];
10796 vim_memset(sp, 0, sizeof(trystate_T));
10797 sp->ts_curi = 1;
10798
10799 if (soundfold)
10800 {
10801 /* Going through the soundfold tree. */
10802 byts = fbyts = slang->sl_sbyts;
10803 idxs = fidxs = slang->sl_sidxs;
10804 pbyts = NULL;
10805 pidxs = NULL;
10806 sp->ts_prefixdepth = PFD_NOPREFIX;
10807 sp->ts_state = STATE_START;
10808 }
10809 else
10810 {
Bram Moolenaarea424162005-06-16 21:51:00 +000010811 /*
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010812 * When there are postponed prefixes we need to use these first. At
10813 * the end of the prefix we continue in the case-fold tree.
10814 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010815 fbyts = slang->sl_fbyts;
10816 fidxs = slang->sl_fidxs;
10817 pbyts = slang->sl_pbyts;
10818 pidxs = slang->sl_pidxs;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010819 if (pbyts != NULL)
10820 {
10821 byts = pbyts;
10822 idxs = pidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010823 sp->ts_prefixdepth = PFD_PREFIXTREE;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010824 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */
10825 }
10826 else
10827 {
10828 byts = fbyts;
10829 idxs = fidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000010830 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010831 sp->ts_state = STATE_START;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010832 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010833 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010834
Bram Moolenaar4770d092006-01-12 23:22:24 +000010835 /*
10836 * Loop to find all suggestions. At each round we either:
10837 * - For the current state try one operation, advance "ts_curi",
10838 * increase "depth".
10839 * - When a state is done go to the next, set "ts_state".
10840 * - When all states are tried decrease "depth".
10841 */
10842 while (depth >= 0 && !got_int)
10843 {
10844 sp = &stack[depth];
10845 switch (sp->ts_state)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010846 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010847 case STATE_START:
10848 case STATE_NOPREFIX:
10849 /*
10850 * Start of node: Deal with NUL bytes, which means
10851 * tword[] may end here.
10852 */
10853 arridx = sp->ts_arridx; /* current node in the tree */
10854 len = byts[arridx]; /* bytes in this node */
10855 arridx += sp->ts_curi; /* index of current byte */
10856
10857 if (sp->ts_prefixdepth == PFD_PREFIXTREE)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010858 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010859 /* Skip over the NUL bytes, we use them later. */
10860 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
10861 ;
10862 sp->ts_curi += n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010863
Bram Moolenaar4770d092006-01-12 23:22:24 +000010864 /* Always past NUL bytes now. */
10865 n = (int)sp->ts_state;
10866 sp->ts_state = STATE_ENDNUL;
10867 sp->ts_save_badflags = su->su_badflags;
10868
10869 /* At end of a prefix or at start of prefixtree: check for
10870 * following word. */
10871 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010872 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010873 /* Set su->su_badflags to the caps type at this position.
10874 * Use the caps type until here for the prefix itself. */
Bram Moolenaar53805d12005-08-01 07:08:33 +000010875#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000010876 if (has_mbyte)
10877 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
10878 else
Bram Moolenaar53805d12005-08-01 07:08:33 +000010879#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000010880 n = sp->ts_fidx;
10881 flags = badword_captype(su->su_badptr, su->su_badptr + n);
10882 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000010883 su->su_badptr + su->su_badlen);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010884#ifdef DEBUG_TRIEWALK
10885 sprintf(changename[depth], "prefix");
10886#endif
10887 go_deeper(stack, depth, 0);
10888 ++depth;
10889 sp = &stack[depth];
10890 sp->ts_prefixdepth = depth - 1;
10891 byts = fbyts;
10892 idxs = fidxs;
10893 sp->ts_arridx = 0;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010894
Bram Moolenaar4770d092006-01-12 23:22:24 +000010895 /* Move the prefix to preword[] with the right case
10896 * and make find_keepcap_word() work. */
10897 tword[sp->ts_twordlen] = NUL;
10898 make_case_word(tword + sp->ts_splitoff,
10899 preword + sp->ts_prewordlen, flags);
10900 sp->ts_prewordlen = STRLEN(preword);
10901 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010902 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010903 break;
10904 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010905
Bram Moolenaar4770d092006-01-12 23:22:24 +000010906 if (sp->ts_curi > len || byts[arridx] != 0)
10907 {
10908 /* Past bytes in node and/or past NUL bytes. */
10909 sp->ts_state = STATE_ENDNUL;
10910 sp->ts_save_badflags = su->su_badflags;
10911 break;
10912 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010913
Bram Moolenaar4770d092006-01-12 23:22:24 +000010914 /*
10915 * End of word in tree.
10916 */
10917 ++sp->ts_curi; /* eat one NUL byte */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010918
Bram Moolenaar4770d092006-01-12 23:22:24 +000010919 flags = (int)idxs[arridx];
Bram Moolenaare1438bb2006-03-01 22:01:55 +000010920
10921 /* Skip words with the NOSUGGEST flag. */
10922 if (flags & WF_NOSUGGEST)
10923 break;
10924
Bram Moolenaar4770d092006-01-12 23:22:24 +000010925 fword_ends = (fword[sp->ts_fidx] == NUL
10926 || (soundfold
10927 ? vim_iswhite(fword[sp->ts_fidx])
10928 : !spell_iswordp(fword + sp->ts_fidx, curbuf)));
10929 tword[sp->ts_twordlen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000010930
Bram Moolenaar4770d092006-01-12 23:22:24 +000010931 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
Bram Moolenaard12a1322005-08-21 22:08:24 +000010932 && (sp->ts_flags & TSF_PREFIXOK) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000010933 {
10934 /* There was a prefix before the word. Check that the prefix
10935 * can be used with this word. */
10936 /* Count the length of the NULs in the prefix. If there are
10937 * none this must be the first try without a prefix. */
10938 n = stack[sp->ts_prefixdepth].ts_arridx;
10939 len = pbyts[n++];
10940 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
10941 ;
10942 if (c > 0)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010943 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010944 c = valid_word_prefix(c, n, flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000010945 tword + sp->ts_splitoff, slang, FALSE);
Bram Moolenaar4770d092006-01-12 23:22:24 +000010946 if (c == 0)
10947 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010948
Bram Moolenaar4770d092006-01-12 23:22:24 +000010949 /* Use the WF_RARE flag for a rare prefix. */
10950 if (c & WF_RAREPFX)
10951 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +000010952
Bram Moolenaar4770d092006-01-12 23:22:24 +000010953 /* Tricky: when checking for both prefix and compounding
10954 * we run into the prefix flag first.
10955 * Remember that it's OK, so that we accept the prefix
10956 * when arriving at a compound flag. */
10957 sp->ts_flags |= TSF_PREFIXOK;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010958 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000010959 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +000010960
Bram Moolenaar4770d092006-01-12 23:22:24 +000010961 /* Check NEEDCOMPOUND: can't use word without compounding. Do try
10962 * appending another compound word below. */
10963 if (sp->ts_complen == sp->ts_compsplit && fword_ends
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010964 && (flags & WF_NEEDCOMP))
Bram Moolenaar4770d092006-01-12 23:22:24 +000010965 goodword_ends = FALSE;
10966 else
10967 goodword_ends = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000010968
Bram Moolenaar4770d092006-01-12 23:22:24 +000010969 p = NULL;
10970 compound_ok = TRUE;
10971 if (sp->ts_complen > sp->ts_compsplit)
10972 {
10973 if (slang->sl_nobreak)
Bram Moolenaard12a1322005-08-21 22:08:24 +000010974 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000010975 /* There was a word before this word. When there was no
10976 * change in this word (it was correct) add the first word
10977 * as a suggestion. If this word was corrected too, we
10978 * need to check if a correct word follows. */
10979 if (sp->ts_fidx - sp->ts_splitfidx
Bram Moolenaar78622822005-08-23 21:00:13 +000010980 == sp->ts_twordlen - sp->ts_splitoff
Bram Moolenaar4770d092006-01-12 23:22:24 +000010981 && STRNCMP(fword + sp->ts_splitfidx,
10982 tword + sp->ts_splitoff,
Bram Moolenaar78622822005-08-23 21:00:13 +000010983 sp->ts_fidx - sp->ts_splitfidx) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +000010984 {
10985 preword[sp->ts_prewordlen] = NUL;
10986 newscore = score_wordcount_adj(slang, sp->ts_score,
10987 preword + sp->ts_prewordlen,
10988 sp->ts_prewordlen > 0);
10989 /* Add the suggestion if the score isn't too bad. */
10990 if (newscore <= su->su_maxscore)
Bram Moolenaar78622822005-08-23 21:00:13 +000010991 add_suggestion(su, &su->su_ga, preword,
Bram Moolenaar8b96d642005-09-05 22:05:30 +000010992 sp->ts_splitfidx - repextra,
Bram Moolenaar4770d092006-01-12 23:22:24 +000010993 newscore, 0, FALSE,
10994 lp->lp_sallang, FALSE);
10995 break;
Bram Moolenaar78622822005-08-23 21:00:13 +000010996 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000010997 }
Bram Moolenaare52325c2005-08-22 22:54:29 +000010998 else
Bram Moolenaar0c405862005-06-22 22:26:26 +000010999 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011000 /* There was a compound word before this word. If this
11001 * word does not support compounding then give up
11002 * (splitting is tried for the word without compound
11003 * flag). */
11004 if (((unsigned)flags >> 24) == 0
11005 || sp->ts_twordlen - sp->ts_splitoff
11006 < slang->sl_compminlen)
11007 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011008#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011009 /* For multi-byte chars check character length against
11010 * COMPOUNDMIN. */
11011 if (has_mbyte
11012 && slang->sl_compminlen > 0
11013 && mb_charlen(tword + sp->ts_splitoff)
11014 < slang->sl_compminlen)
11015 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000011016#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +000011017
Bram Moolenaar4770d092006-01-12 23:22:24 +000011018 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11019 compflags[sp->ts_complen + 1] = NUL;
11020 vim_strncpy(preword + sp->ts_prewordlen,
11021 tword + sp->ts_splitoff,
11022 sp->ts_twordlen - sp->ts_splitoff);
11023 p = preword;
11024 while (*skiptowhite(p) != NUL)
11025 p = skipwhite(skiptowhite(p));
11026 if (fword_ends && !can_compound(slang, p,
11027 compflags + sp->ts_compsplit))
11028 /* Compound is not allowed. But it may still be
11029 * possible if we add another (short) word. */
11030 compound_ok = FALSE;
11031
11032 /* Get pointer to last char of previous word. */
11033 p = preword + sp->ts_prewordlen;
11034 mb_ptr_back(preword, p);
Bram Moolenaar0c405862005-06-22 22:26:26 +000011035 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011036 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011037
Bram Moolenaar4770d092006-01-12 23:22:24 +000011038 /*
11039 * Form the word with proper case in preword.
11040 * If there is a word from a previous split, append.
11041 * For the soundfold tree don't change the case, simply append.
11042 */
11043 if (soundfold)
11044 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
11045 else if (flags & WF_KEEPCAP)
11046 /* Must find the word in the keep-case tree. */
11047 find_keepcap_word(slang, tword + sp->ts_splitoff,
11048 preword + sp->ts_prewordlen);
11049 else
11050 {
11051 /* Include badflags: If the badword is onecap or allcap
11052 * use that for the goodword too. But if the badword is
11053 * allcap and it's only one char long use onecap. */
11054 c = su->su_badflags;
11055 if ((c & WF_ALLCAP)
11056#ifdef FEAT_MBYTE
11057 && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
11058#else
11059 && su->su_badlen == 1
11060#endif
11061 )
11062 c = WF_ONECAP;
11063 c |= flags;
11064
11065 /* When appending a compound word after a word character don't
11066 * use Onecap. */
11067 if (p != NULL && spell_iswordp_nmw(p))
11068 c &= ~WF_ONECAP;
11069 make_case_word(tword + sp->ts_splitoff,
11070 preword + sp->ts_prewordlen, c);
11071 }
11072
11073 if (!soundfold)
11074 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011075 /* Don't use a banned word. It may appear again as a good
11076 * word, thus remember it. */
11077 if (flags & WF_BANNED)
11078 {
Bram Moolenaar5195e452005-08-19 20:32:47 +000011079 add_banned(su, preword + sp->ts_prewordlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011080 break;
11081 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011082 if ((sp->ts_complen == sp->ts_compsplit
Bram Moolenaar4770d092006-01-12 23:22:24 +000011083 && WAS_BANNED(su, preword + sp->ts_prewordlen))
11084 || WAS_BANNED(su, preword))
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011085 {
11086 if (slang->sl_compprog == NULL)
11087 break;
11088 /* the word so far was banned but we may try compounding */
11089 goodword_ends = FALSE;
11090 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011091 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011092
Bram Moolenaar4770d092006-01-12 23:22:24 +000011093 newscore = 0;
11094 if (!soundfold) /* soundfold words don't have flags */
11095 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011096 if ((flags & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000011097 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011098 newscore += SCORE_REGION;
11099 if (flags & WF_RARE)
11100 newscore += SCORE_RARE;
11101
Bram Moolenaar0c405862005-06-22 22:26:26 +000011102 if (!spell_valid_case(su->su_badflags,
Bram Moolenaar5195e452005-08-19 20:32:47 +000011103 captype(preword + sp->ts_prewordlen, NULL)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011104 newscore += SCORE_ICASE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011105 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011106
Bram Moolenaar4770d092006-01-12 23:22:24 +000011107 /* TODO: how about splitting in the soundfold tree? */
11108 if (fword_ends
11109 && goodword_ends
11110 && sp->ts_fidx >= sp->ts_fidxtry
11111 && compound_ok)
11112 {
11113 /* The badword also ends: add suggestions. */
11114#ifdef DEBUG_TRIEWALK
11115 if (soundfold && STRCMP(preword, "smwrd") == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011116 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011117 int j;
11118
11119 /* print the stack of changes that brought us here */
11120 smsg("------ %s -------", fword);
11121 for (j = 0; j < depth; ++j)
11122 smsg("%s", changename[j]);
11123 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011124#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011125 if (soundfold)
11126 {
11127 /* For soundfolded words we need to find the original
11128 * words, the edit distance and then add them. */
11129 add_sound_suggest(su, preword, sp->ts_score, lp);
11130 }
11131 else
11132 {
11133 /* Give a penalty when changing non-word char to word
11134 * char, e.g., "thes," -> "these". */
11135 p = fword + sp->ts_fidx;
11136 mb_ptr_back(fword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011137 if (!spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011138 {
11139 p = preword + STRLEN(preword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011140 mb_ptr_back(preword, p);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011141 if (spell_iswordp(p, curbuf))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +000011142 newscore += SCORE_NONWORD;
11143 }
11144
Bram Moolenaar4770d092006-01-12 23:22:24 +000011145 /* Give a bonus to words seen before. */
11146 score = score_wordcount_adj(slang,
11147 sp->ts_score + newscore,
11148 preword + sp->ts_prewordlen,
11149 sp->ts_prewordlen > 0);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011150
Bram Moolenaar4770d092006-01-12 23:22:24 +000011151 /* Add the suggestion if the score isn't too bad. */
11152 if (score <= su->su_maxscore)
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000011153 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011154 add_suggestion(su, &su->su_ga, preword,
11155 sp->ts_fidx - repextra,
11156 score, 0, FALSE, lp->lp_sallang, FALSE);
Bram Moolenaar2d3f4892006-01-20 23:02:51 +000011157
11158 if (su->su_badflags & WF_MIXCAP)
11159 {
11160 /* We really don't know if the word should be
11161 * upper or lower case, add both. */
11162 c = captype(preword, NULL);
11163 if (c == 0 || c == WF_ALLCAP)
11164 {
11165 make_case_word(tword + sp->ts_splitoff,
11166 preword + sp->ts_prewordlen,
11167 c == 0 ? WF_ALLCAP : 0);
11168
11169 add_suggestion(su, &su->su_ga, preword,
11170 sp->ts_fidx - repextra,
11171 score + SCORE_ICASE, 0, FALSE,
11172 lp->lp_sallang, FALSE);
11173 }
11174 }
11175 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011176 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011177 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011178
Bram Moolenaar4770d092006-01-12 23:22:24 +000011179 /*
11180 * Try word split and/or compounding.
11181 */
11182 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
Bram Moolenaarea424162005-06-16 21:51:00 +000011183#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011184 /* Don't split halfway a character. */
11185 && (!has_mbyte || sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011186#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011187 )
11188 {
11189 int try_compound;
11190 int try_split;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011191
Bram Moolenaar4770d092006-01-12 23:22:24 +000011192 /* If past the end of the bad word don't try a split.
11193 * Otherwise try changing the next word. E.g., find
11194 * suggestions for "the the" where the second "the" is
11195 * different. It's done like a split.
11196 * TODO: word split for soundfold words */
11197 try_split = (sp->ts_fidx - repextra < su->su_badlen)
11198 && !soundfold;
11199
11200 /* Get here in several situations:
11201 * 1. The word in the tree ends:
11202 * If the word allows compounding try that. Otherwise try
11203 * a split by inserting a space. For both check that a
11204 * valid words starts at fword[sp->ts_fidx].
11205 * For NOBREAK do like compounding to be able to check if
11206 * the next word is valid.
11207 * 2. The badword does end, but it was due to a change (e.g.,
11208 * a swap). No need to split, but do check that the
11209 * following word is valid.
11210 * 3. The badword and the word in the tree end. It may still
11211 * be possible to compound another (short) word.
11212 */
11213 try_compound = FALSE;
11214 if (!soundfold
11215 && slang->sl_compprog != NULL
11216 && ((unsigned)flags >> 24) != 0
11217 && sp->ts_twordlen - sp->ts_splitoff
11218 >= slang->sl_compminlen
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011219#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011220 && (!has_mbyte
11221 || slang->sl_compminlen == 0
11222 || mb_charlen(tword + sp->ts_splitoff)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000011223 >= slang->sl_compminlen)
11224#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011225 && (slang->sl_compsylmax < MAXWLEN
11226 || sp->ts_complen + 1 - sp->ts_compsplit
11227 < slang->sl_compmax)
11228 && (byte_in_str(sp->ts_complen == sp->ts_compsplit
11229 ? slang->sl_compstartflags
11230 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +000011231 ((unsigned)flags >> 24))))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011232 {
11233 try_compound = TRUE;
11234 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11235 compflags[sp->ts_complen + 1] = NUL;
11236 }
Bram Moolenaard12a1322005-08-21 22:08:24 +000011237
Bram Moolenaar4770d092006-01-12 23:22:24 +000011238 /* For NOBREAK we never try splitting, it won't make any word
11239 * valid. */
11240 if (slang->sl_nobreak)
11241 try_compound = TRUE;
Bram Moolenaar78622822005-08-23 21:00:13 +000011242
Bram Moolenaar4770d092006-01-12 23:22:24 +000011243 /* If we could add a compound word, and it's also possible to
11244 * split at this point, do the split first and set
11245 * TSF_DIDSPLIT to avoid doing it again. */
11246 else if (!fword_ends
11247 && try_compound
11248 && (sp->ts_flags & TSF_DIDSPLIT) == 0)
11249 {
11250 try_compound = FALSE;
11251 sp->ts_flags |= TSF_DIDSPLIT;
11252 --sp->ts_curi; /* do the same NUL again */
11253 compflags[sp->ts_complen] = NUL;
11254 }
11255 else
11256 sp->ts_flags &= ~TSF_DIDSPLIT;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011257
Bram Moolenaar4770d092006-01-12 23:22:24 +000011258 if (try_split || try_compound)
11259 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011260 if (!try_compound && (!fword_ends || !goodword_ends))
Bram Moolenaard12a1322005-08-21 22:08:24 +000011261 {
11262 /* If we're going to split need to check that the
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011263 * words so far are valid for compounding. If there
11264 * is only one word it must not have the NEEDCOMPOUND
11265 * flag. */
11266 if (sp->ts_complen == sp->ts_compsplit
11267 && (flags & WF_NEEDCOMP))
11268 break;
Bram Moolenaare52325c2005-08-22 22:54:29 +000011269 p = preword;
11270 while (*skiptowhite(p) != NUL)
11271 p = skipwhite(skiptowhite(p));
Bram Moolenaard12a1322005-08-21 22:08:24 +000011272 if (sp->ts_complen > sp->ts_compsplit
Bram Moolenaare52325c2005-08-22 22:54:29 +000011273 && !can_compound(slang, p,
Bram Moolenaard12a1322005-08-21 22:08:24 +000011274 compflags + sp->ts_compsplit))
11275 break;
Bram Moolenaare1438bb2006-03-01 22:01:55 +000011276
11277 if (slang->sl_nosplitsugs)
11278 newscore += SCORE_SPLIT_NO;
11279 else
11280 newscore += SCORE_SPLIT;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011281
11282 /* Give a bonus to words seen before. */
11283 newscore = score_wordcount_adj(slang, newscore,
11284 preword + sp->ts_prewordlen, TRUE);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011285 }
11286
Bram Moolenaar4770d092006-01-12 23:22:24 +000011287 if (TRY_DEEPER(su, stack, depth, newscore))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011288 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011289 go_deeper(stack, depth, newscore);
11290#ifdef DEBUG_TRIEWALK
11291 if (!try_compound && !fword_ends)
11292 sprintf(changename[depth], "%.*s-%s: split",
11293 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11294 else
11295 sprintf(changename[depth], "%.*s-%s: compound",
11296 sp->ts_twordlen, tword, fword + sp->ts_fidx);
11297#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011298 /* Save things to be restored at STATE_SPLITUNDO. */
Bram Moolenaar0c405862005-06-22 22:26:26 +000011299 sp->ts_save_badflags = su->su_badflags;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011300 sp->ts_state = STATE_SPLITUNDO;
11301
11302 ++depth;
11303 sp = &stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011304
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011305 /* Append a space to preword when splitting. */
11306 if (!try_compound && !fword_ends)
11307 STRCAT(preword, " ");
Bram Moolenaar5195e452005-08-19 20:32:47 +000011308 sp->ts_prewordlen = STRLEN(preword);
11309 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar78622822005-08-23 21:00:13 +000011310 sp->ts_splitfidx = sp->ts_fidx;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011311
11312 /* If the badword has a non-word character at this
11313 * position skip it. That means replacing the
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011314 * non-word character with a space. Always skip a
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000011315 * character when the word ends. But only when the
11316 * good word can end. */
Bram Moolenaar4770d092006-01-12 23:22:24 +000011317 if (((!try_compound && !spell_iswordp_nmw(fword
11318 + sp->ts_fidx))
11319 || fword_ends)
11320 && fword[sp->ts_fidx] != NUL
11321 && goodword_ends)
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011322 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011323 int l;
11324
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011325#ifdef FEAT_MBYTE
11326 if (has_mbyte)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011327 l = MB_BYTE2LEN(fword[sp->ts_fidx]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011328 else
11329#endif
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011330 l = 1;
11331 if (fword_ends)
11332 {
11333 /* Copy the skipped character to preword. */
Bram Moolenaar5195e452005-08-19 20:32:47 +000011334 mch_memmove(preword + sp->ts_prewordlen,
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011335 fword + sp->ts_fidx, l);
Bram Moolenaar5195e452005-08-19 20:32:47 +000011336 sp->ts_prewordlen += l;
11337 preword[sp->ts_prewordlen] = NUL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011338 }
11339 else
11340 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
11341 sp->ts_fidx += l;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011342 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000011343
Bram Moolenaard12a1322005-08-21 22:08:24 +000011344 /* When compounding include compound flag in
11345 * compflags[] (already set above). When splitting we
11346 * may start compounding over again. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011347 if (try_compound)
Bram Moolenaar5195e452005-08-19 20:32:47 +000011348 ++sp->ts_complen;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011349 else
Bram Moolenaard12a1322005-08-21 22:08:24 +000011350 sp->ts_compsplit = sp->ts_complen;
11351 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +000011352
Bram Moolenaar53805d12005-08-01 07:08:33 +000011353 /* set su->su_badflags to the caps type at this
11354 * position */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011355#ifdef FEAT_MBYTE
11356 if (has_mbyte)
Bram Moolenaar53805d12005-08-01 07:08:33 +000011357 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011358 else
11359#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +000011360 n = sp->ts_fidx;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011361 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +000011362 su->su_badptr + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011363
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011364 /* Restart at top of the tree. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +000011365 sp->ts_arridx = 0;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011366
11367 /* If there are postponed prefixes, try these too. */
11368 if (pbyts != NULL)
11369 {
11370 byts = pbyts;
11371 idxs = pidxs;
11372 sp->ts_prefixdepth = PFD_PREFIXTREE;
11373 sp->ts_state = STATE_NOPREFIX;
11374 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011375 }
11376 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011377 }
11378 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011379
Bram Moolenaar4770d092006-01-12 23:22:24 +000011380 case STATE_SPLITUNDO:
11381 /* Undo the changes done for word split or compound word. */
11382 su->su_badflags = sp->ts_save_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011383
Bram Moolenaar4770d092006-01-12 23:22:24 +000011384 /* Continue looking for NUL bytes. */
11385 sp->ts_state = STATE_START;
Bram Moolenaard12a1322005-08-21 22:08:24 +000011386
Bram Moolenaar4770d092006-01-12 23:22:24 +000011387 /* In case we went into the prefix tree. */
11388 byts = fbyts;
11389 idxs = fidxs;
11390 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011391
Bram Moolenaar4770d092006-01-12 23:22:24 +000011392 case STATE_ENDNUL:
11393 /* Past the NUL bytes in the node. */
11394 su->su_badflags = sp->ts_save_badflags;
11395 if (fword[sp->ts_fidx] == NUL
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011396#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011397 && sp->ts_tcharlen == 0
Bram Moolenaarda2303d2005-08-30 21:55:26 +000011398#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011399 )
11400 {
11401 /* The badword ends, can't use STATE_PLAIN. */
11402 sp->ts_state = STATE_DEL;
11403 break;
11404 }
11405 sp->ts_state = STATE_PLAIN;
11406 /*FALLTHROUGH*/
11407
11408 case STATE_PLAIN:
11409 /*
11410 * Go over all possible bytes at this node, add each to tword[]
11411 * and use child node. "ts_curi" is the index.
11412 */
11413 arridx = sp->ts_arridx;
11414 if (sp->ts_curi > byts[arridx])
11415 {
11416 /* Done all bytes at this node, do next state. When still at
11417 * already changed bytes skip the other tricks. */
11418 if (sp->ts_fidx >= sp->ts_fidxtry)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011419 sp->ts_state = STATE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011420 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011421 sp->ts_state = STATE_FINAL;
11422 }
11423 else
11424 {
11425 arridx += sp->ts_curi++;
11426 c = byts[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011427
Bram Moolenaar4770d092006-01-12 23:22:24 +000011428 /* Normal byte, go one level deeper. If it's not equal to the
11429 * byte in the bad word adjust the score. But don't even try
11430 * when the byte was already changed. And don't try when we
11431 * just deleted this byte, accepting it is always cheaper than
11432 * delete + substitute. */
11433 if (c == fword[sp->ts_fidx]
Bram Moolenaarea424162005-06-16 21:51:00 +000011434#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011435 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
Bram Moolenaar9f30f502005-06-14 22:01:04 +000011436#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011437 )
11438 newscore = 0;
11439 else
11440 newscore = SCORE_SUBST;
11441 if ((newscore == 0
11442 || (sp->ts_fidx >= sp->ts_fidxtry
11443 && ((sp->ts_flags & TSF_DIDDEL) == 0
11444 || c != fword[sp->ts_delidx])))
11445 && TRY_DEEPER(su, stack, depth, newscore))
11446 {
11447 go_deeper(stack, depth, newscore);
11448#ifdef DEBUG_TRIEWALK
11449 if (newscore > 0)
11450 sprintf(changename[depth], "%.*s-%s: subst %c to %c",
11451 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11452 fword[sp->ts_fidx], c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011453 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011454 sprintf(changename[depth], "%.*s-%s: accept %c",
11455 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11456 fword[sp->ts_fidx]);
11457#endif
11458 ++depth;
11459 sp = &stack[depth];
11460 ++sp->ts_fidx;
11461 tword[sp->ts_twordlen++] = c;
11462 sp->ts_arridx = idxs[arridx];
Bram Moolenaarea424162005-06-16 21:51:00 +000011463#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011464 if (newscore == SCORE_SUBST)
11465 sp->ts_isdiff = DIFF_YES;
11466 if (has_mbyte)
11467 {
11468 /* Multi-byte characters are a bit complicated to
11469 * handle: They differ when any of the bytes differ
11470 * and then their length may also differ. */
11471 if (sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011472 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011473 /* First byte. */
11474 sp->ts_tcharidx = 0;
11475 sp->ts_tcharlen = MB_BYTE2LEN(c);
11476 sp->ts_fcharstart = sp->ts_fidx - 1;
11477 sp->ts_isdiff = (newscore != 0)
Bram Moolenaarea424162005-06-16 21:51:00 +000011478 ? DIFF_YES : DIFF_NONE;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011479 }
11480 else if (sp->ts_isdiff == DIFF_INSERT)
11481 /* When inserting trail bytes don't advance in the
11482 * bad word. */
11483 --sp->ts_fidx;
11484 if (++sp->ts_tcharidx == sp->ts_tcharlen)
11485 {
11486 /* Last byte of character. */
11487 if (sp->ts_isdiff == DIFF_YES)
Bram Moolenaarea424162005-06-16 21:51:00 +000011488 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011489 /* Correct ts_fidx for the byte length of the
11490 * character (we didn't check that before). */
11491 sp->ts_fidx = sp->ts_fcharstart
11492 + MB_BYTE2LEN(
Bram Moolenaarea424162005-06-16 21:51:00 +000011493 fword[sp->ts_fcharstart]);
11494
Bram Moolenaar4770d092006-01-12 23:22:24 +000011495 /* For changing a composing character adjust
11496 * the score from SCORE_SUBST to
11497 * SCORE_SUBCOMP. */
11498 if (enc_utf8
11499 && utf_iscomposing(
11500 mb_ptr2char(tword
11501 + sp->ts_twordlen
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011502 - sp->ts_tcharlen))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011503 && utf_iscomposing(
11504 mb_ptr2char(fword
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011505 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011506 sp->ts_score -=
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +000011507 SCORE_SUBST - SCORE_SUBCOMP;
11508
Bram Moolenaar4770d092006-01-12 23:22:24 +000011509 /* For a similar character adjust score from
11510 * SCORE_SUBST to SCORE_SIMILAR. */
11511 else if (!soundfold
11512 && slang->sl_has_map
11513 && similar_chars(slang,
11514 mb_ptr2char(tword
11515 + sp->ts_twordlen
Bram Moolenaarea424162005-06-16 21:51:00 +000011516 - sp->ts_tcharlen),
Bram Moolenaar4770d092006-01-12 23:22:24 +000011517 mb_ptr2char(fword
Bram Moolenaarea424162005-06-16 21:51:00 +000011518 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +000011519 sp->ts_score -=
Bram Moolenaarea424162005-06-16 21:51:00 +000011520 SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea424162005-06-16 21:51:00 +000011521 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011522 else if (sp->ts_isdiff == DIFF_INSERT
11523 && sp->ts_twordlen > sp->ts_tcharlen)
11524 {
11525 p = tword + sp->ts_twordlen - sp->ts_tcharlen;
11526 c = mb_ptr2char(p);
11527 if (enc_utf8 && utf_iscomposing(c))
11528 {
11529 /* Inserting a composing char doesn't
11530 * count that much. */
11531 sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
11532 }
11533 else
11534 {
11535 /* If the previous character was the same,
11536 * thus doubling a character, give a bonus
11537 * to the score. Also for the soundfold
11538 * tree (might seem illogical but does
11539 * give better scores). */
11540 mb_ptr_back(tword, p);
11541 if (c == mb_ptr2char(p))
11542 sp->ts_score -= SCORE_INS
11543 - SCORE_INSDUP;
11544 }
11545 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011546
Bram Moolenaar4770d092006-01-12 23:22:24 +000011547 /* Starting a new char, reset the length. */
11548 sp->ts_tcharlen = 0;
11549 }
Bram Moolenaarea408852005-06-25 22:49:46 +000011550 }
Bram Moolenaarea424162005-06-16 21:51:00 +000011551 else
11552#endif
Bram Moolenaarea408852005-06-25 22:49:46 +000011553 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011554 /* If we found a similar char adjust the score.
11555 * We do this after calling go_deeper() because
11556 * it's slow. */
11557 if (newscore != 0
11558 && !soundfold
11559 && slang->sl_has_map
11560 && similar_chars(slang,
11561 c, fword[sp->ts_fidx - 1]))
11562 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea408852005-06-25 22:49:46 +000011563 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011564 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011565 }
11566 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011567
Bram Moolenaar4770d092006-01-12 23:22:24 +000011568 case STATE_DEL:
11569#ifdef FEAT_MBYTE
11570 /* When past the first byte of a multi-byte char don't try
11571 * delete/insert/swap a character. */
11572 if (has_mbyte && sp->ts_tcharlen > 0)
11573 {
11574 sp->ts_state = STATE_FINAL;
11575 break;
11576 }
11577#endif
11578 /*
11579 * Try skipping one character in the bad word (delete it).
11580 */
11581 sp->ts_state = STATE_INS_PREP;
11582 sp->ts_curi = 1;
11583 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
11584 /* Deleting a vowel at the start of a word counts less, see
11585 * soundalike_score(). */
11586 newscore = 2 * SCORE_DEL / 3;
11587 else
11588 newscore = SCORE_DEL;
11589 if (fword[sp->ts_fidx] != NUL
11590 && TRY_DEEPER(su, stack, depth, newscore))
11591 {
11592 go_deeper(stack, depth, newscore);
11593#ifdef DEBUG_TRIEWALK
11594 sprintf(changename[depth], "%.*s-%s: delete %c",
11595 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11596 fword[sp->ts_fidx]);
11597#endif
11598 ++depth;
11599
11600 /* Remember what character we deleted, so that we can avoid
11601 * inserting it again. */
11602 stack[depth].ts_flags |= TSF_DIDDEL;
11603 stack[depth].ts_delidx = sp->ts_fidx;
11604
11605 /* Advance over the character in fword[]. Give a bonus to the
11606 * score if the same character is following "nn" -> "n". It's
11607 * a bit illogical for soundfold tree but it does give better
11608 * results. */
11609#ifdef FEAT_MBYTE
11610 if (has_mbyte)
11611 {
11612 c = mb_ptr2char(fword + sp->ts_fidx);
11613 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
11614 if (enc_utf8 && utf_iscomposing(c))
11615 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
11616 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
11617 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11618 }
11619 else
11620#endif
11621 {
11622 ++stack[depth].ts_fidx;
11623 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
11624 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
11625 }
11626 break;
11627 }
11628 /*FALLTHROUGH*/
11629
11630 case STATE_INS_PREP:
11631 if (sp->ts_flags & TSF_DIDDEL)
11632 {
11633 /* If we just deleted a byte then inserting won't make sense,
11634 * a substitute is always cheaper. */
11635 sp->ts_state = STATE_SWAP;
11636 break;
11637 }
11638
11639 /* skip over NUL bytes */
11640 n = sp->ts_arridx;
11641 for (;;)
11642 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011643 if (sp->ts_curi > byts[n])
11644 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011645 /* Only NUL bytes at this node, go to next state. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011646 sp->ts_state = STATE_SWAP;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011647 break;
11648 }
11649 if (byts[n + sp->ts_curi] != NUL)
11650 {
11651 /* Found a byte to insert. */
11652 sp->ts_state = STATE_INS;
11653 break;
11654 }
11655 ++sp->ts_curi;
11656 }
11657 break;
11658
11659 /*FALLTHROUGH*/
11660
11661 case STATE_INS:
11662 /* Insert one byte. Repeat this for each possible byte at this
11663 * node. */
11664 n = sp->ts_arridx;
11665 if (sp->ts_curi > byts[n])
11666 {
11667 /* Done all bytes at this node, go to next state. */
11668 sp->ts_state = STATE_SWAP;
11669 break;
11670 }
11671
11672 /* Do one more byte at this node, but:
11673 * - Skip NUL bytes.
11674 * - Skip the byte if it's equal to the byte in the word,
11675 * accepting that byte is always better.
11676 */
11677 n += sp->ts_curi++;
11678 c = byts[n];
11679 if (soundfold && sp->ts_twordlen == 0 && c == '*')
11680 /* Inserting a vowel at the start of a word counts less,
11681 * see soundalike_score(). */
11682 newscore = 2 * SCORE_INS / 3;
11683 else
11684 newscore = SCORE_INS;
11685 if (c != fword[sp->ts_fidx]
11686 && TRY_DEEPER(su, stack, depth, newscore))
11687 {
11688 go_deeper(stack, depth, newscore);
11689#ifdef DEBUG_TRIEWALK
11690 sprintf(changename[depth], "%.*s-%s: insert %c",
11691 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11692 c);
11693#endif
11694 ++depth;
11695 sp = &stack[depth];
11696 tword[sp->ts_twordlen++] = c;
11697 sp->ts_arridx = idxs[n];
11698#ifdef FEAT_MBYTE
11699 if (has_mbyte)
11700 {
11701 fl = MB_BYTE2LEN(c);
11702 if (fl > 1)
11703 {
11704 /* There are following bytes for the same character.
11705 * We must find all bytes before trying
11706 * delete/insert/swap/etc. */
11707 sp->ts_tcharlen = fl;
11708 sp->ts_tcharidx = 1;
11709 sp->ts_isdiff = DIFF_INSERT;
11710 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011711 }
11712 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000011713 fl = 1;
11714 if (fl == 1)
Bram Moolenaarea424162005-06-16 21:51:00 +000011715#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011716 {
11717 /* If the previous character was the same, thus doubling a
11718 * character, give a bonus to the score. Also for
11719 * soundfold words (illogical but does give a better
11720 * score). */
11721 if (sp->ts_twordlen >= 2
Bram Moolenaarea408852005-06-25 22:49:46 +000011722 && tword[sp->ts_twordlen - 2] == c)
Bram Moolenaar4770d092006-01-12 23:22:24 +000011723 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011724 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011725 }
11726 break;
11727
11728 case STATE_SWAP:
11729 /*
11730 * Swap two bytes in the bad word: "12" -> "21".
11731 * We change "fword" here, it's changed back afterwards at
11732 * STATE_UNSWAP.
11733 */
11734 p = fword + sp->ts_fidx;
11735 c = *p;
11736 if (c == NUL)
11737 {
11738 /* End of word, can't swap or replace. */
11739 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011740 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011741 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011742
Bram Moolenaar4770d092006-01-12 23:22:24 +000011743 /* Don't swap if the first character is not a word character.
11744 * SWAP3 etc. also don't make sense then. */
11745 if (!soundfold && !spell_iswordp(p, curbuf))
11746 {
11747 sp->ts_state = STATE_REP_INI;
11748 break;
11749 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011750
Bram Moolenaar4770d092006-01-12 23:22:24 +000011751#ifdef FEAT_MBYTE
11752 if (has_mbyte)
11753 {
11754 n = mb_cptr2len(p);
11755 c = mb_ptr2char(p);
11756 if (!soundfold && !spell_iswordp(p + n, curbuf))
11757 c2 = c; /* don't swap non-word char */
11758 else
11759 c2 = mb_ptr2char(p + n);
11760 }
11761 else
11762#endif
11763 {
11764 if (!soundfold && !spell_iswordp(p + 1, curbuf))
11765 c2 = c; /* don't swap non-word char */
11766 else
11767 c2 = p[1];
11768 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011769
Bram Moolenaar4770d092006-01-12 23:22:24 +000011770 /* When characters are identical, swap won't do anything.
11771 * Also get here if the second char is not a word character. */
11772 if (c == c2)
11773 {
11774 sp->ts_state = STATE_SWAP3;
11775 break;
11776 }
11777 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
11778 {
11779 go_deeper(stack, depth, SCORE_SWAP);
11780#ifdef DEBUG_TRIEWALK
11781 sprintf(changename[depth], "%.*s-%s: swap %c and %c",
11782 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11783 c, c2);
11784#endif
11785 sp->ts_state = STATE_UNSWAP;
11786 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000011787#ifdef FEAT_MBYTE
11788 if (has_mbyte)
11789 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011790 fl = mb_char2len(c2);
11791 mch_memmove(p, p + n, fl);
11792 mb_char2bytes(c, p + fl);
11793 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000011794 }
11795 else
11796#endif
Bram Moolenaarbb15b652005-10-03 21:52:09 +000011797 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000011798 p[0] = c2;
Bram Moolenaarea424162005-06-16 21:51:00 +000011799 p[1] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011800 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
Bram Moolenaarea424162005-06-16 21:51:00 +000011801 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011802 }
11803 else
11804 /* If this swap doesn't work then SWAP3 won't either. */
11805 sp->ts_state = STATE_REP_INI;
11806 break;
Bram Moolenaarea424162005-06-16 21:51:00 +000011807
Bram Moolenaar4770d092006-01-12 23:22:24 +000011808 case STATE_UNSWAP:
11809 /* Undo the STATE_SWAP swap: "21" -> "12". */
11810 p = fword + sp->ts_fidx;
11811#ifdef FEAT_MBYTE
11812 if (has_mbyte)
11813 {
11814 n = MB_BYTE2LEN(*p);
11815 c = mb_ptr2char(p + n);
11816 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
11817 mb_char2bytes(c, p);
11818 }
11819 else
11820#endif
11821 {
11822 c = *p;
11823 *p = p[1];
11824 p[1] = c;
11825 }
11826 /*FALLTHROUGH*/
11827
11828 case STATE_SWAP3:
11829 /* Swap two bytes, skipping one: "123" -> "321". We change
11830 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
11831 p = fword + sp->ts_fidx;
11832#ifdef FEAT_MBYTE
11833 if (has_mbyte)
11834 {
11835 n = mb_cptr2len(p);
11836 c = mb_ptr2char(p);
11837 fl = mb_cptr2len(p + n);
11838 c2 = mb_ptr2char(p + n);
11839 if (!soundfold && !spell_iswordp(p + n + fl, curbuf))
11840 c3 = c; /* don't swap non-word char */
11841 else
11842 c3 = mb_ptr2char(p + n + fl);
11843 }
11844 else
11845#endif
11846 {
11847 c = *p;
11848 c2 = p[1];
11849 if (!soundfold && !spell_iswordp(p + 2, curbuf))
11850 c3 = c; /* don't swap non-word char */
11851 else
11852 c3 = p[2];
11853 }
11854
11855 /* When characters are identical: "121" then SWAP3 result is
11856 * identical, ROT3L result is same as SWAP: "211", ROT3R result is
11857 * same as SWAP on next char: "112". Thus skip all swapping.
11858 * Also skip when c3 is NUL.
11859 * Also get here when the third character is not a word character.
11860 * Second character may be any char: "a.b" -> "b.a" */
11861 if (c == c3 || c3 == NUL)
11862 {
11863 sp->ts_state = STATE_REP_INI;
11864 break;
11865 }
11866 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11867 {
11868 go_deeper(stack, depth, SCORE_SWAP3);
11869#ifdef DEBUG_TRIEWALK
11870 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
11871 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11872 c, c3);
11873#endif
11874 sp->ts_state = STATE_UNSWAP3;
11875 ++depth;
11876#ifdef FEAT_MBYTE
11877 if (has_mbyte)
11878 {
11879 tl = mb_char2len(c3);
11880 mch_memmove(p, p + n + fl, tl);
11881 mb_char2bytes(c2, p + tl);
11882 mb_char2bytes(c, p + fl + tl);
11883 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
11884 }
11885 else
11886#endif
11887 {
11888 p[0] = p[2];
11889 p[2] = c;
11890 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
11891 }
11892 }
11893 else
11894 sp->ts_state = STATE_REP_INI;
11895 break;
11896
11897 case STATE_UNSWAP3:
11898 /* Undo STATE_SWAP3: "321" -> "123" */
11899 p = fword + sp->ts_fidx;
11900#ifdef FEAT_MBYTE
11901 if (has_mbyte)
11902 {
11903 n = MB_BYTE2LEN(*p);
11904 c2 = mb_ptr2char(p + n);
11905 fl = MB_BYTE2LEN(p[n]);
11906 c = mb_ptr2char(p + n + fl);
11907 tl = MB_BYTE2LEN(p[n + fl]);
11908 mch_memmove(p + fl + tl, p, n);
11909 mb_char2bytes(c, p);
11910 mb_char2bytes(c2, p + tl);
11911 p = p + tl;
11912 }
11913 else
11914#endif
11915 {
11916 c = *p;
11917 *p = p[2];
11918 p[2] = c;
11919 ++p;
11920 }
11921
11922 if (!soundfold && !spell_iswordp(p, curbuf))
11923 {
11924 /* Middle char is not a word char, skip the rotate. First and
11925 * third char were already checked at swap and swap3. */
11926 sp->ts_state = STATE_REP_INI;
11927 break;
11928 }
11929
11930 /* Rotate three characters left: "123" -> "231". We change
11931 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
11932 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11933 {
11934 go_deeper(stack, depth, SCORE_SWAP3);
11935#ifdef DEBUG_TRIEWALK
11936 p = fword + sp->ts_fidx;
11937 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
11938 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11939 p[0], p[1], p[2]);
11940#endif
11941 sp->ts_state = STATE_UNROT3L;
11942 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +000011943 p = fword + sp->ts_fidx;
11944#ifdef FEAT_MBYTE
11945 if (has_mbyte)
11946 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011947 n = mb_cptr2len(p);
Bram Moolenaarea424162005-06-16 21:51:00 +000011948 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000011949 fl = mb_cptr2len(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +000011950 fl += mb_cptr2len(p + n + fl);
11951 mch_memmove(p, p + n, fl);
11952 mb_char2bytes(c, p + fl);
11953 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +000011954 }
11955 else
11956#endif
11957 {
11958 c = *p;
11959 *p = p[1];
11960 p[1] = p[2];
11961 p[2] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011962 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
Bram Moolenaarea424162005-06-16 21:51:00 +000011963 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000011964 }
11965 else
11966 sp->ts_state = STATE_REP_INI;
11967 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011968
Bram Moolenaar4770d092006-01-12 23:22:24 +000011969 case STATE_UNROT3L:
11970 /* Undo ROT3L: "231" -> "123" */
11971 p = fword + sp->ts_fidx;
Bram Moolenaarea424162005-06-16 21:51:00 +000011972#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +000011973 if (has_mbyte)
11974 {
11975 n = MB_BYTE2LEN(*p);
11976 n += MB_BYTE2LEN(p[n]);
11977 c = mb_ptr2char(p + n);
11978 tl = MB_BYTE2LEN(p[n]);
11979 mch_memmove(p + tl, p, n);
11980 mb_char2bytes(c, p);
11981 }
11982 else
Bram Moolenaarea424162005-06-16 21:51:00 +000011983#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +000011984 {
11985 c = p[2];
11986 p[2] = p[1];
11987 p[1] = *p;
11988 *p = c;
11989 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011990
Bram Moolenaar4770d092006-01-12 23:22:24 +000011991 /* Rotate three bytes right: "123" -> "312". We change "fword"
11992 * here, it's changed back afterwards at STATE_UNROT3R. */
11993 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
11994 {
11995 go_deeper(stack, depth, SCORE_SWAP3);
11996#ifdef DEBUG_TRIEWALK
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000011997 p = fword + sp->ts_fidx;
Bram Moolenaar4770d092006-01-12 23:22:24 +000011998 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
11999 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12000 p[0], p[1], p[2]);
12001#endif
12002 sp->ts_state = STATE_UNROT3R;
12003 ++depth;
12004 p = fword + sp->ts_fidx;
12005#ifdef FEAT_MBYTE
12006 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000012007 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012008 n = mb_cptr2len(p);
12009 n += mb_cptr2len(p + n);
12010 c = mb_ptr2char(p + n);
12011 tl = mb_cptr2len(p + n);
12012 mch_memmove(p + tl, p, n);
12013 mb_char2bytes(c, p);
12014 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
Bram Moolenaar0c405862005-06-22 22:26:26 +000012015 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012016 else
12017#endif
12018 {
12019 c = p[2];
12020 p[2] = p[1];
12021 p[1] = *p;
12022 *p = c;
12023 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12024 }
12025 }
12026 else
12027 sp->ts_state = STATE_REP_INI;
12028 break;
12029
12030 case STATE_UNROT3R:
12031 /* Undo ROT3R: "312" -> "123" */
12032 p = fword + sp->ts_fidx;
12033#ifdef FEAT_MBYTE
12034 if (has_mbyte)
12035 {
12036 c = mb_ptr2char(p);
12037 tl = MB_BYTE2LEN(*p);
12038 n = MB_BYTE2LEN(p[tl]);
12039 n += MB_BYTE2LEN(p[tl + n]);
12040 mch_memmove(p, p + tl, n);
12041 mb_char2bytes(c, p + n);
12042 }
12043 else
12044#endif
12045 {
12046 c = *p;
12047 *p = p[1];
12048 p[1] = p[2];
12049 p[2] = c;
12050 }
12051 /*FALLTHROUGH*/
12052
12053 case STATE_REP_INI:
12054 /* Check if matching with REP items from the .aff file would work.
12055 * Quickly skip if:
12056 * - there are no REP items and we are not in the soundfold trie
12057 * - the score is going to be too high anyway
12058 * - already applied a REP item or swapped here */
12059 if ((lp->lp_replang == NULL && !soundfold)
12060 || sp->ts_score + SCORE_REP >= su->su_maxscore
12061 || sp->ts_fidx < sp->ts_fidxtry)
12062 {
12063 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012064 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012065 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012066
Bram Moolenaar4770d092006-01-12 23:22:24 +000012067 /* Use the first byte to quickly find the first entry that may
12068 * match. If the index is -1 there is none. */
12069 if (soundfold)
12070 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
12071 else
12072 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012073
Bram Moolenaar4770d092006-01-12 23:22:24 +000012074 if (sp->ts_curi < 0)
12075 {
12076 sp->ts_state = STATE_FINAL;
12077 break;
12078 }
12079
12080 sp->ts_state = STATE_REP;
12081 /*FALLTHROUGH*/
12082
12083 case STATE_REP:
12084 /* Try matching with REP items from the .aff file. For each match
12085 * replace the characters and check if the resulting word is
12086 * valid. */
12087 p = fword + sp->ts_fidx;
12088
12089 if (soundfold)
12090 gap = &slang->sl_repsal;
12091 else
12092 gap = &lp->lp_replang->sl_rep;
12093 while (sp->ts_curi < gap->ga_len)
12094 {
12095 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
12096 if (*ftp->ft_from != *p)
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012097 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012098 /* past possible matching entries */
12099 sp->ts_curi = gap->ga_len;
12100 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012101 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012102 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
12103 && TRY_DEEPER(su, stack, depth, SCORE_REP))
12104 {
12105 go_deeper(stack, depth, SCORE_REP);
12106#ifdef DEBUG_TRIEWALK
12107 sprintf(changename[depth], "%.*s-%s: replace %s with %s",
12108 sp->ts_twordlen, tword, fword + sp->ts_fidx,
12109 ftp->ft_from, ftp->ft_to);
12110#endif
12111 /* Need to undo this afterwards. */
12112 sp->ts_state = STATE_REP_UNDO;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012113
Bram Moolenaar4770d092006-01-12 23:22:24 +000012114 /* Change the "from" to the "to" string. */
12115 ++depth;
12116 fl = STRLEN(ftp->ft_from);
12117 tl = STRLEN(ftp->ft_to);
12118 if (fl != tl)
12119 {
12120 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
12121 repextra += tl - fl;
12122 }
12123 mch_memmove(p, ftp->ft_to, tl);
12124 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
12125#ifdef FEAT_MBYTE
12126 stack[depth].ts_tcharlen = 0;
12127#endif
12128 break;
12129 }
12130 }
12131
12132 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
12133 /* No (more) matches. */
12134 sp->ts_state = STATE_FINAL;
12135
12136 break;
12137
12138 case STATE_REP_UNDO:
12139 /* Undo a REP replacement and continue with the next one. */
12140 if (soundfold)
12141 gap = &slang->sl_repsal;
12142 else
12143 gap = &lp->lp_replang->sl_rep;
12144 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
12145 fl = STRLEN(ftp->ft_from);
12146 tl = STRLEN(ftp->ft_to);
12147 p = fword + sp->ts_fidx;
12148 if (fl != tl)
12149 {
12150 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
12151 repextra -= tl - fl;
12152 }
12153 mch_memmove(p, ftp->ft_from, fl);
12154 sp->ts_state = STATE_REP;
12155 break;
12156
12157 default:
12158 /* Did all possible states at this level, go up one level. */
12159 --depth;
12160
12161 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
12162 {
12163 /* Continue in or go back to the prefix tree. */
12164 byts = pbyts;
12165 idxs = pidxs;
12166 }
12167
12168 /* Don't check for CTRL-C too often, it takes time. */
12169 if (--breakcheckcount == 0)
12170 {
12171 ui_breakcheck();
12172 breakcheckcount = 1000;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012173 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012174 }
12175 }
12176}
12177
Bram Moolenaar4770d092006-01-12 23:22:24 +000012178
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012179/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000012180 * Go one level deeper in the tree.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012181 */
Bram Moolenaar4770d092006-01-12 23:22:24 +000012182 static void
12183go_deeper(stack, depth, score_add)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012184 trystate_T *stack;
12185 int depth;
12186 int score_add;
12187{
Bram Moolenaarea424162005-06-16 21:51:00 +000012188 stack[depth + 1] = stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012189 stack[depth + 1].ts_state = STATE_START;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012190 stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012191 stack[depth + 1].ts_curi = 1; /* start just after length byte */
Bram Moolenaard12a1322005-08-21 22:08:24 +000012192 stack[depth + 1].ts_flags = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012193}
12194
Bram Moolenaar53805d12005-08-01 07:08:33 +000012195#ifdef FEAT_MBYTE
12196/*
12197 * Case-folding may change the number of bytes: Count nr of chars in
12198 * fword[flen] and return the byte length of that many chars in "word".
12199 */
12200 static int
12201nofold_len(fword, flen, word)
12202 char_u *fword;
12203 int flen;
12204 char_u *word;
12205{
12206 char_u *p;
12207 int i = 0;
12208
12209 for (p = fword; p < fword + flen; mb_ptr_adv(p))
12210 ++i;
12211 for (p = word; i > 0; mb_ptr_adv(p))
12212 --i;
12213 return (int)(p - word);
12214}
12215#endif
12216
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012217/*
12218 * "fword" is a good word with case folded. Find the matching keep-case
12219 * words and put it in "kword".
12220 * Theoretically there could be several keep-case words that result in the
12221 * same case-folded word, but we only find one...
12222 */
12223 static void
12224find_keepcap_word(slang, fword, kword)
12225 slang_T *slang;
12226 char_u *fword;
12227 char_u *kword;
12228{
12229 char_u uword[MAXWLEN]; /* "fword" in upper-case */
12230 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012231 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012232
12233 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012234 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012235 int round[MAXWLEN];
12236 int fwordidx[MAXWLEN];
12237 int uwordidx[MAXWLEN];
12238 int kwordlen[MAXWLEN];
12239
12240 int flen, ulen;
12241 int l;
12242 int len;
12243 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012244 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012245 char_u *p;
12246 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012247 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012248
12249 if (byts == NULL)
12250 {
12251 /* array is empty: "cannot happen" */
12252 *kword = NUL;
12253 return;
12254 }
12255
12256 /* Make an all-cap version of "fword". */
12257 allcap_copy(fword, uword);
12258
12259 /*
12260 * Each character needs to be tried both case-folded and upper-case.
12261 * All this gets very complicated if we keep in mind that changing case
12262 * may change the byte length of a multi-byte character...
12263 */
12264 depth = 0;
12265 arridx[0] = 0;
12266 round[0] = 0;
12267 fwordidx[0] = 0;
12268 uwordidx[0] = 0;
12269 kwordlen[0] = 0;
12270 while (depth >= 0)
12271 {
12272 if (fword[fwordidx[depth]] == NUL)
12273 {
12274 /* We are at the end of "fword". If the tree allows a word to end
12275 * here we have found a match. */
12276 if (byts[arridx[depth] + 1] == 0)
12277 {
12278 kword[kwordlen[depth]] = NUL;
12279 return;
12280 }
12281
12282 /* kword is getting too long, continue one level up */
12283 --depth;
12284 }
12285 else if (++round[depth] > 2)
12286 {
12287 /* tried both fold-case and upper-case character, continue one
12288 * level up */
12289 --depth;
12290 }
12291 else
12292 {
12293 /*
12294 * round[depth] == 1: Try using the folded-case character.
12295 * round[depth] == 2: Try using the upper-case character.
12296 */
12297#ifdef FEAT_MBYTE
12298 if (has_mbyte)
12299 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000012300 flen = mb_cptr2len(fword + fwordidx[depth]);
12301 ulen = mb_cptr2len(uword + uwordidx[depth]);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012302 }
12303 else
12304#endif
12305 ulen = flen = 1;
12306 if (round[depth] == 1)
12307 {
12308 p = fword + fwordidx[depth];
12309 l = flen;
12310 }
12311 else
12312 {
12313 p = uword + uwordidx[depth];
12314 l = ulen;
12315 }
12316
12317 for (tryidx = arridx[depth]; l > 0; --l)
12318 {
12319 /* Perform a binary search in the list of accepted bytes. */
12320 len = byts[tryidx++];
12321 c = *p++;
12322 lo = tryidx;
12323 hi = tryidx + len - 1;
12324 while (lo < hi)
12325 {
12326 m = (lo + hi) / 2;
12327 if (byts[m] > c)
12328 hi = m - 1;
12329 else if (byts[m] < c)
12330 lo = m + 1;
12331 else
12332 {
12333 lo = hi = m;
12334 break;
12335 }
12336 }
12337
12338 /* Stop if there is no matching byte. */
12339 if (hi < lo || byts[lo] != c)
12340 break;
12341
12342 /* Continue at the child (if there is one). */
12343 tryidx = idxs[lo];
12344 }
12345
12346 if (l == 0)
12347 {
12348 /*
12349 * Found the matching char. Copy it to "kword" and go a
12350 * level deeper.
12351 */
12352 if (round[depth] == 1)
12353 {
12354 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
12355 flen);
12356 kwordlen[depth + 1] = kwordlen[depth] + flen;
12357 }
12358 else
12359 {
12360 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
12361 ulen);
12362 kwordlen[depth + 1] = kwordlen[depth] + ulen;
12363 }
12364 fwordidx[depth + 1] = fwordidx[depth] + flen;
12365 uwordidx[depth + 1] = uwordidx[depth] + ulen;
12366
12367 ++depth;
12368 arridx[depth] = tryidx;
12369 round[depth] = 0;
12370 }
12371 }
12372 }
12373
12374 /* Didn't find it: "cannot happen". */
12375 *kword = NUL;
12376}
12377
12378/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012379 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
12380 * su->su_sga.
12381 */
12382 static void
12383score_comp_sal(su)
12384 suginfo_T *su;
12385{
12386 langp_T *lp;
12387 char_u badsound[MAXWLEN];
12388 int i;
12389 suggest_T *stp;
12390 suggest_T *sstp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012391 int score;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012392 int lpi;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012393
12394 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
12395 return;
12396
12397 /* Use the sound-folding of the first language that supports it. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012398 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012399 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012400 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012401 if (lp->lp_slang->sl_sal.ga_len > 0)
12402 {
12403 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012404 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012405
12406 for (i = 0; i < su->su_ga.ga_len; ++i)
12407 {
12408 stp = &SUG(su->su_ga, i);
12409
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012410 /* Case-fold the suggested word, sound-fold it and compute the
12411 * sound-a-like score. */
12412 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012413 if (score < SCORE_MAXMAX)
12414 {
12415 /* Add the suggestion. */
12416 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
12417 sstp->st_word = vim_strsave(stp->st_word);
12418 if (sstp->st_word != NULL)
12419 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012420 sstp->st_wordlen = stp->st_wordlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012421 sstp->st_score = score;
12422 sstp->st_altscore = 0;
12423 sstp->st_orglen = stp->st_orglen;
12424 ++su->su_sga.ga_len;
12425 }
12426 }
12427 }
12428 break;
12429 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012430 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012431}
12432
12433/*
12434 * Combine the list of suggestions in su->su_ga and su->su_sga.
12435 * They are intwined.
12436 */
12437 static void
12438score_combine(su)
12439 suginfo_T *su;
12440{
12441 int i;
12442 int j;
12443 garray_T ga;
12444 garray_T *gap;
12445 langp_T *lp;
12446 suggest_T *stp;
12447 char_u *p;
12448 char_u badsound[MAXWLEN];
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012449 int round;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012450 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012451 slang_T *slang = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012452
12453 /* Add the alternate score to su_ga. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012454 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012455 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012456 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012457 if (lp->lp_slang->sl_sal.ga_len > 0)
12458 {
12459 /* soundfold the bad word */
Bram Moolenaar4770d092006-01-12 23:22:24 +000012460 slang = lp->lp_slang;
12461 spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012462
12463 for (i = 0; i < su->su_ga.ga_len; ++i)
12464 {
12465 stp = &SUG(su->su_ga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012466 stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012467 if (stp->st_altscore == SCORE_MAXMAX)
12468 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
12469 else
12470 stp->st_score = (stp->st_score * 3
12471 + stp->st_altscore) / 4;
12472 stp->st_salscore = FALSE;
12473 }
12474 break;
12475 }
12476 }
12477
Bram Moolenaar4770d092006-01-12 23:22:24 +000012478 if (slang == NULL) /* just in case */
12479 return;
12480
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012481 /* Add the alternate score to su_sga. */
12482 for (i = 0; i < su->su_sga.ga_len; ++i)
12483 {
12484 stp = &SUG(su->su_sga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012485 stp->st_altscore = spell_edit_score(slang,
12486 su->su_badword, stp->st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012487 if (stp->st_score == SCORE_MAXMAX)
12488 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
12489 else
12490 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
12491 stp->st_salscore = TRUE;
12492 }
12493
Bram Moolenaar4770d092006-01-12 23:22:24 +000012494 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
12495 * for both lists. */
12496 check_suggestions(su, &su->su_ga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012497 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012498 check_suggestions(su, &su->su_sga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000012499 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
12500
12501 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
12502 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
12503 return;
12504
12505 stp = &SUG(ga, 0);
12506 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
12507 {
12508 /* round 1: get a suggestion from su_ga
12509 * round 2: get a suggestion from su_sga */
12510 for (round = 1; round <= 2; ++round)
12511 {
12512 gap = round == 1 ? &su->su_ga : &su->su_sga;
12513 if (i < gap->ga_len)
12514 {
12515 /* Don't add a word if it's already there. */
12516 p = SUG(*gap, i).st_word;
12517 for (j = 0; j < ga.ga_len; ++j)
12518 if (STRCMP(stp[j].st_word, p) == 0)
12519 break;
12520 if (j == ga.ga_len)
12521 stp[ga.ga_len++] = SUG(*gap, i);
12522 else
12523 vim_free(p);
12524 }
12525 }
12526 }
12527
12528 ga_clear(&su->su_ga);
12529 ga_clear(&su->su_sga);
12530
12531 /* Truncate the list to the number of suggestions that will be displayed. */
12532 if (ga.ga_len > su->su_maxcount)
12533 {
12534 for (i = su->su_maxcount; i < ga.ga_len; ++i)
12535 vim_free(stp[i].st_word);
12536 ga.ga_len = su->su_maxcount;
12537 }
12538
12539 su->su_ga = ga;
12540}
12541
12542/*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012543 * For the goodword in "stp" compute the soundalike score compared to the
12544 * badword.
12545 */
12546 static int
12547stp_sal_score(stp, su, slang, badsound)
12548 suggest_T *stp;
12549 suginfo_T *su;
12550 slang_T *slang;
12551 char_u *badsound; /* sound-folded badword */
12552{
12553 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012554 char_u *pbad;
12555 char_u *pgood;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012556 char_u badsound2[MAXWLEN];
12557 char_u fword[MAXWLEN];
12558 char_u goodsound[MAXWLEN];
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012559 char_u goodword[MAXWLEN];
12560 int lendiff;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012561
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012562 lendiff = (int)(su->su_badlen - stp->st_orglen);
12563 if (lendiff >= 0)
12564 pbad = badsound;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012565 else
12566 {
12567 /* soundfold the bad word with more characters following */
12568 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
12569
12570 /* When joining two words the sound often changes a lot. E.g., "t he"
12571 * sounds like "t h" while "the" sounds like "@". Avoid that by
12572 * removing the space. Don't do it when the good word also contains a
12573 * space. */
12574 if (vim_iswhite(su->su_badptr[su->su_badlen])
12575 && *skiptowhite(stp->st_word) == NUL)
12576 for (p = fword; *(p = skiptowhite(p)) != NUL; )
12577 mch_memmove(p, p + 1, STRLEN(p));
12578
Bram Moolenaar42eeac32005-06-29 22:40:58 +000012579 spell_soundfold(slang, fword, TRUE, badsound2);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012580 pbad = badsound2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012581 }
12582
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012583 if (lendiff > 0)
12584 {
12585 /* Add part of the bad word to the good word, so that we soundfold
12586 * what replaces the bad word. */
12587 STRCPY(goodword, stp->st_word);
Bram Moolenaar4770d092006-01-12 23:22:24 +000012588 vim_strncpy(goodword + stp->st_wordlen,
12589 su->su_badptr + su->su_badlen - lendiff, lendiff);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012590 pgood = goodword;
12591 }
12592 else
12593 pgood = stp->st_word;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012594
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000012595 /* Sound-fold the word and compute the score for the difference. */
12596 spell_soundfold(slang, pgood, FALSE, goodsound);
12597
12598 return soundalike_score(goodsound, pbad);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012599}
12600
Bram Moolenaar4770d092006-01-12 23:22:24 +000012601/* structure used to store soundfolded words that add_sound_suggest() has
12602 * handled already. */
12603typedef struct
12604{
12605 short sft_score; /* lowest score used */
12606 char_u sft_word[1]; /* soundfolded word, actually longer */
12607} sftword_T;
12608
12609static sftword_T dumsft;
12610#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
12611#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key)
12612
12613/*
12614 * Prepare for calling suggest_try_soundalike().
12615 */
12616 static void
12617suggest_try_soundalike_prep()
12618{
12619 langp_T *lp;
12620 int lpi;
12621 slang_T *slang;
12622
12623 /* Do this for all languages that support sound folding and for which a
12624 * .sug file has been loaded. */
12625 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12626 {
12627 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12628 slang = lp->lp_slang;
12629 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12630 /* prepare the hashtable used by add_sound_suggest() */
12631 hash_init(&slang->sl_sounddone);
12632 }
12633}
12634
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000012635/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012636 * Find suggestions by comparing the word in a sound-a-like form.
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012637 * Note: This doesn't support postponed prefixes.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012638 */
12639 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +000012640suggest_try_soundalike(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012641 suginfo_T *su;
12642{
12643 char_u salword[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012644 langp_T *lp;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000012645 int lpi;
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012646 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012647
Bram Moolenaar4770d092006-01-12 23:22:24 +000012648 /* Do this for all languages that support sound folding and for which a
12649 * .sug file has been loaded. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012650 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012651 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012652 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12653 slang = lp->lp_slang;
Bram Moolenaar4770d092006-01-12 23:22:24 +000012654 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012655 {
12656 /* soundfold the bad word */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000012657 spell_soundfold(slang, su->su_fbadword, TRUE, salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012658
Bram Moolenaar4770d092006-01-12 23:22:24 +000012659 /* try all kinds of inserts/deletes/swaps/etc. */
12660 /* TODO: also soundfold the next words, so that we can try joining
12661 * and splitting */
12662 suggest_trie_walk(su, lp, salword, TRUE);
12663 }
12664 }
12665}
12666
12667/*
12668 * Finish up after calling suggest_try_soundalike().
12669 */
12670 static void
12671suggest_try_soundalike_finish()
12672{
12673 langp_T *lp;
12674 int lpi;
12675 slang_T *slang;
12676 int todo;
12677 hashitem_T *hi;
12678
12679 /* Do this for all languages that support sound folding and for which a
12680 * .sug file has been loaded. */
12681 for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12682 {
12683 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12684 slang = lp->lp_slang;
12685 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
12686 {
12687 /* Free the info about handled words. */
12688 todo = slang->sl_sounddone.ht_used;
12689 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
12690 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012691 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012692 vim_free(HI2SFT(hi));
12693 --todo;
12694 }
12695 hash_clear(&slang->sl_sounddone);
12696 }
12697 }
12698}
12699
12700/*
12701 * A match with a soundfolded word is found. Add the good word(s) that
12702 * produce this soundfolded word.
12703 */
12704 static void
12705add_sound_suggest(su, goodword, score, lp)
12706 suginfo_T *su;
12707 char_u *goodword;
12708 int score; /* soundfold score */
12709 langp_T *lp;
12710{
12711 slang_T *slang = lp->lp_slang; /* language for sound folding */
12712 int sfwordnr;
12713 char_u *nrline;
12714 int orgnr;
12715 char_u theword[MAXWLEN];
12716 int i;
12717 int wlen;
12718 char_u *byts;
12719 idx_T *idxs;
12720 int n;
12721 int wordcount;
12722 int wc;
12723 int goodscore;
12724 hash_T hash;
12725 hashitem_T *hi;
12726 sftword_T *sft;
12727 int bc, gc;
12728 int limit;
12729
12730 /*
12731 * It's very well possible that the same soundfold word is found several
12732 * times with different scores. Since the following is quite slow only do
12733 * the words that have a better score than before. Use a hashtable to
12734 * remember the words that have been done.
12735 */
12736 hash = hash_hash(goodword);
12737 hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
12738 if (HASHITEM_EMPTY(hi))
12739 {
12740 sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword));
12741 if (sft != NULL)
12742 {
12743 sft->sft_score = score;
12744 STRCPY(sft->sft_word, goodword);
12745 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
12746 }
12747 }
12748 else
12749 {
12750 sft = HI2SFT(hi);
12751 if (score >= sft->sft_score)
12752 return;
12753 sft->sft_score = score;
12754 }
12755
12756 /*
12757 * Find the word nr in the soundfold tree.
12758 */
12759 sfwordnr = soundfold_find(slang, goodword);
12760 if (sfwordnr < 0)
12761 {
12762 EMSG2(_(e_intern2), "add_sound_suggest()");
12763 return;
12764 }
12765
12766 /*
12767 * go over the list of good words that produce this soundfold word
12768 */
12769 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
12770 orgnr = 0;
12771 while (*nrline != NUL)
12772 {
12773 /* The wordnr was stored in a minimal nr of bytes as an offset to the
12774 * previous wordnr. */
12775 orgnr += bytes2offset(&nrline);
12776
12777 byts = slang->sl_fbyts;
12778 idxs = slang->sl_fidxs;
12779
12780 /* Lookup the word "orgnr" one of the two tries. */
12781 n = 0;
12782 wlen = 0;
12783 wordcount = 0;
12784 for (;;)
12785 {
12786 i = 1;
12787 if (wordcount == orgnr && byts[n + 1] == NUL)
12788 break; /* found end of word */
12789
12790 if (byts[n + 1] == NUL)
12791 ++wordcount;
12792
12793 /* skip over the NUL bytes */
12794 for ( ; byts[n + i] == NUL; ++i)
12795 if (i > byts[n]) /* safety check */
12796 {
12797 STRCPY(theword + wlen, "BAD");
12798 goto badword;
12799 }
12800
12801 /* One of the siblings must have the word. */
12802 for ( ; i < byts[n]; ++i)
12803 {
12804 wc = idxs[idxs[n + i]]; /* nr of words under this byte */
12805 if (wordcount + wc > orgnr)
12806 break;
12807 wordcount += wc;
12808 }
12809
12810 theword[wlen++] = byts[n + i];
12811 n = idxs[n + i];
12812 }
12813badword:
12814 theword[wlen] = NUL;
12815
12816 /* Go over the possible flags and regions. */
12817 for (; i <= byts[n] && byts[n + i] == NUL; ++i)
12818 {
12819 char_u cword[MAXWLEN];
12820 char_u *p;
12821 int flags = (int)idxs[n + i];
12822
Bram Moolenaare1438bb2006-03-01 22:01:55 +000012823 /* Skip words with the NOSUGGEST flag */
12824 if (flags & WF_NOSUGGEST)
12825 continue;
12826
Bram Moolenaar4770d092006-01-12 23:22:24 +000012827 if (flags & WF_KEEPCAP)
12828 {
12829 /* Must find the word in the keep-case tree. */
12830 find_keepcap_word(slang, theword, cword);
12831 p = cword;
12832 }
12833 else
12834 {
12835 flags |= su->su_badflags;
12836 if ((flags & WF_CAPMASK) != 0)
12837 {
12838 /* Need to fix case according to "flags". */
12839 make_case_word(theword, cword, flags);
12840 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012841 }
12842 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000012843 p = theword;
12844 }
12845
12846 /* Add the suggestion. */
12847 if (sps_flags & SPS_DOUBLE)
12848 {
12849 /* Add the suggestion if the score isn't too bad. */
12850 if (score <= su->su_maxscore)
12851 add_suggestion(su, &su->su_sga, p, su->su_badlen,
12852 score, 0, FALSE, slang, FALSE);
12853 }
12854 else
12855 {
12856 /* Add a penalty for words in another region. */
12857 if ((flags & WF_REGION)
12858 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
12859 goodscore = SCORE_REGION;
12860 else
12861 goodscore = 0;
12862
12863 /* Add a small penalty for changing the first letter from
12864 * lower to upper case. Helps for "tath" -> "Kath", which is
12865 * less common thatn "tath" -> "path". Don't do it when the
12866 * letter is the same, that has already been counted. */
12867 gc = PTR2CHAR(p);
12868 if (SPELL_ISUPPER(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012869 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012870 bc = PTR2CHAR(su->su_badword);
12871 if (!SPELL_ISUPPER(bc)
12872 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
12873 goodscore += SCORE_ICASE / 2;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012874 }
12875
Bram Moolenaar4770d092006-01-12 23:22:24 +000012876 /* Compute the score for the good word. This only does letter
12877 * insert/delete/swap/replace. REP items are not considered,
12878 * which may make the score a bit higher.
12879 * Use a limit for the score to make it work faster. Use
12880 * MAXSCORE(), because RESCORE() will change the score.
12881 * If the limit is very high then the iterative method is
12882 * inefficient, using an array is quicker. */
12883 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
12884 if (limit > SCORE_LIMITMAX)
12885 goodscore += spell_edit_score(slang, su->su_badword, p);
12886 else
12887 goodscore += spell_edit_score_limit(slang, su->su_badword,
12888 p, limit);
12889
12890 /* When going over the limit don't bother to do the rest. */
12891 if (goodscore < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012892 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000012893 /* Give a bonus to words seen before. */
12894 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012895
Bram Moolenaar4770d092006-01-12 23:22:24 +000012896 /* Add the suggestion if the score isn't too bad. */
12897 goodscore = RESCORE(goodscore, score);
12898 if (goodscore <= su->su_sfmaxscore)
12899 add_suggestion(su, &su->su_ga, p, su->su_badlen,
12900 goodscore, score, TRUE, slang, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012901 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012902 }
12903 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000012904 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012905 }
12906}
12907
12908/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000012909 * Find word "word" in fold-case tree for "slang" and return the word number.
12910 */
12911 static int
12912soundfold_find(slang, word)
12913 slang_T *slang;
12914 char_u *word;
12915{
12916 idx_T arridx = 0;
12917 int len;
12918 int wlen = 0;
12919 int c;
12920 char_u *ptr = word;
12921 char_u *byts;
12922 idx_T *idxs;
12923 int wordnr = 0;
12924
12925 byts = slang->sl_sbyts;
12926 idxs = slang->sl_sidxs;
12927
12928 for (;;)
12929 {
12930 /* First byte is the number of possible bytes. */
12931 len = byts[arridx++];
12932
12933 /* If the first possible byte is a zero the word could end here.
12934 * If the word ends we found the word. If not skip the NUL bytes. */
12935 c = ptr[wlen];
12936 if (byts[arridx] == NUL)
12937 {
12938 if (c == NUL)
12939 break;
12940
12941 /* Skip over the zeros, there can be several. */
12942 while (len > 0 && byts[arridx] == NUL)
12943 {
12944 ++arridx;
12945 --len;
12946 }
12947 if (len == 0)
12948 return -1; /* no children, word should have ended here */
12949 ++wordnr;
12950 }
12951
12952 /* If the word ends we didn't find it. */
12953 if (c == NUL)
12954 return -1;
12955
12956 /* Perform a binary search in the list of accepted bytes. */
12957 if (c == TAB) /* <Tab> is handled like <Space> */
12958 c = ' ';
12959 while (byts[arridx] < c)
12960 {
12961 /* The word count is in the first idxs[] entry of the child. */
12962 wordnr += idxs[idxs[arridx]];
12963 ++arridx;
12964 if (--len == 0) /* end of the bytes, didn't find it */
12965 return -1;
12966 }
12967 if (byts[arridx] != c) /* didn't find the byte */
12968 return -1;
12969
12970 /* Continue at the child (if there is one). */
12971 arridx = idxs[arridx];
12972 ++wlen;
12973
12974 /* One space in the good word may stand for several spaces in the
12975 * checked word. */
12976 if (c == ' ')
12977 while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
12978 ++wlen;
12979 }
12980
12981 return wordnr;
12982}
12983
12984/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012985 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012986 */
12987 static void
12988make_case_word(fword, cword, flags)
12989 char_u *fword;
12990 char_u *cword;
12991 int flags;
12992{
12993 if (flags & WF_ALLCAP)
12994 /* Make it all upper-case */
12995 allcap_copy(fword, cword);
12996 else if (flags & WF_ONECAP)
12997 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000012998 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000012999 else
13000 /* Use goodword as-is. */
13001 STRCPY(cword, fword);
13002}
13003
Bram Moolenaarea424162005-06-16 21:51:00 +000013004/*
13005 * Use map string "map" for languages "lp".
13006 */
13007 static void
13008set_map_str(lp, map)
13009 slang_T *lp;
13010 char_u *map;
13011{
13012 char_u *p;
13013 int headc = 0;
13014 int c;
13015 int i;
13016
13017 if (*map == NUL)
13018 {
13019 lp->sl_has_map = FALSE;
13020 return;
13021 }
13022 lp->sl_has_map = TRUE;
13023
Bram Moolenaar4770d092006-01-12 23:22:24 +000013024 /* Init the array and hash tables empty. */
Bram Moolenaarea424162005-06-16 21:51:00 +000013025 for (i = 0; i < 256; ++i)
13026 lp->sl_map_array[i] = 0;
13027#ifdef FEAT_MBYTE
13028 hash_init(&lp->sl_map_hash);
13029#endif
13030
13031 /*
13032 * The similar characters are stored separated with slashes:
13033 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
13034 * before the same slash. For characters above 255 sl_map_hash is used.
13035 */
13036 for (p = map; *p != NUL; )
13037 {
13038#ifdef FEAT_MBYTE
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000013039 c = mb_cptr2char_adv(&p);
Bram Moolenaarea424162005-06-16 21:51:00 +000013040#else
13041 c = *p++;
13042#endif
13043 if (c == '/')
13044 headc = 0;
13045 else
13046 {
13047 if (headc == 0)
13048 headc = c;
13049
13050#ifdef FEAT_MBYTE
13051 /* Characters above 255 don't fit in sl_map_array[], put them in
13052 * the hash table. Each entry is the char, a NUL the headchar and
13053 * a NUL. */
13054 if (c >= 256)
13055 {
13056 int cl = mb_char2len(c);
13057 int headcl = mb_char2len(headc);
13058 char_u *b;
13059 hash_T hash;
13060 hashitem_T *hi;
13061
13062 b = alloc((unsigned)(cl + headcl + 2));
13063 if (b == NULL)
13064 return;
13065 mb_char2bytes(c, b);
13066 b[cl] = NUL;
13067 mb_char2bytes(headc, b + cl + 1);
13068 b[cl + 1 + headcl] = NUL;
13069 hash = hash_hash(b);
13070 hi = hash_lookup(&lp->sl_map_hash, b, hash);
13071 if (HASHITEM_EMPTY(hi))
13072 hash_add_item(&lp->sl_map_hash, hi, b, hash);
13073 else
13074 {
13075 /* This should have been checked when generating the .spl
13076 * file. */
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000013077 EMSG(_("E783: duplicate char in MAP entry"));
Bram Moolenaarea424162005-06-16 21:51:00 +000013078 vim_free(b);
13079 }
13080 }
13081 else
13082#endif
13083 lp->sl_map_array[c] = headc;
13084 }
13085 }
13086}
13087
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013088/*
13089 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
13090 * lines in the .aff file.
13091 */
13092 static int
13093similar_chars(slang, c1, c2)
13094 slang_T *slang;
13095 int c1;
13096 int c2;
13097{
Bram Moolenaarea424162005-06-16 21:51:00 +000013098 int m1, m2;
13099#ifdef FEAT_MBYTE
13100 char_u buf[MB_MAXBYTES];
13101 hashitem_T *hi;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013102
Bram Moolenaarea424162005-06-16 21:51:00 +000013103 if (c1 >= 256)
13104 {
13105 buf[mb_char2bytes(c1, buf)] = 0;
13106 hi = hash_find(&slang->sl_map_hash, buf);
13107 if (HASHITEM_EMPTY(hi))
13108 m1 = 0;
13109 else
13110 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13111 }
13112 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013113#endif
Bram Moolenaarea424162005-06-16 21:51:00 +000013114 m1 = slang->sl_map_array[c1];
13115 if (m1 == 0)
13116 return FALSE;
13117
13118
13119#ifdef FEAT_MBYTE
13120 if (c2 >= 256)
13121 {
13122 buf[mb_char2bytes(c2, buf)] = 0;
13123 hi = hash_find(&slang->sl_map_hash, buf);
13124 if (HASHITEM_EMPTY(hi))
13125 m2 = 0;
13126 else
13127 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13128 }
13129 else
13130#endif
13131 m2 = slang->sl_map_array[c2];
13132
13133 return m1 == m2;
13134}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013135
13136/*
13137 * Add a suggestion to the list of suggestions.
Bram Moolenaar4770d092006-01-12 23:22:24 +000013138 * For a suggestion that is already in the list the lowest score is remembered.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013139 */
13140 static void
Bram Moolenaar4770d092006-01-12 23:22:24 +000013141add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus,
13142 slang, maxsf)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013143 suginfo_T *su;
Bram Moolenaar4770d092006-01-12 23:22:24 +000013144 garray_T *gap; /* either su_ga or su_sga */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013145 char_u *goodword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013146 int badlenarg; /* len of bad word replaced with "goodword" */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013147 int score;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000013148 int altscore;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013149 int had_bonus; /* value for st_had_bonus */
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013150 slang_T *slang; /* language for sound folding */
Bram Moolenaar4770d092006-01-12 23:22:24 +000013151 int maxsf; /* su_maxscore applies to soundfold score,
13152 su_sfmaxscore to the total score. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013153{
Bram Moolenaar4770d092006-01-12 23:22:24 +000013154 int goodlen; /* len of goodword changed */
13155 int badlen; /* len of bad word changed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013156 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013157 suggest_T new_sug;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013158 int i;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013159 char_u *pgood, *pbad;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013160
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013161 /* Minimize "badlen" for consistency. Avoids that changing "the the" to
13162 * "thee the" is added next to changing the first "the" the "thee". */
13163 pgood = goodword + STRLEN(goodword);
Bram Moolenaar4770d092006-01-12 23:22:24 +000013164 pbad = su->su_badptr + badlenarg;
13165 for (;;)
Bram Moolenaar0c405862005-06-22 22:26:26 +000013166 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013167 goodlen = pgood - goodword;
13168 badlen = pbad - su->su_badptr;
13169 if (goodlen <= 0 || badlen <= 0)
13170 break;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013171 mb_ptr_back(goodword, pgood);
13172 mb_ptr_back(su->su_badptr, pbad);
13173#ifdef FEAT_MBYTE
13174 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +000013175 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013176 if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
13177 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013178 }
13179 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013180#endif
13181 if (*pgood != *pbad)
13182 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013183 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000013184
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013185 if (badlen == 0 && goodlen == 0)
13186 /* goodword doesn't change anything; may happen for "the the" changing
13187 * the first "the" to itself. */
13188 return;
Bram Moolenaar0c405862005-06-22 22:26:26 +000013189
Bram Moolenaar4770d092006-01-12 23:22:24 +000013190 /* Check if the word is already there. Also check the length that is
13191 * being replaced "thes," -> "these" is a different suggestion from
13192 * "thes" -> "these". */
13193 stp = &SUG(*gap, 0);
13194 for (i = gap->ga_len; --i >= 0; ++stp)
13195 if (stp->st_wordlen == goodlen
13196 && stp->st_orglen == badlen
13197 && STRNCMP(stp->st_word, goodword, goodlen) == 0)
13198 {
13199 /*
13200 * Found it. Remember the word with the lowest score.
13201 */
13202 if (stp->st_slang == NULL)
13203 stp->st_slang = slang;
13204
13205 new_sug.st_score = score;
13206 new_sug.st_altscore = altscore;
13207 new_sug.st_had_bonus = had_bonus;
13208
13209 if (stp->st_had_bonus != had_bonus)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013210 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013211 /* Only one of the two had the soundalike score computed.
13212 * Need to do that for the other one now, otherwise the
13213 * scores can't be compared. This happens because
13214 * suggest_try_change() doesn't compute the soundalike
13215 * word to keep it fast, while some special methods set
13216 * the soundalike score to zero. */
13217 if (had_bonus)
13218 rescore_one(su, stp);
13219 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013220 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013221 new_sug.st_word = stp->st_word;
13222 new_sug.st_wordlen = stp->st_wordlen;
13223 new_sug.st_slang = stp->st_slang;
13224 new_sug.st_orglen = badlen;
13225 rescore_one(su, &new_sug);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013226 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013227 }
13228
Bram Moolenaar4770d092006-01-12 23:22:24 +000013229 if (stp->st_score > new_sug.st_score)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013230 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013231 stp->st_score = new_sug.st_score;
13232 stp->st_altscore = new_sug.st_altscore;
13233 stp->st_had_bonus = new_sug.st_had_bonus;
13234 }
13235 break;
13236 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013237
Bram Moolenaar4770d092006-01-12 23:22:24 +000013238 if (i < 0 && ga_grow(gap, 1) == OK)
13239 {
13240 /* Add a suggestion. */
13241 stp = &SUG(*gap, gap->ga_len);
13242 stp->st_word = vim_strnsave(goodword, goodlen);
13243 if (stp->st_word != NULL)
13244 {
13245 stp->st_wordlen = goodlen;
13246 stp->st_score = score;
13247 stp->st_altscore = altscore;
13248 stp->st_had_bonus = had_bonus;
13249 stp->st_orglen = badlen;
13250 stp->st_slang = slang;
13251 ++gap->ga_len;
13252
13253 /* If we have too many suggestions now, sort the list and keep
13254 * the best suggestions. */
13255 if (gap->ga_len > SUG_MAX_COUNT(su))
13256 {
13257 if (maxsf)
13258 su->su_sfmaxscore = cleanup_suggestions(gap,
13259 su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
13260 else
13261 {
13262 i = su->su_maxscore;
13263 su->su_maxscore = cleanup_suggestions(gap,
13264 su->su_maxscore, SUG_CLEAN_COUNT(su));
13265 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013266 }
13267 }
13268 }
13269}
13270
13271/*
Bram Moolenaar4770d092006-01-12 23:22:24 +000013272 * Suggestions may in fact be flagged as errors. Esp. for banned words and
13273 * for split words, such as "the the". Remove these from the list here.
13274 */
13275 static void
13276check_suggestions(su, gap)
13277 suginfo_T *su;
13278 garray_T *gap; /* either su_ga or su_sga */
13279{
13280 suggest_T *stp;
13281 int i;
13282 char_u longword[MAXWLEN + 1];
13283 int len;
13284 hlf_T attr;
13285
13286 stp = &SUG(*gap, 0);
13287 for (i = gap->ga_len - 1; i >= 0; --i)
13288 {
13289 /* Need to append what follows to check for "the the". */
13290 STRCPY(longword, stp[i].st_word);
13291 len = stp[i].st_wordlen;
13292 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
13293 MAXWLEN - len);
13294 attr = HLF_COUNT;
13295 (void)spell_check(curwin, longword, &attr, NULL, FALSE);
13296 if (attr != HLF_COUNT)
13297 {
13298 /* Remove this entry. */
13299 vim_free(stp[i].st_word);
13300 --gap->ga_len;
13301 if (i < gap->ga_len)
13302 mch_memmove(stp + i, stp + i + 1,
13303 sizeof(suggest_T) * (gap->ga_len - i));
13304 }
13305 }
13306}
13307
13308
13309/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013310 * Add a word to be banned.
13311 */
13312 static void
13313add_banned(su, word)
13314 suginfo_T *su;
13315 char_u *word;
13316{
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000013317 char_u *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013318 hash_T hash;
13319 hashitem_T *hi;
13320
Bram Moolenaar4770d092006-01-12 23:22:24 +000013321 hash = hash_hash(word);
13322 hi = hash_lookup(&su->su_banned, word, hash);
13323 if (HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013324 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000013325 s = vim_strsave(word);
13326 if (s != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013327 hash_add_item(&su->su_banned, hi, s, hash);
13328 }
13329}
13330
13331/*
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013332 * Recompute the score for all suggestions if sound-folding is possible. This
13333 * is slow, thus only done for the final results.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013334 */
13335 static void
13336rescore_suggestions(su)
13337 suginfo_T *su;
13338{
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013339 int i;
13340
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013341 if (su->su_sallang != NULL)
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013342 for (i = 0; i < su->su_ga.ga_len; ++i)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013343 rescore_one(su, &SUG(su->su_ga, i));
13344}
13345
13346/*
13347 * Recompute the score for one suggestion if sound-folding is possible.
13348 */
13349 static void
13350rescore_one(su, stp)
Bram Moolenaar4effc802005-09-30 21:12:02 +000013351 suginfo_T *su;
13352 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013353{
13354 slang_T *slang = stp->st_slang;
13355 char_u sal_badword[MAXWLEN];
Bram Moolenaar4effc802005-09-30 21:12:02 +000013356 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013357
13358 /* Only rescore suggestions that have no sal score yet and do have a
13359 * language. */
13360 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
13361 {
13362 if (slang == su->su_sallang)
Bram Moolenaar4effc802005-09-30 21:12:02 +000013363 p = su->su_sal_badword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013364 else
Bram Moolenaar8b96d642005-09-05 22:05:30 +000013365 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013366 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
Bram Moolenaar4effc802005-09-30 21:12:02 +000013367 p = sal_badword;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013368 }
Bram Moolenaar4effc802005-09-30 21:12:02 +000013369
13370 stp->st_altscore = stp_sal_score(stp, su, slang, p);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +000013371 if (stp->st_altscore == SCORE_MAXMAX)
13372 stp->st_altscore = SCORE_BIG;
13373 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
13374 stp->st_had_bonus = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013375 }
13376}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013377
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013378static int
13379#ifdef __BORLANDC__
13380_RTLENTRYF
13381#endif
13382sug_compare __ARGS((const void *s1, const void *s2));
13383
13384/*
13385 * Function given to qsort() to sort the suggestions on st_score.
Bram Moolenaar6b730e12005-09-16 21:47:57 +000013386 * First on "st_score", then "st_altscore" then alphabetically.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013387 */
13388 static int
13389#ifdef __BORLANDC__
13390_RTLENTRYF
13391#endif
13392sug_compare(s1, s2)
13393 const void *s1;
13394 const void *s2;
13395{
13396 suggest_T *p1 = (suggest_T *)s1;
13397 suggest_T *p2 = (suggest_T *)s2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013398 int n = p1->st_score - p2->st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013399
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013400 if (n == 0)
Bram Moolenaar6b730e12005-09-16 21:47:57 +000013401 {
13402 n = p1->st_altscore - p2->st_altscore;
13403 if (n == 0)
13404 n = STRICMP(p1->st_word, p2->st_word);
13405 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013406 return n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013407}
13408
13409/*
13410 * Cleanup the suggestions:
13411 * - Sort on score.
13412 * - Remove words that won't be displayed.
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013413 * Returns the maximum score in the list or "maxscore" unmodified.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013414 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013415 static int
13416cleanup_suggestions(gap, maxscore, keep)
13417 garray_T *gap;
13418 int maxscore;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013419 int keep; /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013420{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013421 suggest_T *stp = &SUG(*gap, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013422 int i;
13423
13424 /* Sort the list. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013425 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013426
13427 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013428 if (gap->ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013429 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013430 for (i = keep; i < gap->ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013431 vim_free(stp[i].st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013432 gap->ga_len = keep;
13433 return stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013434 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013435 return maxscore;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013436}
13437
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * Result is in allocated memory, NULL for an error.
 * When spell checking is off, or no language with SAL items is found, the
 * result is a copy of "word".
 */
    char_u *
eval_soundfold(word)
    char_u	*word;
{
    char_u	folded[MAXWLEN];
    langp_T	*entry;
    int		idx;

    if (curwin->w_p_spell && *curbuf->b_p_spl != NUL)
    {
	/* Use the sound-folding of the first language that supports it. */
	for (idx = 0; idx < curbuf->b_langp.ga_len; ++idx)
	{
	    entry = LANGP_ENTRY(curbuf->b_langp, idx);
	    if (entry->lp_slang->sl_sal.ga_len <= 0)
		continue;

	    /* soundfold the word */
	    spell_soundfold(entry->lp_slang, word, FALSE, folded);
	    return vim_strsave(folded);
	}
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
13468
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013469/*
13470 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
Bram Moolenaard12a1322005-08-21 22:08:24 +000013471 *
13472 * There are many ways to turn a word into a sound-a-like representation. The
13473 * oldest is Soundex (1918!). A nice overview can be found in "Approximate
13474 * swedish name matching - survey and test of different algorithms" by Klas
13475 * Erikson.
13476 *
13477 * We support two methods:
13478 * 1. SOFOFROM/SOFOTO do a simple character mapping.
13479 * 2. SAL items define a more advanced sound-folding (and much slower).
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013480 */
13481 static void
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013482spell_soundfold(slang, inword, folded, res)
13483 slang_T *slang;
13484 char_u *inword;
13485 int folded; /* "inword" is already case-folded */
13486 char_u *res;
13487{
13488 char_u fword[MAXWLEN];
13489 char_u *word;
13490
13491 if (slang->sl_sofo)
13492 /* SOFOFROM and SOFOTO used */
13493 spell_soundfold_sofo(slang, inword, res);
13494 else
13495 {
13496 /* SAL items used. Requires the word to be case-folded. */
13497 if (folded)
13498 word = inword;
13499 else
13500 {
13501 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN);
13502 word = fword;
13503 }
13504
13505#ifdef FEAT_MBYTE
13506 if (has_mbyte)
13507 spell_soundfold_wsal(slang, word, res);
13508 else
13509#endif
13510 spell_soundfold_sal(slang, word, res);
13511 }
13512}
13513
13514/*
13515 * Perform sound folding of "inword" into "res" according to SOFOFROM and
13516 * SOFOTO lines.
13517 */
13518 static void
13519spell_soundfold_sofo(slang, inword, res)
13520 slang_T *slang;
13521 char_u *inword;
13522 char_u *res;
13523{
13524 char_u *s;
13525 int ri = 0;
13526 int c;
13527
13528#ifdef FEAT_MBYTE
13529 if (has_mbyte)
13530 {
13531 int prevc = 0;
13532 int *ip;
13533
13534 /* The sl_sal_first[] table contains the translation for chars up to
13535 * 255, sl_sal the rest. */
13536 for (s = inword; *s != NUL; )
13537 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000013538 c = mb_cptr2char_adv(&s);
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013539 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
13540 c = ' ';
13541 else if (c < 256)
13542 c = slang->sl_sal_first[c];
13543 else
13544 {
13545 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
13546 if (ip == NULL) /* empty list, can't match */
13547 c = NUL;
13548 else
13549 for (;;) /* find "c" in the list */
13550 {
13551 if (*ip == 0) /* not found */
13552 {
13553 c = NUL;
13554 break;
13555 }
13556 if (*ip == c) /* match! */
13557 {
13558 c = ip[1];
13559 break;
13560 }
13561 ip += 2;
13562 }
13563 }
13564
13565 if (c != NUL && c != prevc)
13566 {
13567 ri += mb_char2bytes(c, res + ri);
13568 if (ri + MB_MAXBYTES > MAXWLEN)
13569 break;
13570 prevc = c;
13571 }
13572 }
13573 }
13574 else
13575#endif
13576 {
13577 /* The sl_sal_first[] table contains the translation. */
13578 for (s = inword; (c = *s) != NUL; ++s)
13579 {
13580 if (vim_iswhite(c))
13581 c = ' ';
13582 else
13583 c = slang->sl_sal_first[c];
13584 if (c != NUL && (ri == 0 || res[ri - 1] != c))
13585 res[ri++] = c;
13586 }
13587 }
13588
13589 res[ri] = NUL;
13590}
13591
/*
 * Turn "inword" into its sound-a-like equivalent in "res", using the SAL
 * items of "slang".  This version handles the word one byte at a time; it is
 * the one spell_soundfold() calls when "has_mbyte" is not set.
 *
 * The word is first copied to a local buffer, because applying a rule may
 * modify it.  Then, for each position in the word, the rules that start with
 * the character at that position are tried in order; the first one that fits
 * decides what is appended to "res" and how far to advance.
 */
13592 static void
13593spell_soundfold_sal(slang, inword, res)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013594 slang_T *slang;
13595 char_u *inword;
13596 char_u *res;
13597{
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013598 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013599 char_u word[MAXWLEN];
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013600 char_u *s = inword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013601 char_u *t;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013602 char_u *pf;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013603 int i, j, z;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013604 int reslen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013605 int n, k = 0;
13606 int z0;
13607 int k0;
13608 int n0;
13609 int c;
13610 int pri;
13611 int p0 = -333;
13612 int c0;
13613
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013614 /* Remove accents, if wanted. We actually remove all non-word characters.
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013615 * But keep white space. We need a copy, the word may be changed here. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013616 if (slang->sl_rem_accents)
13617 {
13618 t = word;
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013619 while (*s != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013620 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013621 if (vim_iswhite(*s))
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013622 {
/* A run of white space is stored as a single space. */
13623 *t++ = ' ';
13624 s = skipwhite(s);
13625 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013626 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013627 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013628 if (spell_iswordp_nmw(s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013629 *t++ = *s;
13630 ++s;
13631 }
13632 }
13633 *t = NUL;
13634 }
13635 else
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013636 STRCPY(word, s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013637
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013638 smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013639
13640 /*
13641 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013642 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013643 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013644 i = reslen = z = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013645 while ((c = word[i]) != NUL)
13646 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013647 /* Start with the first rule that has the character in the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013648 n = slang->sl_sal_first[c];
13649 z0 = 0;
13650
13651 if (n >= 0)
13652 {
13653 /* check all rules for the same letter */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013654 for (; (s = smp[n].sm_lead)[0] == c; ++n)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013655 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013656 /* Quickly skip entries that don't match the word. Most
13657 * entries are less then three chars, optimize for that. */
13658 k = smp[n].sm_leadlen;
13659 if (k > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013660 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013661 if (word[i + 1] != s[1])
13662 continue;
13663 if (k > 2)
13664 {
13665 for (j = 2; j < k; ++j)
13666 if (word[i + j] != s[j])
13667 break;
13668 if (j < k)
13669 continue;
13670 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013671 }
13672
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013673 if ((pf = smp[n].sm_oneof) != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013674 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013675 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013676 while (*pf != NUL && *pf != word[i + k])
13677 ++pf;
13678 if (*pf == NUL)
13679 continue;
13680 ++k;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013681 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013682 s = smp[n].sm_rules;
13683 pri = 5; /* default priority */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013684
/* Remember the first rule character in "p0" and the full matched
 * length in "k0"; each leading '-' then reduces "k" by one, but not
 * below one. */
13685 p0 = *s;
13686 k0 = k;
13687 while (*s == '-' && k > 1)
13688 {
13689 k--;
13690 s++;
13691 }
13692 if (*s == '<')
13693 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013694 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013695 {
13696 /* determine priority */
13697 pri = *s - '0';
13698 s++;
13699 }
13700 if (*s == '^' && *(s + 1) == '^')
13701 s++;
13702
13703 if (*s == NUL
13704 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013705 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013706 || spell_iswordp(word + i - 1, curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013707 && (*(s + 1) != '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013708 || (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013709 || (*s == '$' && i > 0
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013710 && spell_iswordp(word + i - 1, curbuf)
13711 && (!spell_iswordp(word + i + k0, curbuf))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013712 {
13713 /* search for followup rules, if: */
13714 /* followup and k > 1 and NO '-' in searchstring */
13715 c0 = word[i + k - 1];
13716 n0 = slang->sl_sal_first[c0];
13717
13718 if (slang->sl_followup && k > 1 && n0 >= 0
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013719 && p0 != '-' && word[i + k] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013720 {
13721 /* test follow-up rule for "word[i + k]" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013722 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013723 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013724 /* Quickly skip entries that don't match the word.
13725 * */
13726 k0 = smp[n0].sm_leadlen;
13727 if (k0 > 1)
13728 {
13729 if (word[i + k] != s[1])
13730 continue;
13731 if (k0 > 2)
13732 {
13733 pf = word + i + k + 1;
13734 for (j = 2; j < k0; ++j)
13735 if (*pf++ != s[j])
13736 break;
13737 if (j < k0)
13738 continue;
13739 }
13740 }
13741 k0 += k - 1;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013742
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013743 if ((pf = smp[n0].sm_oneof) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013744 {
13745 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013746 * "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013747 while (*pf != NUL && *pf != word[i + k0])
13748 ++pf;
13749 if (*pf == NUL)
13750 continue;
13751 ++k0;
13752 }
13753
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013754 p0 = 5;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013755 s = smp[n0].sm_rules;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013756 while (*s == '-')
13757 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013758 /* "k0" gets NOT reduced because
13759 * "if (k0 == k)" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013760 s++;
13761 }
13762 if (*s == '<')
13763 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013764 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013765 {
13766 p0 = *s - '0';
13767 s++;
13768 }
13769
13770 if (*s == NUL
13771 /* *s == '^' cuts */
13772 || (*s == '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013773 && !spell_iswordp(word + i + k0,
13774 curbuf)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013775 {
13776 if (k0 == k)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013777 /* this is just a piece of the string */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013778 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013779
13780 if (p0 < pri)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013781 /* priority too low */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013782 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013783 /* rule fits; stop search */
13784 break;
13785 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013786 }
13787
/* A follow-up rule with at least the same priority was found (the
 * loop above stopped on an entry that still starts with "c0"): skip
 * the current rule. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013788 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013789 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013790 }
13791
13792 /* replace string */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013793 s = smp[n].sm_to;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000013794 if (s == NULL)
13795 s = (char_u *)"";
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013796 pf = smp[n].sm_rules;
13797 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013798 if (p0 == 1 && z == 0)
13799 {
13800 /* rule with '<' is used */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013801 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
13802 || res[reslen - 1] == *s))
13803 reslen--;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013804 z0 = 1;
13805 z = 1;
13806 k0 = 0;
/* The replacement is written into "word" itself, so that it is
 * matched against the rules again. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013807 while (*s != NUL && word[i + k0] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013808 {
13809 word[i + k0] = *s;
13810 k0++;
13811 s++;
13812 }
13813 if (k > k0)
13814 mch_memmove(word + i + k0, word + i + k,
13815 STRLEN(word + i + k) + 1);
13816
13817 /* new "actual letter" */
13818 c = word[i];
13819 }
13820 else
13821 {
13822 /* no '<' rule used */
13823 i += k - 1;
13824 z = 0;
/* Append all but the last character of the replacement to "res",
 * skipping a character equal to the one stored just before it. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013825 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013826 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013827 if (reslen == 0 || res[reslen - 1] != *s)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013828 res[reslen++] = *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013829 s++;
13830 }
13831 /* new "actual letter" */
13832 c = *s;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013833 if (strstr((char *)pf, "^^") != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013834 {
/* NOTE(review): this store is not guarded by "reslen < MAXWLEN" like
 * the other stores into "res" - confirm it cannot overflow. */
13835 if (c != NUL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013836 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013837 mch_memmove(word, word + i + 1,
13838 STRLEN(word + i + 1) + 1);
13839 i = 0;
13840 z0 = 1;
13841 }
13842 }
13843 break;
13844 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013845 }
13846 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +000013847 else if (vim_iswhite(c))
13848 {
13849 c = ' ';
13850 k = 1;
13851 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013852
/* "z0" is zero when no rule restarted matching at this position:
 * store the current character, if there is one to store, and move on
 * to the next position in the word. */
13853 if (z0 == 0)
13854 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013855 if (k && !p0 && reslen < MAXWLEN && c != NUL
13856 && (!slang->sl_collapse || reslen == 0
13857 || res[reslen - 1] != c))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013858 /* condense only double letters */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013859 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013860
13861 i++;
13862 z = 0;
13863 k = 0;
13864 }
13865 }
13866
Bram Moolenaard857f0e2005-06-21 22:37:39 +000013867 res[reslen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000013868}
13869
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013870#ifdef FEAT_MBYTE
13871/*
13872 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
13873 * Multi-byte version of spell_soundfold().
13874 */
13875 static void
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013876spell_soundfold_wsal(slang, inword, res)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013877 slang_T *slang;
13878 char_u *inword;
13879 char_u *res;
13880{
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013881 salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013882 int word[MAXWLEN];
13883 int wres[MAXWLEN];
13884 int l;
13885 char_u *s;
13886 int *ws;
13887 char_u *t;
13888 int *pf;
13889 int i, j, z;
13890 int reslen;
13891 int n, k = 0;
13892 int z0;
13893 int k0;
13894 int n0;
13895 int c;
13896 int pri;
13897 int p0 = -333;
13898 int c0;
13899 int did_white = FALSE;
13900
13901 /*
13902 * Convert the multi-byte string to a wide-character string.
13903 * Remove accents, if wanted. We actually remove all non-word characters.
13904 * But keep white space.
13905 */
13906 n = 0;
13907 for (s = inword; *s != NUL; )
13908 {
13909 t = s;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000013910 c = mb_cptr2char_adv(&s);
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013911 if (slang->sl_rem_accents)
13912 {
13913 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
13914 {
13915 if (did_white)
13916 continue;
13917 c = ' ';
13918 did_white = TRUE;
13919 }
13920 else
13921 {
13922 did_white = FALSE;
Bram Moolenaar9c96f592005-06-30 21:52:39 +000013923 if (!spell_iswordp_nmw(t))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013924 continue;
13925 }
13926 }
13927 word[n++] = c;
13928 }
13929 word[n] = NUL;
13930
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013931 /*
13932 * This comes from Aspell phonet.cpp.
13933 * Converted from C++ to C. Added support for multi-byte chars.
13934 * Changed to keep spaces.
13935 */
13936 i = reslen = z = 0;
13937 while ((c = word[i]) != NUL)
13938 {
13939 /* Start with the first rule that has the character in the word. */
13940 n = slang->sl_sal_first[c & 0xff];
13941 z0 = 0;
13942
13943 if (n >= 0)
13944 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013945 /* check all rules for the same index byte */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013946 for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
13947 {
13948 /* Quickly skip entries that don't match the word. Most
13949 * entries are less then three chars, optimize for that. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013950 if (c != ws[0])
13951 continue;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013952 k = smp[n].sm_leadlen;
13953 if (k > 1)
13954 {
13955 if (word[i + 1] != ws[1])
13956 continue;
13957 if (k > 2)
13958 {
13959 for (j = 2; j < k; ++j)
13960 if (word[i + j] != ws[j])
13961 break;
13962 if (j < k)
13963 continue;
13964 }
13965 }
13966
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013967 if ((pf = smp[n].sm_oneof_w) != NULL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013968 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000013969 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000013970 while (*pf != NUL && *pf != word[i + k])
13971 ++pf;
13972 if (*pf == NUL)
13973 continue;
13974 ++k;
13975 }
13976 s = smp[n].sm_rules;
13977 pri = 5; /* default priority */
13978
13979 p0 = *s;
13980 k0 = k;
13981 while (*s == '-' && k > 1)
13982 {
13983 k--;
13984 s++;
13985 }
13986 if (*s == '<')
13987 s++;
13988 if (VIM_ISDIGIT(*s))
13989 {
13990 /* determine priority */
13991 pri = *s - '0';
13992 s++;
13993 }
13994 if (*s == '^' && *(s + 1) == '^')
13995 s++;
13996
13997 if (*s == NUL
13998 || (*s == '^'
13999 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014000 || spell_iswordp_w(word + i - 1, curbuf)))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014001 && (*(s + 1) != '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014002 || (!spell_iswordp_w(word + i + k0, curbuf))))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014003 || (*s == '$' && i > 0
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014004 && spell_iswordp_w(word + i - 1, curbuf)
14005 && (!spell_iswordp_w(word + i + k0, curbuf))))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014006 {
14007 /* search for followup rules, if: */
14008 /* followup and k > 1 and NO '-' in searchstring */
14009 c0 = word[i + k - 1];
14010 n0 = slang->sl_sal_first[c0 & 0xff];
14011
14012 if (slang->sl_followup && k > 1 && n0 >= 0
14013 && p0 != '-' && word[i + k] != NUL)
14014 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014015 /* Test follow-up rule for "word[i + k]"; loop over
14016 * all entries with the same index byte. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014017 for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
14018 == (c0 & 0xff); ++n0)
14019 {
14020 /* Quickly skip entries that don't match the word.
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014021 */
14022 if (c0 != ws[0])
14023 continue;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014024 k0 = smp[n0].sm_leadlen;
14025 if (k0 > 1)
14026 {
14027 if (word[i + k] != ws[1])
14028 continue;
14029 if (k0 > 2)
14030 {
14031 pf = word + i + k + 1;
14032 for (j = 2; j < k0; ++j)
14033 if (*pf++ != ws[j])
14034 break;
14035 if (j < k0)
14036 continue;
14037 }
14038 }
14039 k0 += k - 1;
14040
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014041 if ((pf = smp[n0].sm_oneof_w) != NULL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014042 {
14043 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +000014044 * "sm_oneof". */
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014045 while (*pf != NUL && *pf != word[i + k0])
14046 ++pf;
14047 if (*pf == NUL)
14048 continue;
14049 ++k0;
14050 }
14051
14052 p0 = 5;
14053 s = smp[n0].sm_rules;
14054 while (*s == '-')
14055 {
14056 /* "k0" gets NOT reduced because
14057 * "if (k0 == k)" */
14058 s++;
14059 }
14060 if (*s == '<')
14061 s++;
14062 if (VIM_ISDIGIT(*s))
14063 {
14064 p0 = *s - '0';
14065 s++;
14066 }
14067
14068 if (*s == NUL
14069 /* *s == '^' cuts */
14070 || (*s == '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +000014071 && !spell_iswordp_w(word + i + k0,
14072 curbuf)))
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014073 {
14074 if (k0 == k)
14075 /* this is just a piece of the string */
14076 continue;
14077
14078 if (p0 < pri)
14079 /* priority too low */
14080 continue;
14081 /* rule fits; stop search */
14082 break;
14083 }
14084 }
14085
14086 if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
14087 == (c0 & 0xff))
14088 continue;
14089 }
14090
14091 /* replace string */
14092 ws = smp[n].sm_to_w;
14093 s = smp[n].sm_rules;
14094 p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
14095 if (p0 == 1 && z == 0)
14096 {
14097 /* rule with '<' is used */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014098 if (reslen > 0 && ws != NULL && *ws != NUL
14099 && (wres[reslen - 1] == c
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014100 || wres[reslen - 1] == *ws))
14101 reslen--;
14102 z0 = 1;
14103 z = 1;
14104 k0 = 0;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014105 if (ws != NULL)
14106 while (*ws != NUL && word[i + k0] != NUL)
14107 {
14108 word[i + k0] = *ws;
14109 k0++;
14110 ws++;
14111 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014112 if (k > k0)
14113 mch_memmove(word + i + k0, word + i + k,
14114 sizeof(int) * (STRLEN(word + i + k) + 1));
14115
14116 /* new "actual letter" */
14117 c = word[i];
14118 }
14119 else
14120 {
14121 /* no '<' rule used */
14122 i += k - 1;
14123 z = 0;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014124 if (ws != NULL)
14125 while (*ws != NUL && ws[1] != NUL
14126 && reslen < MAXWLEN)
14127 {
14128 if (reslen == 0 || wres[reslen - 1] != *ws)
14129 wres[reslen++] = *ws;
14130 ws++;
14131 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014132 /* new "actual letter" */
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000014133 if (ws == NULL)
14134 c = NUL;
14135 else
14136 c = *ws;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014137 if (strstr((char *)s, "^^") != NULL)
14138 {
14139 if (c != NUL)
14140 wres[reslen++] = c;
14141 mch_memmove(word, word + i + 1,
14142 sizeof(int) * (STRLEN(word + i + 1) + 1));
14143 i = 0;
14144 z0 = 1;
14145 }
14146 }
14147 break;
14148 }
14149 }
14150 }
14151 else if (vim_iswhite(c))
14152 {
14153 c = ' ';
14154 k = 1;
14155 }
14156
14157 if (z0 == 0)
14158 {
14159 if (k && !p0 && reslen < MAXWLEN && c != NUL
14160 && (!slang->sl_collapse || reslen == 0
14161 || wres[reslen - 1] != c))
14162 /* condense only double letters */
14163 wres[reslen++] = c;
14164
14165 i++;
14166 z = 0;
14167 k = 0;
14168 }
14169 }
14170
14171 /* Convert wide characters in "wres" to a multi-byte string in "res". */
14172 l = 0;
14173 for (n = 0; n < reslen; ++n)
14174 {
14175 l += mb_char2bytes(wres[n], res + l);
14176 if (l + MB_MAXBYTES > MAXWLEN)
14177 break;
14178 }
14179 res[l] = NUL;
14180}
14181#endif
14182
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014183/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014184 * Compute a score for two sound-a-like words.
14185 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
14186 * Instead of a generic loop we write out the code. That keeps it fast by
14187 * avoiding checks that will not be possible.
14188 */
14189 static int
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014190soundalike_score(goodstart, badstart)
14191 char_u *goodstart; /* sound-folded good word */
14192 char_u *badstart; /* sound-folded bad word */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014193{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014194 char_u *goodsound = goodstart;
14195 char_u *badsound = badstart;
14196 int goodlen;
14197 int badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014198 int n;
14199 char_u *pl, *ps;
14200 char_u *pl2, *ps2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014201 int score = 0;
14202
14203 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
14204 * counted so much, vowels halfway the word aren't counted at all. */
14205 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
14206 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014207 if (badsound[1] == goodsound[1]
14208 || (badsound[1] != NUL
14209 && goodsound[1] != NUL
14210 && badsound[2] == goodsound[2]))
14211 {
14212 /* handle like a substitute */
14213 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014214 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000014215 {
14216 score = 2 * SCORE_DEL / 3;
14217 if (*badsound == '*')
14218 ++badsound;
14219 else
14220 ++goodsound;
14221 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014222 }
14223
14224 goodlen = STRLEN(goodsound);
14225 badlen = STRLEN(badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014226
14227 /* Return quickly if the lenghts are too different to be fixed by two
14228 * changes. */
14229 n = goodlen - badlen;
14230 if (n < -2 || n > 2)
14231 return SCORE_MAXMAX;
14232
14233 if (n > 0)
14234 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014235 pl = goodsound; /* goodsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014236 ps = badsound;
14237 }
14238 else
14239 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014240 pl = badsound; /* badsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014241 ps = goodsound;
14242 }
14243
14244 /* Skip over the identical part. */
14245 while (*pl == *ps && *pl != NUL)
14246 {
14247 ++pl;
14248 ++ps;
14249 }
14250
14251 switch (n)
14252 {
14253 case -2:
14254 case 2:
14255 /*
14256 * Must delete two characters from "pl".
14257 */
14258 ++pl; /* first delete */
14259 while (*pl == *ps)
14260 {
14261 ++pl;
14262 ++ps;
14263 }
14264 /* strings must be equal after second delete */
14265 if (STRCMP(pl + 1, ps) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014266 return score + SCORE_DEL * 2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014267
14268 /* Failed to compare. */
14269 break;
14270
14271 case -1:
14272 case 1:
14273 /*
14274 * Minimal one delete from "pl" required.
14275 */
14276
14277 /* 1: delete */
14278 pl2 = pl + 1;
14279 ps2 = ps;
14280 while (*pl2 == *ps2)
14281 {
14282 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014283 return score + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014284 ++pl2;
14285 ++ps2;
14286 }
14287
14288 /* 2: delete then swap, then rest must be equal */
14289 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14290 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014291 return score + SCORE_DEL + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014292
14293 /* 3: delete then substitute, then the rest must be equal */
14294 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014295 return score + SCORE_DEL + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014296
14297 /* 4: first swap then delete */
14298 if (pl[0] == ps[1] && pl[1] == ps[0])
14299 {
14300 pl2 = pl + 2; /* swap, skip two chars */
14301 ps2 = ps + 2;
14302 while (*pl2 == *ps2)
14303 {
14304 ++pl2;
14305 ++ps2;
14306 }
14307 /* delete a char and then strings must be equal */
14308 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014309 return score + SCORE_SWAP + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014310 }
14311
14312 /* 5: first substitute then delete */
14313 pl2 = pl + 1; /* substitute, skip one char */
14314 ps2 = ps + 1;
14315 while (*pl2 == *ps2)
14316 {
14317 ++pl2;
14318 ++ps2;
14319 }
14320 /* delete a char and then strings must be equal */
14321 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014322 return score + SCORE_SUBST + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014323
14324 /* Failed to compare. */
14325 break;
14326
14327 case 0:
14328 /*
14329 * Lenghts are equal, thus changes must result in same length: An
14330 * insert is only possible in combination with a delete.
14331 * 1: check if for identical strings
14332 */
14333 if (*pl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014334 return score;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014335
14336 /* 2: swap */
14337 if (pl[0] == ps[1] && pl[1] == ps[0])
14338 {
14339 pl2 = pl + 2; /* swap, skip two chars */
14340 ps2 = ps + 2;
14341 while (*pl2 == *ps2)
14342 {
14343 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014344 return score + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014345 ++pl2;
14346 ++ps2;
14347 }
14348 /* 3: swap and swap again */
14349 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14350 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014351 return score + SCORE_SWAP + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014352
14353 /* 4: swap and substitute */
14354 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014355 return score + SCORE_SWAP + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014356 }
14357
14358 /* 5: substitute */
14359 pl2 = pl + 1;
14360 ps2 = ps + 1;
14361 while (*pl2 == *ps2)
14362 {
14363 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014364 return score + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014365 ++pl2;
14366 ++ps2;
14367 }
14368
14369 /* 6: substitute and swap */
14370 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14371 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014372 return score + SCORE_SUBST + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014373
14374 /* 7: substitute and substitute */
14375 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014376 return score + SCORE_SUBST + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014377
14378 /* 8: insert then delete */
14379 pl2 = pl;
14380 ps2 = ps + 1;
14381 while (*pl2 == *ps2)
14382 {
14383 ++pl2;
14384 ++ps2;
14385 }
14386 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014387 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014388
14389 /* 9: delete then insert */
14390 pl2 = pl + 1;
14391 ps2 = ps;
14392 while (*pl2 == *ps2)
14393 {
14394 ++pl2;
14395 ++ps2;
14396 }
14397 if (STRCMP(pl2, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014398 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014399
14400 /* Failed to compare. */
14401 break;
14402 }
14403
14404 return SCORE_MAXMAX;
14405}
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014406
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014407/*
14408 * Compute the "edit distance" to turn "badword" into "goodword". The less
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014409 * deletes/inserts/substitutes/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014410 *
Bram Moolenaard12a1322005-08-21 22:08:24 +000014411 * The algorithm is described by Du and Chang, 1992.
14412 * The implementation of the algorithm comes from Aspell editdist.cpp,
14413 * edit_distance(). It has been converted from C++ to C and modified to
14414 * support multi-byte characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014415 */
14416 static int
Bram Moolenaar4770d092006-01-12 23:22:24 +000014417spell_edit_score(slang, badword, goodword)
14418 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014419 char_u *badword;
14420 char_u *goodword;
14421{
14422 int *cnt;
Bram Moolenaara1ba8112005-06-28 23:23:32 +000014423 int badlen, goodlen; /* lenghts including NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014424 int j, i;
14425 int t;
14426 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014427 int pbc, pgc;
14428#ifdef FEAT_MBYTE
14429 char_u *p;
14430 int wbadword[MAXWLEN];
14431 int wgoodword[MAXWLEN];
14432
14433 if (has_mbyte)
14434 {
14435 /* Get the characters from the multi-byte strings and put them in an
14436 * int array for easy access. */
14437 for (p = badword, badlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000014438 wbadword[badlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000014439 wbadword[badlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014440 for (p = goodword, goodlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +000014441 wgoodword[goodlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +000014442 wgoodword[goodlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014443 }
14444 else
14445#endif
14446 {
14447 badlen = STRLEN(badword) + 1;
14448 goodlen = STRLEN(goodword) + 1;
14449 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014450
14451 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
14452#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014453 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
14454 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014455 if (cnt == NULL)
14456 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014457
14458 CNT(0, 0) = 0;
14459 for (j = 1; j <= goodlen; ++j)
Bram Moolenaar4770d092006-01-12 23:22:24 +000014460 CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014461
14462 for (i = 1; i <= badlen; ++i)
14463 {
Bram Moolenaar4770d092006-01-12 23:22:24 +000014464 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014465 for (j = 1; j <= goodlen; ++j)
14466 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014467#ifdef FEAT_MBYTE
14468 if (has_mbyte)
14469 {
14470 bc = wbadword[i - 1];
14471 gc = wgoodword[j - 1];
14472 }
14473 else
14474#endif
14475 {
14476 bc = badword[i - 1];
14477 gc = goodword[j - 1];
14478 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014479 if (bc == gc)
14480 CNT(i, j) = CNT(i - 1, j - 1);
14481 else
14482 {
14483 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014484 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014485 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
14486 else
Bram Moolenaar4770d092006-01-12 23:22:24 +000014487 {
14488 /* For a similar character use SCORE_SIMILAR. */
14489 if (slang != NULL
14490 && slang->sl_has_map
14491 && similar_chars(slang, gc, bc))
14492 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
14493 else
14494 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
14495 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014496
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014497 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014498 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +000014499#ifdef FEAT_MBYTE
14500 if (has_mbyte)
14501 {
14502 pbc = wbadword[i - 2];
14503 pgc = wgoodword[j - 2];
14504 }
14505 else
14506#endif
14507 {
14508 pbc = badword[i - 2];
14509 pgc = goodword[j - 2];
14510 }
14511 if (bc == pgc && pbc == gc)
14512 {
14513 t = SCORE_SWAP + CNT(i - 2, j - 2);
14514 if (t < CNT(i, j))
14515 CNT(i, j) = t;
14516 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014517 }
14518 t = SCORE_DEL + CNT(i - 1, j);
14519 if (t < CNT(i, j))
14520 CNT(i, j) = t;
14521 t = SCORE_INS + CNT(i, j - 1);
14522 if (t < CNT(i, j))
14523 CNT(i, j) = t;
14524 }
14525 }
14526 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +000014527
14528 i = CNT(badlen - 1, goodlen - 1);
14529 vim_free(cnt);
14530 return i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000014531}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000014532
/*
 * An alternative that spell_edit_score_limit() postponed: the position in
 * both words where comparing continues and the score up to that position.
 */
typedef struct
{
    int         badi;       /* index in the bad word to continue at */
    int         goodi;      /* index in the good word to continue at */
    int         score;      /* score accumulated before this position */
} limitscore_T;
14539
14540/*
14541 * Like spell_edit_score(), but with a limit on the score to make it faster.
14542 * May return SCORE_MAXMAX when the score is higher than "limit".
14543 *
14544 * This uses a stack for the edits still to be tried.
14545 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support
14546 * for multi-byte characters.
14547 */
14548 static int
14549spell_edit_score_limit(slang, badword, goodword, limit)
14550 slang_T *slang;
14551 char_u *badword;
14552 char_u *goodword;
14553 int limit;
14554{
14555 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
14556 int stackidx;
14557 int bi, gi;
14558 int bi2, gi2;
14559 int bc, gc;
14560 int score;
14561 int score_off;
14562 int minscore;
14563 int round;
14564
14565#ifdef FEAT_MBYTE
14566 /* Multi-byte characters require a bit more work, use a different function
14567 * to avoid testing "has_mbyte" quite often. */
14568 if (has_mbyte)
14569 return spell_edit_score_limit_w(slang, badword, goodword, limit);
14570#endif
14571
14572 /*
14573 * The idea is to go from start to end over the words. So long as
14574 * characters are equal just continue, this always gives the lowest score.
14575 * When there is a difference try several alternatives. Each alternative
14576 * increases "score" for the edit distance. Some of the alternatives are
14577 * pushed unto a stack and tried later, some are tried right away. At the
14578 * end of the word the score for one alternative is known. The lowest
14579 * possible score is stored in "minscore".
14580 */
14581 stackidx = 0;
14582 bi = 0;
14583 gi = 0;
14584 score = 0;
14585 minscore = limit + 1;
14586
14587 for (;;)
14588 {
14589 /* Skip over an equal part, score remains the same. */
14590 for (;;)
14591 {
14592 bc = badword[bi];
14593 gc = goodword[gi];
14594 if (bc != gc) /* stop at a char that's different */
14595 break;
14596 if (bc == NUL) /* both words end */
14597 {
14598 if (score < minscore)
14599 minscore = score;
14600 goto pop; /* do next alternative */
14601 }
14602 ++bi;
14603 ++gi;
14604 }
14605
14606 if (gc == NUL) /* goodword ends, delete badword chars */
14607 {
14608 do
14609 {
14610 if ((score += SCORE_DEL) >= minscore)
14611 goto pop; /* do next alternative */
14612 } while (badword[++bi] != NUL);
14613 minscore = score;
14614 }
14615 else if (bc == NUL) /* badword ends, insert badword chars */
14616 {
14617 do
14618 {
14619 if ((score += SCORE_INS) >= minscore)
14620 goto pop; /* do next alternative */
14621 } while (goodword[++gi] != NUL);
14622 minscore = score;
14623 }
14624 else /* both words continue */
14625 {
14626 /* If not close to the limit, perform a change. Only try changes
14627 * that may lead to a lower score than "minscore".
14628 * round 0: try deleting a char from badword
14629 * round 1: try inserting a char in badword */
14630 for (round = 0; round <= 1; ++round)
14631 {
14632 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
14633 if (score_off < minscore)
14634 {
14635 if (score_off + SCORE_EDIT_MIN >= minscore)
14636 {
14637 /* Near the limit, rest of the words must match. We
14638 * can check that right now, no need to push an item
14639 * onto the stack. */
14640 bi2 = bi + 1 - round;
14641 gi2 = gi + round;
14642 while (goodword[gi2] == badword[bi2])
14643 {
14644 if (goodword[gi2] == NUL)
14645 {
14646 minscore = score_off;
14647 break;
14648 }
14649 ++bi2;
14650 ++gi2;
14651 }
14652 }
14653 else
14654 {
14655 /* try deleting/inserting a character later */
14656 stack[stackidx].badi = bi + 1 - round;
14657 stack[stackidx].goodi = gi + round;
14658 stack[stackidx].score = score_off;
14659 ++stackidx;
14660 }
14661 }
14662 }
14663
14664 if (score + SCORE_SWAP < minscore)
14665 {
14666 /* If swapping two characters makes a match then the
14667 * substitution is more expensive, thus there is no need to
14668 * try both. */
14669 if (gc == badword[bi + 1] && bc == goodword[gi + 1])
14670 {
14671 /* Swap two characters, that is: skip them. */
14672 gi += 2;
14673 bi += 2;
14674 score += SCORE_SWAP;
14675 continue;
14676 }
14677 }
14678
14679 /* Substitute one character for another which is the same
14680 * thing as deleting a character from both goodword and badword.
14681 * Use a better score when there is only a case difference. */
14682 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
14683 score += SCORE_ICASE;
14684 else
14685 {
14686 /* For a similar character use SCORE_SIMILAR. */
14687 if (slang != NULL
14688 && slang->sl_has_map
14689 && similar_chars(slang, gc, bc))
14690 score += SCORE_SIMILAR;
14691 else
14692 score += SCORE_SUBST;
14693 }
14694
14695 if (score < minscore)
14696 {
14697 /* Do the substitution. */
14698 ++gi;
14699 ++bi;
14700 continue;
14701 }
14702 }
14703pop:
14704 /*
14705 * Get here to try the next alternative, pop it from the stack.
14706 */
14707 if (stackidx == 0) /* stack is empty, finished */
14708 break;
14709
14710 /* pop an item from the stack */
14711 --stackidx;
14712 gi = stack[stackidx].goodi;
14713 bi = stack[stackidx].badi;
14714 score = stack[stackidx].score;
14715 }
14716
14717 /* When the score goes over "limit" it may actually be much higher.
14718 * Return a very large number to avoid going below the limit when giving a
14719 * bonus. */
14720 if (minscore > limit)
14721 return SCORE_MAXMAX;
14722 return minscore;
14723}
14724
#ifdef FEAT_MBYTE
/*
 * Multi-byte version of spell_edit_score_limit().
 * Keep it in sync with the above!
 *
 * Both words are first unpacked into arrays of characters, after that the
 * words are compared character by character.
 * "slang" may be NULL; it is only used to look up similar characters.
 * May return SCORE_MAXMAX when the score is higher than "limit".
 */
    static int
spell_edit_score_limit_w(slang, badword, goodword, limit)
    slang_T     *slang;
    char_u      *badword;
    char_u      *goodword;
    int         limit;
{
    limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
    int             stackidx;
    int             bi, gi;
    int             bi2, gi2;
    int             bc, gc;
    int             score;
    int             score_off;
    int             minscore;
    int             round;
    char_u          *p;
    int             wbadword[MAXWLEN];
    int             wgoodword[MAXWLEN];

    /* Get the characters from the multi-byte strings and put them in an
     * int array for easy access. */
    bi = 0;
    for (p = badword; *p != NUL; )
        wbadword[bi++] = mb_cptr2char_adv(&p);
    wbadword[bi++] = 0;
    gi = 0;
    for (p = goodword; *p != NUL; )
        wgoodword[gi++] = mb_cptr2char_adv(&p);
    wgoodword[gi++] = 0;

    /*
     * The idea is to go from start to end over the words.  So long as
     * characters are equal just continue, this always gives the lowest score.
     * When there is a difference try several alternatives.  Each alternative
     * increases "score" for the edit distance.  Some of the alternatives are
     * pushed onto a stack and tried later, some are tried right away.  At the
     * end of the word the score for one alternative is known.  The lowest
     * possible score is stored in "minscore".
     */
    stackidx = 0;
    bi = 0;
    gi = 0;
    score = 0;
    minscore = limit + 1;

    for (;;)
    {
        /* Skip over an equal part, score remains the same. */
        for (;;)
        {
            bc = wbadword[bi];
            gc = wgoodword[gi];

            if (bc != gc)       /* stop at a char that's different */
                break;
            if (bc == NUL)      /* both words end */
            {
                if (score < minscore)
                    minscore = score;
                goto pop;       /* do next alternative */
            }
            ++bi;
            ++gi;
        }

        if (gc == NUL)    /* goodword ends, delete badword chars */
        {
            do
            {
                if ((score += SCORE_DEL) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wbadword[++bi] != NUL);
            minscore = score;
        }
        else if (bc == NUL) /* badword ends, insert goodword chars */
        {
            do
            {
                if ((score += SCORE_INS) >= minscore)
                    goto pop;       /* do next alternative */
            } while (wgoodword[++gi] != NUL);
            minscore = score;
        }
        else                /* both words continue */
        {
            /* If not close to the limit, perform a change.  Only try changes
             * that may lead to a lower score than "minscore".
             * round 0: try deleting a char from badword
             * round 1: try inserting a char in badword */
            for (round = 0; round <= 1; ++round)
            {
                score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
                if (score_off < minscore)
                {
                    if (score_off + SCORE_EDIT_MIN >= minscore)
                    {
                        /* Near the limit, rest of the words must match.  We
                         * can check that right now, no need to push an item
                         * onto the stack. */
                        bi2 = bi + 1 - round;
                        gi2 = gi + round;
                        while (wgoodword[gi2] == wbadword[bi2])
                        {
                            if (wgoodword[gi2] == NUL)
                            {
                                minscore = score_off;
                                break;
                            }
                            ++bi2;
                            ++gi2;
                        }
                    }
                    else if (stackidx < (int)(sizeof(stack)
                                                        / sizeof(stack[0])))
                    {
                        /* try deleting/inserting a character later */
                        stack[stackidx].badi = bi + 1 - round;
                        stack[stackidx].goodi = gi + round;
                        stack[stackidx].score = score_off;
                        ++stackidx;
                    }
                    /* else: the stack is full.  Drop this alternative
                     * instead of writing past the end of "stack"; the
                     * result may then be higher than the best possible
                     * score. */
                }
            }

            if (score + SCORE_SWAP < minscore)
            {
                /* If swapping two characters makes a match then the
                 * substitution is more expensive, thus there is no need to
                 * try both. */
                if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
                {
                    /* Swap two characters, that is: skip them. */
                    gi += 2;
                    bi += 2;
                    score += SCORE_SWAP;
                    continue;
                }
            }

            /* Substitute one character for another which is the same
             * thing as deleting a character from both goodword and badword.
             * Use a better score when there is only a case difference. */
            if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
                score += SCORE_ICASE;
            else
            {
                /* For a similar character use SCORE_SIMILAR. */
                if (slang != NULL
                        && slang->sl_has_map
                        && similar_chars(slang, gc, bc))
                    score += SCORE_SIMILAR;
                else
                    score += SCORE_SUBST;
            }

            if (score < minscore)
            {
                /* Do the substitution. */
                ++gi;
                ++bi;
                continue;
            }
        }
pop:
        /*
         * Get here to try the next alternative, pop it from the stack.
         */
        if (stackidx == 0)              /* stack is empty, finished */
            break;

        /* pop an item from the stack */
        --stackidx;
        gi = stack[stackidx].goodi;
        bi = stack[stackidx].badi;
        score = stack[stackidx].score;
    }

    /* When the score goes over "limit" it may actually be much higher.
     * Return a very large number to avoid going below the limit when giving a
     * bonus. */
    if (minscore > limit)
        return SCORE_MAXMAX;
    return minscore;
}
#endif
14915
Bram Moolenaar362e1a32006-03-06 23:29:24 +000014916/*
14917 * ":spellinfo"
14918 */
14919/*ARGSUSED*/
14920 void
14921ex_spellinfo(eap)
14922 exarg_T *eap;
14923{
14924 int lpi;
14925 langp_T *lp;
14926 char_u *p;
14927
14928 if (no_spell_checking(curwin))
14929 return;
14930
14931 msg_start();
14932 for (lpi = 0; lpi < curbuf->b_langp.ga_len && !got_int; ++lpi)
14933 {
14934 lp = LANGP_ENTRY(curbuf->b_langp, lpi);
14935 msg_puts((char_u *)"file: ");
14936 msg_puts(lp->lp_slang->sl_fname);
14937 msg_putchar('\n');
14938 p = lp->lp_slang->sl_info;
14939 if (p != NULL)
14940 {
14941 msg_puts(p);
14942 msg_putchar('\n');
14943 }
14944 }
14945 msg_end();
14946}
14947
/* Flags for the "dumpflags_arg" argument of spell_dump_compl().  Each flag
 * is a separate bit, so that flags can be combined. */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT      0x02    /* include word count */
#define DUMPFLAG_ICASE      0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP     0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP     0x10    /* pattern is all capitals */
Bram Moolenaar4770d092006-01-12 23:22:24 +000014953
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014954/*
14955 * ":spelldump"
14956 */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014957 void
14958ex_spelldump(eap)
14959 exarg_T *eap;
14960{
14961 buf_T *buf = curbuf;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000014962
14963 if (no_spell_checking(curwin))
14964 return;
14965
14966 /* Create a new empty buffer by splitting the window. */
14967 do_cmdline_cmd((char_u *)"new");
14968 if (!bufempty() || !buf_valid(buf))
14969 return;
14970
14971 spell_dump_compl(buf, NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
14972
14973 /* Delete the empty line that we started with. */
14974 if (curbuf->b_ml.ml_line_count > 1)
14975 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
14976
14977 redraw_later(NOT_VALID);
14978}
14979
14980/*
14981 * Go through all possible words and:
14982 * 1. When "pat" is NULL: dump a list of all words in the current buffer.
14983 * "ic" and "dir" are not used.
14984 * 2. When "pat" is not NULL: add matching words to insert mode completion.
14985 */
14986 void
14987spell_dump_compl(buf, pat, ic, dir, dumpflags_arg)
14988 buf_T *buf; /* buffer with spell checking */
14989 char_u *pat; /* leading part of the word */
14990 int ic; /* ignore case */
14991 int *dir; /* direction for adding matches */
14992 int dumpflags_arg; /* DUMPFLAG_* */
14993{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000014994 langp_T *lp;
14995 slang_T *slang;
14996 idx_T arridx[MAXWLEN];
14997 int curi[MAXWLEN];
14998 char_u word[MAXWLEN];
14999 int c;
15000 char_u *byts;
15001 idx_T *idxs;
15002 linenr_T lnum = 0;
15003 int round;
15004 int depth;
15005 int n;
15006 int flags;
Bram Moolenaar7887d882005-07-01 22:33:52 +000015007 char_u *region_names = NULL; /* region names being used */
15008 int do_region = TRUE; /* dump region names and numbers */
15009 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015010 int lpi;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015011 int dumpflags = dumpflags_arg;
15012 int patlen;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015013
Bram Moolenaard0131a82006-03-04 21:46:13 +000015014 /* When ignoring case or when the pattern starts with capital pass this on
15015 * to dump_word(). */
15016 if (pat != NULL)
15017 {
15018 if (ic)
15019 dumpflags |= DUMPFLAG_ICASE;
15020 else
15021 {
15022 n = captype(pat, NULL);
15023 if (n == WF_ONECAP)
15024 dumpflags |= DUMPFLAG_ONECAP;
15025 else if (n == WF_ALLCAP
15026#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015027 && (int)STRLEN(pat) > mb_ptr2len(pat)
Bram Moolenaard0131a82006-03-04 21:46:13 +000015028#else
Bram Moolenaar362e1a32006-03-06 23:29:24 +000015029 && (int)STRLEN(pat) > 1
Bram Moolenaard0131a82006-03-04 21:46:13 +000015030#endif
15031 )
15032 dumpflags |= DUMPFLAG_ALLCAP;
15033 }
15034 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015035
Bram Moolenaar7887d882005-07-01 22:33:52 +000015036 /* Find out if we can support regions: All languages must support the same
15037 * regions or none at all. */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015038 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaar7887d882005-07-01 22:33:52 +000015039 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015040 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaar7887d882005-07-01 22:33:52 +000015041 p = lp->lp_slang->sl_regions;
15042 if (p[0] != 0)
15043 {
15044 if (region_names == NULL) /* first language with regions */
15045 region_names = p;
15046 else if (STRCMP(region_names, p) != 0)
15047 {
15048 do_region = FALSE; /* region names are different */
15049 break;
15050 }
15051 }
15052 }
15053
15054 if (do_region && region_names != NULL)
15055 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015056 if (pat == NULL)
15057 {
15058 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
15059 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15060 }
Bram Moolenaar7887d882005-07-01 22:33:52 +000015061 }
15062 else
15063 do_region = FALSE;
15064
15065 /*
15066 * Loop over all files loaded for the entries in 'spelllang'.
15067 */
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015068 for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015069 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015070 lp = LANGP_ENTRY(buf->b_langp, lpi);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015071 slang = lp->lp_slang;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015072 if (slang->sl_fbyts == NULL) /* reloading failed */
15073 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015074
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015075 if (pat == NULL)
15076 {
15077 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
15078 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15079 }
15080
15081 /* When matching with a pattern and there are no prefixes only use
15082 * parts of the tree that match "pat". */
15083 if (pat != NULL && slang->sl_pbyts == NULL)
15084 patlen = STRLEN(pat);
15085 else
15086 patlen = 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015087
15088 /* round 1: case-folded tree
15089 * round 2: keep-case tree */
15090 for (round = 1; round <= 2; ++round)
15091 {
15092 if (round == 1)
15093 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015094 dumpflags &= ~DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015095 byts = slang->sl_fbyts;
15096 idxs = slang->sl_fidxs;
15097 }
15098 else
15099 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015100 dumpflags |= DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015101 byts = slang->sl_kbyts;
15102 idxs = slang->sl_kidxs;
15103 }
15104 if (byts == NULL)
15105 continue; /* array is empty */
15106
15107 depth = 0;
15108 arridx[0] = 0;
15109 curi[0] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015110 while (depth >= 0 && !got_int
15111 && (pat == NULL || !compl_interrupted))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015112 {
15113 if (curi[depth] > byts[arridx[depth]])
15114 {
15115 /* Done all bytes at this node, go up one level. */
15116 --depth;
15117 line_breakcheck();
Bram Moolenaara2031822006-03-07 22:29:51 +000015118 ins_compl_check_keys(50);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015119 }
15120 else
15121 {
15122 /* Do one more byte at this node. */
15123 n = arridx[depth] + curi[depth];
15124 ++curi[depth];
15125 c = byts[n];
15126 if (c == 0)
15127 {
15128 /* End of word, deal with the word.
15129 * Don't use keep-case words in the fold-case tree,
15130 * they will appear in the keep-case tree.
15131 * Only use the word when the region matches. */
15132 flags = (int)idxs[n];
15133 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +000015134 && (flags & WF_NEEDCOMP) == 0
Bram Moolenaar7887d882005-07-01 22:33:52 +000015135 && (do_region
15136 || (flags & WF_REGION) == 0
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015137 || (((unsigned)flags >> 16)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015138 & lp->lp_region) != 0))
15139 {
15140 word[depth] = NUL;
Bram Moolenaar7887d882005-07-01 22:33:52 +000015141 if (!do_region)
15142 flags &= ~WF_REGION;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015143
15144 /* Dump the basic word if there is no prefix or
15145 * when it's the first one. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015146 c = (unsigned)flags >> 24;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015147 if (c == 0 || curi[depth] == 2)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015148 {
15149 dump_word(slang, word, pat, dir,
15150 dumpflags, flags, lnum);
15151 if (pat == NULL)
15152 ++lnum;
15153 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015154
15155 /* Apply the prefix, if there is one. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +000015156 if (c != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015157 lnum = dump_prefixes(slang, word, pat, dir,
15158 dumpflags, flags, lnum);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015159 }
15160 }
15161 else
15162 {
15163 /* Normal char, go one level deeper. */
15164 word[depth++] = c;
15165 arridx[depth] = idxs[n];
15166 curi[depth] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015167
15168 /* Check if this characters matches with the pattern.
15169 * If not skip the whole tree below it.
Bram Moolenaard0131a82006-03-04 21:46:13 +000015170 * Always ignore case here, dump_word() will check
15171 * proper case later. This isn't exactly right when
15172 * length changes for multi-byte characters with
15173 * ignore case... */
15174 if (depth <= patlen
15175 && MB_STRNICMP(word, pat, depth) != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015176 --depth;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015177 }
15178 }
15179 }
15180 }
15181 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015182}
15183
15184/*
15185 * Dump one word: apply case modifications and append a line to the buffer.
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015186 * When "lnum" is zero add insert mode completion.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015187 */
15188 static void
Bram Moolenaard0131a82006-03-04 21:46:13 +000015189dump_word(slang, word, pat, dir, dumpflags, wordflags, lnum)
Bram Moolenaar4770d092006-01-12 23:22:24 +000015190 slang_T *slang;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015191 char_u *word;
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015192 char_u *pat;
15193 int *dir;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015194 int dumpflags;
Bram Moolenaard0131a82006-03-04 21:46:13 +000015195 int wordflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015196 linenr_T lnum;
15197{
15198 int keepcap = FALSE;
15199 char_u *p;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015200 char_u *tw;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015201 char_u cword[MAXWLEN];
Bram Moolenaar7887d882005-07-01 22:33:52 +000015202 char_u badword[MAXWLEN + 10];
15203 int i;
Bram Moolenaard0131a82006-03-04 21:46:13 +000015204 int flags = wordflags;
15205
15206 if (dumpflags & DUMPFLAG_ONECAP)
15207 flags |= WF_ONECAP;
15208 if (dumpflags & DUMPFLAG_ALLCAP)
15209 flags |= WF_ALLCAP;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015210
Bram Moolenaar4770d092006-01-12 23:22:24 +000015211 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015212 {
15213 /* Need to fix case according to "flags". */
15214 make_case_word(word, cword, flags);
15215 p = cword;
15216 }
15217 else
15218 {
15219 p = word;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015220 if ((dumpflags & DUMPFLAG_KEEPCASE)
15221 && ((captype(word, NULL) & WF_KEEPCAP) == 0
Bram Moolenaar0dc065e2005-07-04 22:49:24 +000015222 || (flags & WF_FIXCAP) != 0))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015223 keepcap = TRUE;
15224 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000015225 tw = p;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015226
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015227 if (pat == NULL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015228 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015229 /* Add flags and regions after a slash. */
15230 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
Bram Moolenaar4770d092006-01-12 23:22:24 +000015231 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015232 STRCPY(badword, p);
15233 STRCAT(badword, "/");
15234 if (keepcap)
15235 STRCAT(badword, "=");
15236 if (flags & WF_BANNED)
15237 STRCAT(badword, "!");
15238 else if (flags & WF_RARE)
15239 STRCAT(badword, "?");
15240 if (flags & WF_REGION)
15241 for (i = 0; i < 7; ++i)
15242 if (flags & (0x10000 << i))
15243 sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
15244 p = badword;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015245 }
Bram Moolenaar4770d092006-01-12 23:22:24 +000015246
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015247 if (dumpflags & DUMPFLAG_COUNT)
15248 {
15249 hashitem_T *hi;
15250
15251 /* Include the word count for ":spelldump!". */
15252 hi = hash_find(&slang->sl_wordcount, tw);
15253 if (!HASHITEM_EMPTY(hi))
15254 {
15255 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
15256 tw, HI2WC(hi)->wc_count);
15257 p = IObuff;
15258 }
15259 }
15260
15261 ml_append(lnum, p, (colnr_T)0, FALSE);
15262 }
Bram Moolenaard0131a82006-03-04 21:46:13 +000015263 else if (((dumpflags & DUMPFLAG_ICASE)
15264 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
15265 : STRNCMP(p, pat, STRLEN(pat)) == 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015266 && ins_compl_add_infercase(p, (int)STRLEN(p),
15267 dumpflags & DUMPFLAG_ICASE,
15268 NULL, *dir, 0) == OK)
Bram Moolenaard0131a82006-03-04 21:46:13 +000015269 /* if dir was BACKWARD then honor it just once */
15270 *dir = FORWARD;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015271}
15272
15273/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +000015274 * For ":spelldump": Find matching prefixes for "word". Prepend each to
15275 * "word" and append a line to the buffer.
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015276 * When "lnum" is zero add insert mode completion.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015277 * Return the updated line number.
15278 */
15279 static linenr_T
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015280dump_prefixes(slang, word, pat, dir, dumpflags, flags, startlnum)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015281 slang_T *slang;
15282 char_u *word; /* case-folded word */
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015283 char_u *pat;
15284 int *dir;
Bram Moolenaar4770d092006-01-12 23:22:24 +000015285 int dumpflags;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015286 int flags; /* flags with prefix ID */
15287 linenr_T startlnum;
15288{
15289 idx_T arridx[MAXWLEN];
15290 int curi[MAXWLEN];
15291 char_u prefix[MAXWLEN];
Bram Moolenaar53805d12005-08-01 07:08:33 +000015292 char_u word_up[MAXWLEN];
15293 int has_word_up = FALSE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015294 int c;
15295 char_u *byts;
15296 idx_T *idxs;
15297 linenr_T lnum = startlnum;
15298 int depth;
15299 int n;
15300 int len;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015301 int i;
15302
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000015303 /* If the word starts with a lower-case letter make the word with an
Bram Moolenaar53805d12005-08-01 07:08:33 +000015304 * upper-case letter in word_up[]. */
15305 c = PTR2CHAR(word);
15306 if (SPELL_TOUPPER(c) != c)
15307 {
15308 onecap_copy(word, word_up, TRUE);
15309 has_word_up = TRUE;
15310 }
15311
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015312 byts = slang->sl_pbyts;
15313 idxs = slang->sl_pidxs;
15314 if (byts != NULL) /* array not is empty */
15315 {
15316 /*
15317 * Loop over all prefixes, building them byte-by-byte in prefix[].
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015318 * When at the end of a prefix check that it supports "flags".
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015319 */
15320 depth = 0;
15321 arridx[0] = 0;
15322 curi[0] = 1;
15323 while (depth >= 0 && !got_int)
15324 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015325 n = arridx[depth];
15326 len = byts[n];
15327 if (curi[depth] > len)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015328 {
15329 /* Done all bytes at this node, go up one level. */
15330 --depth;
15331 line_breakcheck();
15332 }
15333 else
15334 {
15335 /* Do one more byte at this node. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +000015336 n += curi[depth];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015337 ++curi[depth];
15338 c = byts[n];
15339 if (c == 0)
15340 {
15341 /* End of prefix, find out how many IDs there are. */
15342 for (i = 1; i < len; ++i)
15343 if (byts[n + i] != 0)
15344 break;
15345 curi[depth] += i - 1;
15346
Bram Moolenaar53805d12005-08-01 07:08:33 +000015347 c = valid_word_prefix(i, n, flags, word, slang, FALSE);
15348 if (c != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015349 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +000015350 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015351 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000015352 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015353 : flags, lnum);
15354 if (lnum != 0)
15355 ++lnum;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015356 }
Bram Moolenaar53805d12005-08-01 07:08:33 +000015357
15358 /* Check for prefix that matches the word when the
15359 * first letter is upper-case, but only if the prefix has
15360 * a condition. */
15361 if (has_word_up)
15362 {
15363 c = valid_word_prefix(i, n, flags, word_up, slang,
15364 TRUE);
15365 if (c != 0)
15366 {
15367 vim_strncpy(prefix + depth, word_up,
15368 MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015369 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +000015370 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +000015371 : flags, lnum);
15372 if (lnum != 0)
15373 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +000015374 }
15375 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +000015376 }
15377 else
15378 {
15379 /* Normal char, go one level deeper. */
15380 prefix[depth++] = c;
15381 arridx[depth] = idxs[n];
15382 curi[depth] = 1;
15383 }
15384 }
15385 }
15386 }
15387
15388 return lnum;
15389}
15390
Bram Moolenaar95529562005-08-25 21:21:38 +000015391/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000015392 * Move "p" to the end of word "start".
15393 * Uses the spell-checking word characters.
Bram Moolenaar95529562005-08-25 21:21:38 +000015394 */
15395 char_u *
15396spell_to_word_end(start, buf)
15397 char_u *start;
15398 buf_T *buf;
15399{
15400 char_u *p = start;
15401
15402 while (*p != NUL && spell_iswordp(p, buf))
15403 mb_ptr_adv(p);
15404 return p;
15405}
15406
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015407#if defined(FEAT_INS_EXPAND) || defined(PROTO)
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015408/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +000015409 * For Insert mode completion CTRL-X s:
15410 * Find start of the word in front of column "startcol".
15411 * We don't check if it is badly spelled, with completion we can only change
15412 * the word in front of the cursor.
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015413 * Returns the column number of the word.
15414 */
15415 int
15416spell_word_start(startcol)
15417 int startcol;
15418{
15419 char_u *line;
15420 char_u *p;
15421 int col = 0;
15422
Bram Moolenaar95529562005-08-25 21:21:38 +000015423 if (no_spell_checking(curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015424 return startcol;
15425
15426 /* Find a word character before "startcol". */
15427 line = ml_get_curline();
15428 for (p = line + startcol; p > line; )
15429 {
15430 mb_ptr_back(line, p);
15431 if (spell_iswordp_nmw(p))
15432 break;
15433 }
15434
15435 /* Go back to start of the word. */
15436 while (p > line)
15437 {
15438 col = p - line;
15439 mb_ptr_back(line, p);
15440 if (!spell_iswordp(p, curbuf))
15441 break;
15442 col = 0;
15443 }
15444
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015445 return col;
15446}
15447
15448/*
Bram Moolenaar4effc802005-09-30 21:12:02 +000015449 * Need to check for 'spellcapcheck' now, the word is removed before
15450 * expand_spelling() is called. Therefore the ugly global variable.
15451 */
15452static int spell_expand_need_cap;
15453
15454 void
15455spell_expand_check_cap(col)
15456 colnr_T col;
15457{
15458 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
15459}
15460
15461/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015462 * Get list of spelling suggestions.
15463 * Used for Insert mode completion CTRL-X ?.
15464 * Returns the number of matches. The matches are in "matchp[]", array of
15465 * allocated strings.
15466 */
15467/*ARGSUSED*/
15468 int
15469expand_spelling(lnum, col, pat, matchp)
15470 linenr_T lnum;
15471 int col;
15472 char_u *pat;
15473 char_u ***matchp;
15474{
15475 garray_T ga;
15476
Bram Moolenaar4770d092006-01-12 23:22:24 +000015477 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
Bram Moolenaar8b59de92005-08-11 19:59:29 +000015478 *matchp = ga.ga_data;
15479 return ga.ga_len;
15480}
15481#endif
15482
Bram Moolenaarf71a3db2006-03-12 21:50:18 +000015483#endif /* FEAT_SPELL */