/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * spell.c: code for spell checking
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 *
 * A NUL byte is used where the word may end.  The bytes are sorted, so that
 * binary searching can be used and the NUL bytes are at the start.  The
 * number of possible bytes is stored before the list of bytes.
 *
 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
 * either the next index or flags.  The tree starts at index 0.  For example,
 * to lookup "vi" this sequence is followed:
 *      i = 0
 *      len = byts[i]
 *      n = where "v" appears in byts[i + 1] to byts[i + len]
 *      i = idxs[n]
 *      len = byts[i]
 *      n = where "i" appears in byts[i + 1] to byts[i + len]
 *      i = idxs[n]
 *      len = byts[i]
 *      find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
 *
 * There are two word trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * There is one additional tree for when prefixes are not applied when
 * generating the .spl file.  This tree stores all the possible prefixes, as
 * if they were words.  At each word (prefix) end the prefix nr is stored, the
 * following word must support this prefix nr.  And the condition nr is
 * stored, used to lookup the condition that the word must match with.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */

/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds different and goes
 * down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is a weighted average in which the word score counts three
 * times and the sound score once.  Both arguments are parenthesized in the
 * expansion, so that an expression such as "a + b" or "c ? x : y" can be
 * passed without operator precedence changing the result.
 */
#define RESCORE(word_score, sound_score) \
        ((3 * (word_score) + (sound_score)) / 4)

/*
 * The double scoring mechanism is based on the principle that there are two
 * kinds of spelling mistakes:
 * 1. You know how to spell the word, but mistype something.  This results in
 *    a small editing distance (character swapped/omitted/inserted) and
 *    possibly a word that sounds completely different.
 * 2. You don't know how to spell the word and type something that sounds
 *    right.  The edit distance can be big but the word is similar after
 *    sound-folding.
 * Since scores for these two mistakes will be very different we use a list
 * for each.
 * The sound-folding is slow, only do double scoring when 'spellsuggest' is
 * "double".
 */

/*
 * Vim spell file format: <HEADER>
 *                        <SUGGEST>
 *                        <LWORDTREE>
 *                        <KWORDTREE>
 *                        <PREFIXTREE>
 *
 * <HEADER>: <fileID>
 *           <regioncnt> <regionname> ...
 *           <charflagslen> <charflags>
 *           <fcharslen> <fchars>
 *           <midwordlen> <midword>
 *           <prefcondcnt> <prefcond> ...
 *
 * <fileID>       10 bytes  "VIMspell08"
 * <regioncnt>    1 byte    number of regions following (8 supported)
 * <regionname>   2 bytes   Region name: ca, au, etc.  Lower case.
 *                          First <regionname> is region 1.
 *
 * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
 * <charflags>    N bytes   List of flags (first one is for character 128):
 *                          0x01  word character        CF_WORD
 *                          0x02  upper-case character  CF_UPPER
 * <fcharslen>    2 bytes   Number of bytes in <fchars>.
 * <fchars>       N bytes   Folded characters, first one is for character 128.
 *
 * <midwordlen>   2 bytes   Number of bytes in <midword>.
 * <midword>      N bytes   Characters that are word characters only when used
 *                          in the middle of a word.
 *
 * <prefcondcnt>  2 bytes   Number of <prefcond> items following.
 *
 * <prefcond> : <condlen> <condstr>
 *
 * <condlen>      1 byte    Length of <condstr>.
 *
 * <condstr>      N bytes   Condition for the prefix.
 *
 *
 * <SUGGEST> : <repcount> <rep> ...
 *             <salflags> <salcount> <sal> ...
 *             <maplen> <mapstr>
 *
 * <repcount>     2 bytes   number of <rep> items, MSB first.
 *
 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
 *
 * <repfromlen>   1 byte    length of <repfrom>
 *
 * <repfrom>      N bytes   "from" part of replacement
 *
 * <reptolen>     1 byte    length of <repto>
 *
 * <repto>        N bytes   "to" part of replacement
 *
 * <salflags>     1 byte    flags for soundsalike conversion:
 *                          SAL_F0LLOWUP
 *                          SAL_COLLAPSE
 *                          SAL_REM_ACCENTS
 *                          SAL_SOFO: SOFOFROM and SOFOTO used instead of SAL
 *
 * <salcount>     2 bytes   number of <sal> items following
 *
 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
 *
 * <salfromlen>   1-2 bytes length of <salfrom> (2 bytes for SAL_SOFO)
 *
 * <salfrom>      N bytes   "from" part of soundsalike
 *
 * <saltolen>     1-2 bytes length of <salto> (2 bytes for SAL_SOFO)
 *
 * <salto>        N bytes   "to" part of soundsalike
 *
 * <maplen>       2 bytes   length of <mapstr>, MSB first
 *
 * <mapstr>       N bytes   String with sequences of similar characters,
 *                          separated by slashes.
 *
 *
 * <LWORDTREE>: <wordtree>
 *
 * <KWORDTREE>: <wordtree>
 *
 * <PREFIXTREE>: <wordtree>
 *
 *
 * <wordtree>: <nodecount> <nodedata> ...
 *
 * <nodecount>    4 bytes   Number of nodes following.  MSB first.
 *
 * <nodedata>: <siblingcount> <sibling> ...
 *
 * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
 *                          follow in sorted order.
 *
 * <sibling>: <byte> [ <nodeidx> <xbyte>
 *                   | <flags> [<region>] [<prefixID>]
 *                   | <prefixID> <prefcondnr> ]
 *
 * <byte>         1 byte    Byte value of the sibling.  Special cases:
 *                          BY_NOFLAGS: End of word without flags and for all
 *                                      regions.
 *                                      For PREFIXTREE <prefixID> and
 *                                      <prefcondnr> follow.
 *                          BY_FLAGS:   End of word, <flags> follow.
 *                                      For PREFIXTREE <prefixID> and
 *                                      <prefcondnr> follow for rare prefix.
 *                          BY_INDEX:   Child of sibling is shared, <nodeidx>
 *                                      and <xbyte> follow.
 *
 * <nodeidx>      3 bytes   Index of child for this sibling, MSB first.
 *
 * <xbyte>        1 byte    byte value of the sibling.
 *
 * <flags>        1 byte    bitmask of:
 *                          WF_ALLCAP   word must have only capitals
 *                          WF_ONECAP   first char of word must be capital
 *                          WF_RARE     rare word
 *                          WF_REGION   <region> follows
 *                          WF_PFX      <prefixID> follows
 *
 * <region>       1 byte    Bitmask for regions in which word is valid.  When
 *                          omitted it's valid in all regions.
 *                          Lowest bit is for region 1.
 *
 * <prefixID>     1 byte    ID of prefix that can be used with this word.  For
 *                          PREFIXTREE used for the required prefix ID.
 *
 * <prefcondnr>   2 bytes   Prefix condition number, index in <prefcond> list
 *                          from HEADER.
 *
 * All text characters are in 'encoding', but stored as single bytes.
 */

Bram Moolenaare19defe2005-03-21 08:23:33 +0000212#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
213# include <io.h> /* for lseek(), must be before vim.h */
214#endif
215
216#include "vim.h"
217
218#if defined(FEAT_SYN_HL) || defined(PROTO)
219
220#ifdef HAVE_FCNTL_H
221# include <fcntl.h>
222#endif
223
/* Assume the maximum word length is this many bytes.  Some places assume a
 * word length fits in a byte, thus it can't be above 255. */
#define MAXWLEN 250

/* Type used for indexes in the word tree; it needs to be at least 3 bytes.
 * If int is 8 bytes we could use something smaller, but what? */
#if SIZEOF_INT > 2
typedef int idx_T;
#else
typedef long idx_T;
#endif

/* Flags used for a word.  Only the lowest byte can be used, the region byte
 * comes above it. */
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */
#define WF_BANNED   0x10        /* bad word */
#define WF_PFX      0x20        /* prefix ID list follows */
#define WF_KEEPCAP  0x80        /* keep-case word */

/* All the flags that describe the capitalisation of a word. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)

/* In sl_pidxs: flag for rare postponed prefix; must be above prefixID (one
 * byte) and prefcondnr (two bytes). */
#define WF_RAREPFX  0x1000000

/* Special byte values for a sibling in the word tree. */
#define BY_NOFLAGS  0           /* end of word without flags or region */
#define BY_FLAGS    1           /* end of word, flag byte follows */
#define BY_INDEX    2           /* child is shared, index follows */
#define BY_SPECIAL  BY_INDEX    /* highest special byte value */

Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000257/* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep,
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000258 * and si_sal. Not for sl_sal!
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000259 * One replacement: from "ft_from" to "ft_to". */
260typedef struct fromto_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000261{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000262 char_u *ft_from;
263 char_u *ft_to;
264} fromto_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000265
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000266/* Info from "SAL" entries in ".aff" file used in sl_sal.
267 * The info is split for quick processing by spell_soundfold().
268 * Note that "sm_oneof" and "sm_rules" point into sm_lead. */
269typedef struct salitem_S
270{
271 char_u *sm_lead; /* leading letters */
272 int sm_leadlen; /* length of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000273 char_u *sm_oneof; /* letters from () or NULL */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000274 char_u *sm_rules; /* rules like ^, $, priority */
275 char_u *sm_to; /* replacement. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000276#ifdef FEAT_MBYTE
277 int *sm_lead_w; /* wide character copy of "sm_lead" */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000278 int *sm_oneof_w; /* wide character copy of "sm_oneof" */
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000279 int *sm_to_w; /* wide character copy of "sm_to" */
280#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000281} salitem_T;
282
/* Type of the entries in "sl_sal_first": an int when multi-byte support is
 * compiled in, a short otherwise. */
#ifdef FEAT_MBYTE
typedef int salfirst_T;
#else
typedef short salfirst_T;
#endif

Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000289/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000290 * Structure used to store words and other info for one language, loaded from
291 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000292 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
293 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
294 *
295 * The "byts" array stores the possible bytes in each tree node, preceded by
296 * the number of possible bytes, sorted on byte value:
297 * <len> <byte1> <byte2> ...
298 * The "idxs" array stores the index of the child node corresponding to the
299 * byte in "byts".
300 * Exception: when the byte is zero, the word may end here and "idxs" holds
301 * the flags and region for the word. There may be several zeros in sequence
302 * for alternative flag/region combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000303 */
304typedef struct slang_S slang_T;
305struct slang_S
306{
307 slang_T *sl_next; /* next language */
308 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000309 char_u *sl_fname; /* name of .spl file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000310 int sl_add; /* TRUE if it's a .add file. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000311
Bram Moolenaar51485f02005-06-04 21:55:20 +0000312 char_u *sl_fbyts; /* case-folded word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000313 idx_T *sl_fidxs; /* case-folded word indexes */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000314 char_u *sl_kbyts; /* keep-case word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000315 idx_T *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000316 char_u *sl_pbyts; /* prefix tree word bytes */
317 idx_T *sl_pidxs; /* prefix tree word indexes */
318
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000319 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000320
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000321 int sl_prefixcnt; /* number of items in "sl_prefprog" */
322 regprog_T **sl_prefprog; /* table with regprogs for prefixes */
323
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000324 garray_T sl_rep; /* list of fromto_T entries from REP lines */
325 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
326 there is none */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000327 garray_T sl_sal; /* list of salitem_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000328 salfirst_T sl_sal_first[256]; /* indexes where byte first appears, -1 if
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000329 there is none */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000330 int sl_sofo; /* SOFOFROM and SOFOTO instead of SAL items:
331 * "sl_sal_first" maps chars, when has_mbyte
332 * "sl_sal" is a list of wide char lists. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000333 int sl_followup; /* SAL followup */
334 int sl_collapse; /* SAL collapse_result */
335 int sl_rem_accents; /* SAL remove_accents */
Bram Moolenaarea424162005-06-16 21:51:00 +0000336 int sl_has_map; /* TRUE if there is a MAP line */
337#ifdef FEAT_MBYTE
338 hashtab_T sl_map_hash; /* MAP for multi-byte chars */
339 int sl_map_array[256]; /* MAP for first 256 chars */
340#else
341 char_u sl_map_array[256]; /* MAP for first 256 chars */
342#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000343};
344
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000345/* First language that is loaded, start of the linked list of loaded
346 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000347static slang_T *first_lang = NULL;
348
/* Flags used in .spl file for soundsalike flags.
 * Note that SAL_F0LLOWUP is spelled with the digit zero; the name is kept
 * as it is so that existing uses of the identifier keep working. */
#define SAL_F0LLOWUP        1
#define SAL_COLLAPSE        2
#define SAL_REM_ACCENTS     4
#define SAL_SOFO            8   /* SOFOFROM and SOFOTO instead of SAL */

Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000355/*
356 * Structure used in "b_langp", filled from 'spelllang'.
357 */
358typedef struct langp_S
359{
360 slang_T *lp_slang; /* info for this language (NULL for last one) */
361 int lp_region; /* bitmask for region or REGION_ALL */
362} langp_T;
363
364#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
365
#define REGION_ALL 0xff         /* word valid in all regions */

/* Result values.  Lower number is accepted over higher one.
 * The negative value is parenthesized so that it stays one unit wherever
 * the macro is expanded. */
#define SP_BANNED   (-1)
#define SP_OK       0
#define SP_RARE     1
#define SP_LOCAL    2
#define SP_BAD      3

#define VIMSPELLMAGIC  "VIMspell08"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10            /* length of VIMSPELLMAGIC, without NUL */

378/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000379 * Information used when looking for suggestions.
380 */
381typedef struct suginfo_S
382{
383 garray_T su_ga; /* suggestions, contains "suggest_T" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000384 int su_maxcount; /* max. number of suggestions displayed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000385 int su_maxscore; /* maximum score for adding to su_ga */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000386 garray_T su_sga; /* like su_ga, sound-folded scoring */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000387 char_u *su_badptr; /* start of bad word in line */
388 int su_badlen; /* length of detected bad word in line */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000389 int su_badflags; /* caps flags for bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000390 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
391 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
392 hashtab_T su_banned; /* table with banned words */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000393} suginfo_T;
394
395/* One word suggestion. Used in "si_ga". */
396typedef struct suggest_S
397{
398 char_u *st_word; /* suggested word, allocated string */
399 int st_orglen; /* length of replaced text */
400 int st_score; /* lower is better */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000401 int st_altscore; /* used when st_score compares equal */
402 int st_salscore; /* st_score is for soundalike */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000403 int st_had_bonus; /* bonus already included in score */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000404} suggest_T;
405
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000406#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000407
/* Number of suggestions kept when cleaning up.  When rescore_suggestions() is
 * called the score may change, thus we need to keep more than what is
 * displayed.  The result is never below 50. */
#define SUG_CLEAN_COUNT(su) ((su)->su_maxcount < 50 ? 50 : (su)->su_maxcount)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)   ((su)->su_maxcount + 50)

/* score for various changes */
#define SCORE_SPLIT     149     /* split bad word */
#define SCORE_ICASE     52      /* slightly different case */
#define SCORE_REGION    70      /* word is for different region */
#define SCORE_RARE      180     /* rare word */
#define SCORE_SWAP      90      /* swap two characters */
#define SCORE_SWAP3     110     /* swap two characters in three */
#define SCORE_REP       87      /* REP replacement */
#define SCORE_SUBST     93      /* substitute a character */
#define SCORE_SIMILAR   33      /* substitute a similar character */
#define SCORE_DEL       94      /* delete a character */
#define SCORE_DELDUP    64      /* delete a duplicated character */
#define SCORE_INS       96      /* insert a character */
#define SCORE_INSDUP    66      /* insert a duplicate character */
#define SCORE_NONWORD   103     /* change non-word to word char */

#define SCORE_FILE      30      /* suggestion from a file */
#define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
                                 * 350 allows for about three changes. */

/* Big difference.  The product is parenthesized so that the macro behaves
 * as a single value inside a larger expression, e.g. "x / SCORE_BIG". */
#define SCORE_BIG       (SCORE_INS * 3)
#define SCORE_MAXMAX    999999  /* accept any score */

440/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000441 * Structure to store info for word matching.
442 */
443typedef struct matchinf_S
444{
445 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000446
447 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000448 char_u *mi_word; /* start of word being checked */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000449 char_u *mi_end; /* end of matching word so far */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000450 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000451 char_u *mi_cend; /* char after what was used for
452 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000453
454 /* case-folded text */
455 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000456 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000457
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000458 /* for when checking word after a prefix */
459 int mi_prefarridx; /* index in sl_pidxs with list of
460 prefixID/condition */
461 int mi_prefcnt; /* number of entries at mi_prefarridx */
462 int mi_prefixlen; /* byte length of prefix */
463
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000464 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000465 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000466 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000467} matchinf_T;
468
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000469/*
470 * The tables used for recognizing word characters according to spelling.
471 * These are only used for the first 256 characters of 'encoding'.
472 */
473typedef struct spelltab_S
474{
475 char_u st_isw[256]; /* flags: is word char */
476 char_u st_isu[256]; /* flags: is uppercase char */
477 char_u st_fold[256]; /* chars: folded case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000478 char_u st_upper[256]; /* chars: upper case */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000479} spelltab_T;
480
481static spelltab_T spelltab;
482static int did_set_spelltab;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000483static char_u spell_ismw[256]; /* flags: is midword char */
484#ifdef FEAT_MBYTE
485static char_u *spell_ismw_mb = NULL; /* multi-byte midword chars */
486#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000487
/* Character flags, as listed for <charflags> in the spell file format. */
#define CF_WORD     0x01        /* word character */
#define CF_UPPER    0x02        /* upper-case character */

491static void clear_spell_chartab __ARGS((spelltab_T *sp));
492static int set_spell_finish __ARGS((spelltab_T *new_st));
Bram Moolenaarea408852005-06-25 22:49:46 +0000493static int spell_iswordp __ARGS((char_u *p));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000494static void write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000495
/*
 * Return TRUE if "p" points to a word character.  Like spell_iswordp() but
 * without the special handling of a single quote.
 * Checking for a word character is done very often, avoid the function call
 * overhead.
 * Note: with FEAT_MBYTE "p" is evaluated more than once, thus it must not
 * have side effects.
 */
#ifdef FEAT_MBYTE
# define SPELL_ISWORDP(p) ((has_mbyte && MB_BYTE2LEN(*(p)) > 1) \
                 ? (mb_get_class(p) >= 2) : spelltab.st_isw[*(p)])
#else
# define SPELL_ISWORDP(p) (spelltab.st_isw[*(p)])
#endif

/*
 * For finding suggestions: At each node in the tree these states are tried:
 */
typedef enum
{
    STATE_START = 0,    /* At start of node check for NUL bytes (goodword
                         * ends); if badword ends there is a match, otherwise
                         * try splitting word. */
    STATE_NOPREFIX,     /* try without prefix */
    STATE_SPLITUNDO,    /* Undo splitting. */
    STATE_ENDNUL,       /* Past NUL bytes at start of the node. */
    STATE_PLAIN,        /* Use each byte of the node. */
    STATE_DEL,          /* Delete a byte from the bad word. */
    STATE_INS,          /* Insert a byte in the bad word. */
    STATE_SWAP,         /* Swap two bytes. */
    STATE_UNSWAP,       /* Undo swap two characters. */
    STATE_SWAP3,        /* Swap two characters over three. */
    STATE_UNSWAP3,      /* Undo Swap two characters over three. */
    STATE_UNROT3L,      /* Undo rotate three characters left */
    STATE_UNROT3R,      /* Undo rotate three characters right */
    STATE_REP_INI,      /* Prepare for using REP items. */
    STATE_REP,          /* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,     /* Undo a REP item replacement. */
    STATE_FINAL         /* End of this node. */
} state_T;

535/*
Bram Moolenaar0c405862005-06-22 22:26:26 +0000536 * Struct to keep the state at each level in suggest_try_change().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000537 */
538typedef struct trystate_S
539{
Bram Moolenaarea424162005-06-16 21:51:00 +0000540 state_T ts_state; /* state at this level, STATE_ */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000541 int ts_score; /* score */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000542 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaarea424162005-06-16 21:51:00 +0000543 short ts_curi; /* index in list of child nodes */
544 char_u ts_fidx; /* index in fword[], case-folded bad word */
545 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
546 char_u ts_twordlen; /* valid length of tword[] */
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000547 char_u ts_prefixdepth; /* stack depth for end of prefix or PREFIXTREE
548 * or NOPREFIX */
Bram Moolenaarea424162005-06-16 21:51:00 +0000549#ifdef FEAT_MBYTE
550 char_u ts_tcharlen; /* number of bytes in tword character */
551 char_u ts_tcharidx; /* current byte index in tword character */
552 char_u ts_isdiff; /* DIFF_ values */
553 char_u ts_fcharstart; /* index in fword where badword char started */
554#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000555 char_u ts_save_prewordlen; /* saved "prewordlen" */
Bram Moolenaarea424162005-06-16 21:51:00 +0000556 char_u ts_save_splitoff; /* su_splitoff saved here */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000557 char_u ts_save_badflags; /* su_badflags saved here */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000558} trystate_T;
559
/* values for ts_isdiff */
#define DIFF_NONE       0       /* no different byte (yet) */
#define DIFF_YES        1       /* different byte found */
#define DIFF_INSERT     2       /* inserting character */

/* special values ts_prefixdepth */
#define PREFIXTREE      0xfe    /* walking through the prefix tree */
#define NOPREFIX        0xff    /* not using prefixes */

/* mode values for find_word */
#define FIND_FOLDWORD   0       /* find word case-folded */
#define FIND_KEEPWORD   1       /* find keep-case word */
#define FIND_PREFIX     2       /* find word after prefix */

Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000574static slang_T *slang_alloc __ARGS((char_u *lang));
575static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000576static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000577static void find_word __ARGS((matchinf_T *mip, int mode));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000578static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int prefid, char_u *word, slang_T *slang));
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000579static void find_prefix __ARGS((matchinf_T *mip));
580static int fold_more __ARGS((matchinf_T *mip));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000581static int spell_valid_case __ARGS((int origflags, int treeflags));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000582static int no_spell_checking __ARGS((void));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000583static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000584static char_u *spell_enc __ARGS((void));
585static void spell_load_cb __ARGS((char_u *fname, void *cookie));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000586static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000587#ifdef FEAT_MBYTE
588static int *mb_str2wide __ARGS((char_u *s));
589#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000590static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000591static int find_region __ARGS((char_u *rp, char_u *region));
592static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000593static void spell_reload_one __ARGS((char_u *fname, int added_word));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000594static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000595static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
596static void write_spell_chartab __ARGS((FILE *fd));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000597static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
Bram Moolenaarea408852005-06-25 22:49:46 +0000598static void spell_find_suggest __ARGS((char_u *badptr, suginfo_T *su, int maxcount, int banbadword));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000599#ifdef FEAT_EVAL
600static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
601#endif
602static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
603static void spell_suggest_intern __ARGS((suginfo_T *su));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000604static void spell_find_cleanup __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000605static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000606static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000607static void suggest_try_special __ARGS((suginfo_T *su));
608static void suggest_try_change __ARGS((suginfo_T *su));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000609static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
610static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000611static void score_comp_sal __ARGS((suginfo_T *su));
612static void score_combine __ARGS((suginfo_T *su));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000613static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
Bram Moolenaar0c405862005-06-22 22:26:26 +0000614static void suggest_try_soundalike __ARGS((suginfo_T *su));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000615static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
Bram Moolenaarea424162005-06-16 21:51:00 +0000616static void set_map_str __ARGS((slang_T *lp, char_u *map));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000617static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000618static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000619static void add_banned __ARGS((suginfo_T *su, char_u *word));
620static int was_banned __ARGS((suginfo_T *su, char_u *word));
621static void free_banned __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000622static void rescore_suggestions __ARGS((suginfo_T *su));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000623static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000624static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
625static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
626static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000627#ifdef FEAT_MBYTE
Bram Moolenaar42eeac32005-06-29 22:40:58 +0000628static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000629#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000630static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000631static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
Bram Moolenaarf417f2b2005-06-23 22:29:21 +0000632static void dump_word __ARGS((char_u *word, int round, int flags, linenr_T lnum));
633static linenr_T apply_prefixes __ARGS((slang_T *slang, char_u *word, int round, int flags, linenr_T startlnum));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000634
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with a negative number!
 * The argument is evaluated more than once, thus it must not have side
 * effects.
 *
 * SPELL_TOFOLD(c)      case-folded version of "c"
 * SPELL_TOUPPER(c)     upper-case version of "c"
 * SPELL_ISUPPER(c)     non-zero when "c" is an upper-case character
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation. */
# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
            : (c) < 256 ? spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
            : (c) < 256 ? spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
/* Without iswupper() nothing is known about characters above 255.  The
 * result must be FALSE then, not "c" itself: that is non-zero and would make
 * every such character count as upper case. */
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
            : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
# endif
#endif
673
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000674
675static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000676
677/*
678 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000679 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000680 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
681 * or when it's OK it remains unchanged.
682 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000683 *
684 * "sug" is normally NULL. When looking for suggestions it points to
685 * suginfo_T. It's passed as a void pointer to keep the struct local.
686 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000687 * Returns the length of the word in bytes, also when it's OK, so that the
688 * caller can skip over the word.
689 */
690 int
Bram Moolenaar51485f02005-06-04 21:55:20 +0000691spell_check(wp, ptr, attrp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000692 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000693 char_u *ptr;
694 int *attrp;
695{
696 matchinf_T mi; /* Most things are put in "mi" so that it can
697 be passed to functions quickly. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000698 int nrlen = 0; /* found a number first */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000699
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000700 /* A word never starts at a space or a control character. Return quickly
701 * then, skipping over the character. */
702 if (*ptr <= ' ')
703 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000704
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000705 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
Bram Moolenaar0c405862005-06-22 22:26:26 +0000706 * 0X99FF. But when a word character follows do check spelling to find
707 * "3GPP". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000708 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000709 {
Bram Moolenaar3982c542005-06-08 21:56:31 +0000710 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
711 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000712 else
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000713 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000714 mi.mi_end = skipdigits(ptr);
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000715 nrlen = mi.mi_end - ptr;
716 }
Bram Moolenaarea408852005-06-25 22:49:46 +0000717 if (!spell_iswordp(mi.mi_end))
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000718 return (int)(mi.mi_end - ptr);
Bram Moolenaar0c405862005-06-22 22:26:26 +0000719
720 /* Try including the digits in the word. */
721 mi.mi_fend = ptr + nrlen;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000722 }
Bram Moolenaar0c405862005-06-22 22:26:26 +0000723 else
724 mi.mi_fend = ptr;
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000725
Bram Moolenaar0c405862005-06-22 22:26:26 +0000726 /* Find the normal end of the word (until the next non-word character). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000727 mi.mi_word = ptr;
Bram Moolenaarea408852005-06-25 22:49:46 +0000728 if (spell_iswordp(mi.mi_fend))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000729 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000730 do
Bram Moolenaar51485f02005-06-04 21:55:20 +0000731 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000732 mb_ptr_adv(mi.mi_fend);
Bram Moolenaarea408852005-06-25 22:49:46 +0000733 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend));
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000734 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000735
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000736 /* We always use the characters up to the next non-word character,
737 * also for bad words. */
738 mi.mi_end = mi.mi_fend;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000739
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000740 /* Check caps type later. */
741 mi.mi_capflags = 0;
742 mi.mi_cend = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000743
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000744 /* Include one non-word character so that we can check for the
745 * word end. */
746 if (*mi.mi_fend != NUL)
747 mb_ptr_adv(mi.mi_fend);
748
749 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
750 MAXWLEN + 1);
751 mi.mi_fwordlen = STRLEN(mi.mi_fword);
752
753 /* The word is bad unless we recognize it. */
754 mi.mi_result = SP_BAD;
755
756 /*
757 * Loop over the languages specified in 'spelllang'.
758 * We check them all, because a matching word may be longer than an
759 * already found matching word.
760 */
761 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
762 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
763 {
764 /* Check for a matching word in case-folded words. */
765 find_word(&mi, FIND_FOLDWORD);
766
767 /* Check for a matching word in keep-case words. */
768 find_word(&mi, FIND_KEEPWORD);
769
770 /* Check for matching prefixes. */
771 find_prefix(&mi);
772 }
773
774 if (mi.mi_result != SP_OK)
775 {
Bram Moolenaar0c405862005-06-22 22:26:26 +0000776 /* If we found a number skip over it. Allows for "42nd". Do flag
777 * rare and local words, e.g., "3GPP". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000778 if (nrlen > 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +0000779 {
780 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
781 return nrlen;
782 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000783
784 /* When we are at a non-word character there is no error, just
785 * skip over the character (try looking for a word after it). */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000786 else if (!SPELL_ISWORDP(ptr))
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000787 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000788#ifdef FEAT_MBYTE
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000789 if (has_mbyte)
790 return mb_ptr2len_check(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000791#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000792 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000793 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000794
795 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
796 *attrp = highlight_attr[HLF_SPB];
797 else if (mi.mi_result == SP_RARE)
798 *attrp = highlight_attr[HLF_SPR];
799 else
800 *attrp = highlight_attr[HLF_SPL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000801 }
802
Bram Moolenaar51485f02005-06-04 21:55:20 +0000803 return (int)(mi.mi_end - ptr);
804}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000805
Bram Moolenaar51485f02005-06-04 21:55:20 +0000806/*
807 * Check if the word at "mip->mi_word" is in the tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000808 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
809 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
810 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
811 * tree.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000812 *
813 * For a match mip->mi_result is updated.
814 */
815 static void
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000816find_word(mip, mode)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000817 matchinf_T *mip;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000818 int mode;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000819{
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000820 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000821 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000822 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000823 int endidxcnt = 0;
824 int len;
825 int wlen = 0;
826 int flen;
827 int c;
828 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000829 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000830#ifdef FEAT_MBYTE
831 char_u *s;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000832 char_u *p;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000833#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000834 int res = SP_BAD;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000835 slang_T *slang = mip->mi_lp->lp_slang;
836 unsigned flags;
837 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000838 idx_T *idxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000839 int prefid;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000840
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000841 if (mode == FIND_KEEPWORD)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000842 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000843 /* Check for word with matching case in keep-case tree. */
844 ptr = mip->mi_word;
845 flen = 9999; /* no case folding, always enough bytes */
846 byts = slang->sl_kbyts;
847 idxs = slang->sl_kidxs;
848 }
849 else
850 {
851 /* Check for case-folded in case-folded tree. */
852 ptr = mip->mi_fword;
853 flen = mip->mi_fwordlen; /* available case-folded bytes */
854 byts = slang->sl_fbyts;
855 idxs = slang->sl_fidxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000856
857 if (mode == FIND_PREFIX)
858 {
859 /* Skip over the prefix. */
860 wlen = mip->mi_prefixlen;
861 flen -= mip->mi_prefixlen;
862 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000863 }
864
Bram Moolenaar51485f02005-06-04 21:55:20 +0000865 if (byts == NULL)
866 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000867
Bram Moolenaar51485f02005-06-04 21:55:20 +0000868 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000869 * Repeat advancing in the tree until:
870 * - there is a byte that doesn't match,
871 * - we reach the end of the tree,
872 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000873 */
874 for (;;)
875 {
Bram Moolenaar0c405862005-06-22 22:26:26 +0000876 if (flen <= 0 && *mip->mi_fend != NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000877 flen = fold_more(mip);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000878
879 len = byts[arridx++];
880
881 /* If the first possible byte is a zero the word could end here.
882 * Remember this index, we first check for the longest word. */
883 if (byts[arridx] == 0)
884 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000885 if (endidxcnt == MAXWLEN)
886 {
887 /* Must be a corrupted spell file. */
888 EMSG(_(e_format));
889 return;
890 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000891 endlen[endidxcnt] = wlen;
892 endidx[endidxcnt++] = arridx++;
893 --len;
894
895 /* Skip over the zeros, there can be several flag/region
896 * combinations. */
897 while (len > 0 && byts[arridx] == 0)
898 {
899 ++arridx;
900 --len;
901 }
902 if (len == 0)
903 break; /* no children, word must end here */
904 }
905
906 /* Stop looking at end of the line. */
907 if (ptr[wlen] == NUL)
908 break;
909
910 /* Perform a binary search in the list of accepted bytes. */
911 c = ptr[wlen];
Bram Moolenaar0c405862005-06-22 22:26:26 +0000912 if (c == TAB) /* <Tab> is handled like <Space> */
913 c = ' ';
Bram Moolenaar51485f02005-06-04 21:55:20 +0000914 lo = arridx;
915 hi = arridx + len - 1;
916 while (lo < hi)
917 {
918 m = (lo + hi) / 2;
919 if (byts[m] > c)
920 hi = m - 1;
921 else if (byts[m] < c)
922 lo = m + 1;
923 else
924 {
925 lo = hi = m;
926 break;
927 }
928 }
929
930 /* Stop if there is no matching byte. */
931 if (hi < lo || byts[lo] != c)
932 break;
933
934 /* Continue at the child (if there is one). */
935 arridx = idxs[lo];
936 ++wlen;
937 --flen;
Bram Moolenaar0c405862005-06-22 22:26:26 +0000938
939 /* One space in the good word may stand for several spaces in the
940 * checked word. */
941 if (c == ' ')
942 {
943 for (;;)
944 {
945 if (flen <= 0 && *mip->mi_fend != NUL)
946 flen = fold_more(mip);
947 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
948 break;
949 ++wlen;
950 --flen;
951 }
952 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000953 }
954
955 /*
956 * Verify that one of the possible endings is valid. Try the longest
957 * first.
958 */
959 while (endidxcnt > 0)
960 {
961 --endidxcnt;
962 arridx = endidx[endidxcnt];
963 wlen = endlen[endidxcnt];
964
965#ifdef FEAT_MBYTE
966 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
967 continue; /* not at first byte of character */
968#endif
Bram Moolenaarea408852005-06-25 22:49:46 +0000969 if (spell_iswordp(ptr + wlen))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000970 continue; /* next char is a word character */
971
972#ifdef FEAT_MBYTE
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000973 if (mode != FIND_KEEPWORD && has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000974 {
975 /* Compute byte length in original word, length may change
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000976 * when folding case. This can be slow, take a shortcut when the
977 * case-folded word is equal to the keep-case word. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000978 p = mip->mi_word;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000979 if (STRNCMP(ptr, p, wlen) != 0)
980 {
981 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
982 mb_ptr_adv(p);
983 wlen = p - mip->mi_word;
984 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000985 }
986#endif
987
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000988 /* Check flags and region. For FIND_PREFIX check the condition and
989 * prefix ID.
990 * Repeat this if there are more flags/region alternatives until there
991 * is a match. */
992 res = SP_BAD;
993 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
994 --len, ++arridx)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000995 {
996 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000997
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000998 /* For the fold-case tree check that the case of the checked word
999 * matches with what the word in the tree requires.
1000 * For keep-case tree the case is always right. For prefixes we
1001 * don't bother to check. */
1002 if (mode == FIND_FOLDWORD)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001003 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001004 if (mip->mi_cend != mip->mi_word + wlen)
1005 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001006 /* mi_capflags was set for a different word length, need
1007 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001008 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001009 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001010 }
1011
Bram Moolenaar0c405862005-06-22 22:26:26 +00001012 if (mip->mi_capflags == WF_KEEPCAP
1013 || !spell_valid_case(mip->mi_capflags, flags))
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001014 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001015 }
1016
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001017 /* When mode is FIND_PREFIX the word must support the prefix:
1018 * check the prefix ID and the condition. Do that for the list at
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001019 * mip->mi_prefarridx that find_prefix() filled. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001020 if (mode == FIND_PREFIX)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001021 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001022 /* The prefix ID is stored two bytes above the flags. */
1023 prefid = (unsigned)flags >> 16;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001024 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001025 prefid, mip->mi_fword + mip->mi_prefixlen,
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001026 slang);
1027 if (c == 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001028 continue;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001029
1030 /* Use the WF_RARE flag for a rare prefix. */
1031 if (c & WF_RAREPFX)
1032 flags |= WF_RARE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001033 }
1034
1035 if (flags & WF_BANNED)
1036 res = SP_BANNED;
1037 else if (flags & WF_REGION)
1038 {
1039 /* Check region. */
1040 if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
1041 res = SP_OK;
1042 else
1043 res = SP_LOCAL;
1044 }
1045 else if (flags & WF_RARE)
1046 res = SP_RARE;
1047 else
1048 res = SP_OK;
1049
1050 /* Always use the longest match and the best result. */
1051 if (mip->mi_result > res)
1052 {
1053 mip->mi_result = res;
1054 mip->mi_end = mip->mi_word + wlen;
1055 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001056 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001057 mip->mi_end = mip->mi_word + wlen;
1058
1059 if (res == SP_OK)
1060 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001061 }
1062
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001063 if (res == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001064 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001065 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001066}
1067
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001068/*
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001069 * Return non-zero if the prefix indicated by "mip->mi_prefarridx" matches
1070 * with the prefix ID "prefid" for the word "word".
1071 * The WF_RAREPFX flag is included in the return value for a rare prefix.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001072 */
1073 static int
1074valid_word_prefix(totprefcnt, arridx, prefid, word, slang)
1075 int totprefcnt; /* nr of prefix IDs */
1076 int arridx; /* idx in sl_pidxs[] */
1077 int prefid;
1078 char_u *word;
1079 slang_T *slang;
1080{
1081 int prefcnt;
1082 int pidx;
1083 regprog_T *rp;
1084 regmatch_T regmatch;
1085
1086 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1087 {
1088 pidx = slang->sl_pidxs[arridx + prefcnt];
1089
1090 /* Check the prefix ID. */
1091 if (prefid != (pidx & 0xff))
1092 continue;
1093
1094 /* Check the condition, if there is one. The condition index is
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001095 * stored in the two bytes above the prefix ID byte. */
1096 rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001097 if (rp != NULL)
1098 {
1099 regmatch.regprog = rp;
1100 regmatch.rm_ic = FALSE;
1101 if (!vim_regexec(&regmatch, word, 0))
1102 continue;
1103 }
1104
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001105 /* It's a match! Return the WF_RAREPFX flag. */
1106 return pidx;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001107 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001108 return 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001109}
1110
1111/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001112 * Check if the word at "mip->mi_word" has a matching prefix.
1113 * If it does, then check the following word.
1114 *
1115 * For a match mip->mi_result is updated.
1116 */
1117 static void
1118find_prefix(mip)
1119 matchinf_T *mip;
1120{
1121 idx_T arridx = 0;
1122 int len;
1123 int wlen = 0;
1124 int flen;
1125 int c;
1126 char_u *ptr;
1127 idx_T lo, hi, m;
1128 slang_T *slang = mip->mi_lp->lp_slang;
1129 char_u *byts;
1130 idx_T *idxs;
1131
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001132 byts = slang->sl_pbyts;
1133 if (byts == NULL)
1134 return; /* array is empty */
1135
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001136 /* We use the case-folded word here, since prefixes are always
1137 * case-folded. */
1138 ptr = mip->mi_fword;
1139 flen = mip->mi_fwordlen; /* available case-folded bytes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001140 idxs = slang->sl_pidxs;
1141
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001142 /*
1143 * Repeat advancing in the tree until:
1144 * - there is a byte that doesn't match,
1145 * - we reach the end of the tree,
1146 * - or we reach the end of the line.
1147 */
1148 for (;;)
1149 {
1150 if (flen == 0 && *mip->mi_fend != NUL)
1151 flen = fold_more(mip);
1152
1153 len = byts[arridx++];
1154
1155 /* If the first possible byte is a zero the prefix could end here.
1156 * Check if the following word matches and supports the prefix. */
1157 if (byts[arridx] == 0)
1158 {
1159 /* There can be several prefixes with different conditions. We
1160 * try them all, since we don't know which one will give the
1161 * longest match. The word is the same each time, pass the list
1162 * of possible prefixes to find_word(). */
1163 mip->mi_prefarridx = arridx;
1164 mip->mi_prefcnt = len;
1165 while (len > 0 && byts[arridx] == 0)
1166 {
1167 ++arridx;
1168 --len;
1169 }
1170 mip->mi_prefcnt -= len;
1171
1172 /* Find the word that comes after the prefix. */
1173 mip->mi_prefixlen = wlen;
1174 find_word(mip, FIND_PREFIX);
1175
1176
1177 if (len == 0)
1178 break; /* no children, word must end here */
1179 }
1180
1181 /* Stop looking at end of the line. */
1182 if (ptr[wlen] == NUL)
1183 break;
1184
1185 /* Perform a binary search in the list of accepted bytes. */
1186 c = ptr[wlen];
1187 lo = arridx;
1188 hi = arridx + len - 1;
1189 while (lo < hi)
1190 {
1191 m = (lo + hi) / 2;
1192 if (byts[m] > c)
1193 hi = m - 1;
1194 else if (byts[m] < c)
1195 lo = m + 1;
1196 else
1197 {
1198 lo = hi = m;
1199 break;
1200 }
1201 }
1202
1203 /* Stop if there is no matching byte. */
1204 if (hi < lo || byts[lo] != c)
1205 break;
1206
1207 /* Continue at the child (if there is one). */
1208 arridx = idxs[lo];
1209 ++wlen;
1210 --flen;
1211 }
1212}
1213
1214/*
1215 * Need to fold at least one more character. Do until next non-word character
1216 * for efficiency.
1217 * Return the length of the folded chars in bytes.
1218 */
1219 static int
1220fold_more(mip)
1221 matchinf_T *mip;
1222{
1223 int flen;
1224 char_u *p;
1225
1226 p = mip->mi_fend;
1227 do
1228 {
1229 mb_ptr_adv(mip->mi_fend);
Bram Moolenaarea408852005-06-25 22:49:46 +00001230 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001231
1232 /* Include the non-word character so that we can check for the
1233 * word end. */
1234 if (*mip->mi_fend != NUL)
1235 mb_ptr_adv(mip->mi_fend);
1236
1237 (void)spell_casefold(p, (int)(mip->mi_fend - p),
1238 mip->mi_fword + mip->mi_fwordlen,
1239 MAXWLEN - mip->mi_fwordlen);
1240 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
1241 mip->mi_fwordlen += flen;
1242 return flen;
1243}
1244
1245/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001246 * Check case flags for a word. Return TRUE if the word has the requested
1247 * case.
1248 */
1249 static int
1250spell_valid_case(origflags, treeflags)
1251 int origflags; /* flags for the checked word. */
1252 int treeflags; /* flags for the word in the spell tree */
1253{
1254 return (origflags == WF_ALLCAP
1255 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1256 && ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
1257}
1258
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001259/*
1260 * Return TRUE if spell checking is not enabled.
1261 */
1262 static int
1263no_spell_checking()
1264{
1265 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
1266 {
1267 EMSG(_("E756: Spell checking is not enabled"));
1268 return TRUE;
1269 }
1270 return FALSE;
1271}
Bram Moolenaar51485f02005-06-04 21:55:20 +00001272
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001273/*
1274 * Move to next spell error.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001275 * "curline" is TRUE for "z?": find word under/after cursor in the same line.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001276 * Return OK if found, FAIL otherwise.
1277 */
1278 int
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001279spell_move_to(dir, allwords, curline)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001280 int dir; /* FORWARD or BACKWARD */
1281 int allwords; /* TRUE for "[s" and "]s" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001282 int curline;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001283{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001284 linenr_T lnum;
1285 pos_T found_pos;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001286 char_u *line;
1287 char_u *p;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001288 char_u *endp;
1289 int attr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001290 int len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001291 int has_syntax = syntax_present(curbuf);
1292 int col;
1293 int can_spell;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001294 char_u *buf = NULL;
1295 int buflen = 0;
1296 int skip = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001297
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001298 if (no_spell_checking())
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001299 return FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001300
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001301 /*
1302 * Start looking for bad word at the start of the line, because we can't
     * start halfway through a word: we don't know where it starts or ends.
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001304 *
1305 * When searching backwards, we continue in the line to find the last
1306 * bad word (in the cursor line: before the cursor).
Bram Moolenaar0c405862005-06-22 22:26:26 +00001307 *
1308 * We concatenate the start of the next line, so that wrapped words work
1309 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
1310 * though...
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001311 */
1312 lnum = curwin->w_cursor.lnum;
1313 found_pos.lnum = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001314
1315 while (!got_int)
1316 {
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001317 line = ml_get(lnum);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001318
Bram Moolenaar0c405862005-06-22 22:26:26 +00001319 len = STRLEN(line);
1320 if (buflen < len + MAXWLEN + 2)
1321 {
1322 vim_free(buf);
1323 buflen = len + MAXWLEN + 2;
1324 buf = alloc(buflen);
1325 if (buf == NULL)
1326 break;
1327 }
1328
1329 /* Copy the line into "buf" and append the start of the next line if
1330 * possible. */
1331 STRCPY(buf, line);
1332 if (lnum < curbuf->b_ml.ml_line_count)
1333 spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN);
1334
1335 p = buf + skip;
1336 endp = buf + len;
1337 while (p < endp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001338 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001339 /* When searching backward don't search after the cursor. */
1340 if (dir == BACKWARD
1341 && lnum == curwin->w_cursor.lnum
Bram Moolenaar0c405862005-06-22 22:26:26 +00001342 && (colnr_T)(p - buf) >= curwin->w_cursor.col)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001343 break;
1344
1345 /* start of word */
Bram Moolenaar0c405862005-06-22 22:26:26 +00001346 attr = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001347 len = spell_check(curwin, p, &attr);
1348
1349 if (attr != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001350 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001351 /* We found a bad word. Check the attribute. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001352 if (allwords || attr == highlight_attr[HLF_SPB])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001353 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001354 /* When searching forward only accept a bad word after
1355 * the cursor. */
1356 if (dir == BACKWARD
1357 || lnum > curwin->w_cursor.lnum
1358 || (lnum == curwin->w_cursor.lnum
Bram Moolenaar0c405862005-06-22 22:26:26 +00001359 && (colnr_T)(curline ? p - buf + len
1360 : p - buf)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001361 > curwin->w_cursor.col))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001362 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001363 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001364 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00001365 col = p - buf;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001366 (void)syn_get_id(lnum, (colnr_T)col,
1367 FALSE, &can_spell);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001368 }
1369 else
1370 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001371
Bram Moolenaar51485f02005-06-04 21:55:20 +00001372 if (can_spell)
1373 {
1374 found_pos.lnum = lnum;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001375 found_pos.col = p - buf;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001376#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +00001377 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001378#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00001379 if (dir == FORWARD)
1380 {
1381 /* No need to search further. */
1382 curwin->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001383 vim_free(buf);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001384 return OK;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001385 }
1386 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001387 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001388 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001389 }
1390
Bram Moolenaar51485f02005-06-04 21:55:20 +00001391 /* advance to character after the word */
1392 p += len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001393 }
1394
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001395 if (curline)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001396 break; /* only check cursor line */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001397
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001398 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001399 if (dir == BACKWARD)
1400 {
1401 if (found_pos.lnum != 0)
1402 {
1403 /* Use the last match in the line. */
1404 curwin->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001405 vim_free(buf);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001406 return OK;
1407 }
1408 if (lnum == 1)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001409 break;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001410 --lnum;
1411 }
1412 else
1413 {
1414 if (lnum == curbuf->b_ml.ml_line_count)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001415 break;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001416 ++lnum;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001417
1418 /* Skip the characters at the start of the next line that were
1419 * included in a match crossing line boundaries. */
1420 if (attr == 0)
1421 skip = p - endp;
1422 else
1423 skip = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001424 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001425
1426 line_breakcheck();
1427 }
1428
Bram Moolenaar0c405862005-06-22 22:26:26 +00001429 vim_free(buf);
1430 return FAIL;
1431}
1432
1433/*
1434 * For spell checking: concatenate the start of the following line "line" into
1435 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
1436 */
1437 void
1438spell_cat_line(buf, line, maxlen)
1439 char_u *buf;
1440 char_u *line;
1441 int maxlen;
1442{
1443 char_u *p;
1444 int n;
1445
1446 p = skipwhite(line);
1447 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1448 p = skipwhite(p + 1);
1449
1450 if (*p != NUL)
1451 {
1452 *buf = ' ';
1453 vim_strncpy(buf + 1, line, maxlen - 1);
1454 n = p - line;
1455 if (n >= maxlen)
1456 n = maxlen - 1;
1457 vim_memset(buf + 1, ' ', n);
1458 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001459}
1460
1461/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001462 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00001463 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001464 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001465 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001466spell_load_lang(lang)
1467 char_u *lang;
1468{
Bram Moolenaarb765d632005-06-07 21:00:02 +00001469 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001470 int r;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001471 char_u langcp[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001472
Bram Moolenaarb765d632005-06-07 21:00:02 +00001473 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001474 * It's truncated when an error is detected. */
1475 STRCPY(langcp, lang);
1476
Bram Moolenaarb765d632005-06-07 21:00:02 +00001477 /*
1478 * Find the first spell file for "lang" in 'runtimepath' and load it.
1479 */
1480 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1481 "spell/%s.%s.spl", lang, spell_enc());
1482 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001483
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001484 if (r == FAIL && *langcp != NUL)
1485 {
1486 /* Try loading the ASCII version. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001487 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaar9c13b352005-05-19 20:53:52 +00001488 "spell/%s.ascii.spl", lang);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001489 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001490 }
1491
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001492 if (r == FAIL)
1493 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
1494 fname_enc + 6);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001495 else if (*langcp != NUL)
1496 {
1497 /* Load all the additions. */
1498 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
1499 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &langcp);
1500 }
1501}
1502
1503/*
1504 * Return the encoding used for spell checking: Use 'encoding', except that we
1505 * use "latin1" for "latin9". And limit to 60 characters (just in case).
1506 */
1507 static char_u *
1508spell_enc()
1509{
1510
1511#ifdef FEAT_MBYTE
1512 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1513 return p_enc;
1514#endif
1515 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001516}
1517
1518/*
1519 * Allocate a new slang_T.
1520 * Caller must fill "sl_next".
1521 */
1522 static slang_T *
1523slang_alloc(lang)
1524 char_u *lang;
1525{
1526 slang_T *lp;
1527
Bram Moolenaar51485f02005-06-04 21:55:20 +00001528 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001529 if (lp != NULL)
1530 {
1531 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001532 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001533 ga_init2(&lp->sl_sal, sizeof(salitem_T), 10);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001534 }
1535 return lp;
1536}
1537
1538/*
1539 * Free the contents of an slang_T and the structure itself.
1540 */
1541 static void
1542slang_free(lp)
1543 slang_T *lp;
1544{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001545 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001546 vim_free(lp->sl_fname);
1547 slang_clear(lp);
1548 vim_free(lp);
1549}
1550
1551/*
1552 * Clear an slang_T so that the file can be reloaded.
1553 */
1554 static void
1555slang_clear(lp)
1556 slang_T *lp;
1557{
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001558 garray_T *gap;
1559 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001560 salitem_T *smp;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001561 int i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001562
Bram Moolenaar51485f02005-06-04 21:55:20 +00001563 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001564 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001565 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001566 lp->sl_kbyts = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001567 vim_free(lp->sl_pbyts);
1568 lp->sl_pbyts = NULL;
1569
Bram Moolenaar51485f02005-06-04 21:55:20 +00001570 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001571 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001572 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001573 lp->sl_kidxs = NULL;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001574 vim_free(lp->sl_pidxs);
1575 lp->sl_pidxs = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001576
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001577 gap = &lp->sl_rep;
1578 while (gap->ga_len > 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001579 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001580 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
1581 vim_free(ftp->ft_from);
1582 vim_free(ftp->ft_to);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001583 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001584 ga_clear(gap);
1585
1586 gap = &lp->sl_sal;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001587 if (lp->sl_sofo)
1588 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
1589 for (i = 0; i < gap->ga_len; ++i)
1590 vim_free(((int **)gap->ga_data)[i]);
1591 else
1592 /* SAL items: free salitem_T items */
1593 while (gap->ga_len > 0)
1594 {
1595 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
1596 vim_free(smp->sm_lead);
1597 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
1598 vim_free(smp->sm_to);
1599#ifdef FEAT_MBYTE
1600 vim_free(smp->sm_lead_w);
1601 vim_free(smp->sm_oneof_w);
1602 vim_free(smp->sm_to_w);
1603#endif
1604 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001605 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001606
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001607 for (i = 0; i < lp->sl_prefixcnt; ++i)
1608 vim_free(lp->sl_prefprog[i]);
1609 vim_free(lp->sl_prefprog);
1610
Bram Moolenaarea424162005-06-16 21:51:00 +00001611#ifdef FEAT_MBYTE
1612 {
1613 int todo = lp->sl_map_hash.ht_used;
1614 hashitem_T *hi;
1615
1616 for (hi = lp->sl_map_hash.ht_array; todo > 0; ++hi)
1617 if (!HASHITEM_EMPTY(hi))
1618 {
1619 --todo;
1620 vim_free(hi->hi_key);
1621 }
1622 }
1623 hash_clear(&lp->sl_map_hash);
1624#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001625}
1626
1627/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001628 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001629 * Invoked through do_in_runtimepath().
1630 */
1631 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00001632spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001633 char_u *fname;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001634 void *cookie; /* points to the language name */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001635{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001636 (void)spell_load_file(fname, (char_u *)cookie, NULL, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001637}
1638
1639/*
1640 * Load one spell file and store the info into a slang_T.
1641 *
1642 * This is invoked in two ways:
1643 * - From spell_load_cb() to load a spell file for the first time. "lang" is
1644 * the language name, "old_lp" is NULL. Will allocate an slang_T.
1645 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
1646 * points to the existing slang_T.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001647 * Returns the slang_T the spell file was loaded into. NULL for error.
Bram Moolenaarb765d632005-06-07 21:00:02 +00001648 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001649 static slang_T *
1650spell_load_file(fname, lang, old_lp, silent)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001651 char_u *fname;
1652 char_u *lang;
1653 slang_T *old_lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001654 int silent; /* no error if file doesn't exist */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001655{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001656 FILE *fd;
1657 char_u buf[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001658 char_u *p;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001659 char_u *bp;
1660 idx_T *ip;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001661 int i;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001662 int n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001663 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001664 int round;
1665 char_u *save_sourcing_name = sourcing_name;
1666 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001667 int cnt, ccnt;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001668 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001669 slang_T *lp = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001670 garray_T *gap;
1671 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001672 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001673 int rr;
1674 short *first;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001675 salfirst_T *sfirst;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001676 idx_T idx;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001677 int c = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001678
Bram Moolenaarb765d632005-06-07 21:00:02 +00001679 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001680 if (fd == NULL)
1681 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001682 if (!silent)
1683 EMSG2(_(e_notopen), fname);
1684 else if (p_verbose > 2)
1685 {
1686 verbose_enter();
1687 smsg((char_u *)e_notopen, fname);
1688 verbose_leave();
1689 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001690 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001691 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00001692 if (p_verbose > 2)
1693 {
1694 verbose_enter();
1695 smsg((char_u *)_("Reading spell file \"%s\""), fname);
1696 verbose_leave();
1697 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001698
Bram Moolenaarb765d632005-06-07 21:00:02 +00001699 if (old_lp == NULL)
1700 {
1701 lp = slang_alloc(lang);
1702 if (lp == NULL)
1703 goto endFAIL;
1704
1705 /* Remember the file name, used to reload the file when it's updated. */
1706 lp->sl_fname = vim_strsave(fname);
1707 if (lp->sl_fname == NULL)
1708 goto endFAIL;
1709
1710 /* Check for .add.spl. */
1711 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
1712 }
1713 else
1714 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001715
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001716 /* Set sourcing_name, so that error messages mention the file name. */
1717 sourcing_name = fname;
1718 sourcing_lnum = 0;
1719
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001720 /* <HEADER>: <fileID>
1721 * <regioncnt> <regionname> ...
1722 * <charflagslen> <charflags>
1723 * <fcharslen> <fchars>
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001724 * <midwordlen> <midword>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001725 * <prefcondcnt> <prefcond> ...
1726 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001727 for (i = 0; i < VIMSPELLMAGICL; ++i)
1728 buf[i] = getc(fd); /* <fileID> */
1729 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
1730 {
1731 EMSG(_("E757: Wrong file ID in spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001732 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001733 }
1734
1735 cnt = getc(fd); /* <regioncnt> */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001736 if (cnt < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001737 {
1738truncerr:
1739 EMSG(_("E758: Truncated spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001740 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001741 }
1742 if (cnt > 8)
1743 {
1744formerr:
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001745 EMSG(_(e_format));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001746 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001747 }
1748 for (i = 0; i < cnt; ++i)
1749 {
1750 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
1751 lp->sl_regions[i * 2 + 1] = getc(fd);
1752 }
1753 lp->sl_regions[cnt * 2] = NUL;
1754
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001755 cnt = getc(fd); /* <charflagslen> */
1756 if (cnt > 0)
1757 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001758 p = alloc((unsigned)cnt);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001759 if (p == NULL)
1760 goto endFAIL;
1761 for (i = 0; i < cnt; ++i)
1762 p[i] = getc(fd); /* <charflags> */
1763
1764 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
1765 if (ccnt <= 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001766 {
1767 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001768 goto formerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001769 }
1770 fol = alloc((unsigned)ccnt + 1);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001771 if (fol == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001772 {
1773 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001774 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001775 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001776 for (i = 0; i < ccnt; ++i)
1777 fol[i] = getc(fd); /* <fchars> */
1778 fol[i] = NUL;
1779
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001780 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001781 i = set_spell_charflags(p, cnt, fol);
1782 vim_free(p);
1783 vim_free(fol);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001784#if 0 /* tolerate the differences */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001785 if (i == FAIL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001786 goto formerr;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001787#endif
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001788 }
1789 else
1790 {
1791 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
1792 cnt = (getc(fd) << 8) + getc(fd);
1793 if (cnt != 0)
1794 goto formerr;
1795 }
1796
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001797 /* <midwordlen> <midword> */
1798 cnt = (getc(fd) << 8) + getc(fd);
1799 if (cnt < 0)
1800 goto truncerr;
1801 if (cnt > 0)
1802 {
1803 for (i = 0; i < cnt; ++i)
1804 if (i < MAXWLEN) /* truncate at reasonable length */
1805 buf[i] = getc(fd);
1806 if (i < MAXWLEN)
1807 buf[i] = NUL;
1808 else
1809 buf[MAXWLEN] = NUL;
1810
1811 /* The midword characters add up to any midword characters from other
1812 * .spl files. */
1813 for (p = buf; *p != NUL; )
1814#ifdef FEAT_MBYTE
1815 if (has_mbyte)
1816 {
1817 c = mb_ptr2char(p);
1818 i = mb_ptr2len_check(p);
1819 if (c < 256)
1820 spell_ismw[c] = TRUE;
1821 else if (spell_ismw_mb == NULL)
1822 /* First multi-byte char in "spell_ismw_mb". */
1823 spell_ismw_mb = vim_strnsave(p, i);
1824 else
1825 {
1826 /* Append multi-byte chars to "spell_ismw_mb". */
1827 n = STRLEN(spell_ismw_mb);
1828 bp = vim_strnsave(spell_ismw_mb, n + i);
1829 if (bp != NULL)
1830 {
1831 vim_free(spell_ismw_mb);
1832 spell_ismw_mb = bp;
1833 vim_strncpy(bp + n, p, i);
1834 }
1835 }
1836 p += i;
1837 }
1838 else
1839#endif
1840 spell_ismw[*p++] = TRUE;
1841 }
1842
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001843 /* <prefcondcnt> <prefcond> ... */
1844 cnt = (getc(fd) << 8) + getc(fd); /* <prefcondcnt> */
1845 if (cnt > 0)
1846 {
1847 lp->sl_prefprog = (regprog_T **)alloc_clear(
1848 (unsigned)sizeof(regprog_T *) * cnt);
1849 if (lp->sl_prefprog == NULL)
1850 goto endFAIL;
1851 lp->sl_prefixcnt = cnt;
1852
1853 for (i = 0; i < cnt; ++i)
1854 {
1855 /* <prefcond> : <condlen> <condstr> */
1856 n = getc(fd); /* <condlen> */
1857 if (n < 0)
1858 goto formerr;
1859 /* When <condlen> is zero we have an empty condition. Otherwise
1860 * compile the regexp program used to check for the condition. */
1861 if (n > 0)
1862 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001863 buf[0] = '^'; /* always match at one position only */
1864 p = buf + 1;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001865 while (n-- > 0)
1866 *p++ = getc(fd); /* <condstr> */
1867 *p = NUL;
1868 lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
1869 }
1870 }
1871 }
1872
1873
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001874 /* <SUGGEST> : <repcount> <rep> ...
1875 * <salflags> <salcount> <sal> ...
1876 * <maplen> <mapstr> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001877
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001878 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> */
1879 if (cnt < 0)
1880 goto formerr;
1881
1882 gap = &lp->sl_rep;
1883 if (ga_grow(gap, cnt) == FAIL)
1884 goto endFAIL;
1885
1886 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
1887 for (; gap->ga_len < cnt; ++gap->ga_len)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001888 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001889 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
1890 for (rr = 1; rr <= 2; ++rr)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001891 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001892 ccnt = getc(fd);
1893 if (ccnt < 0)
1894 {
1895 if (rr == 2)
1896 vim_free(ftp->ft_from);
1897 goto formerr;
1898 }
1899 if ((p = alloc(ccnt + 1)) == NULL)
1900 {
1901 if (rr == 2)
1902 vim_free(ftp->ft_from);
1903 goto endFAIL;
1904 }
1905 for (i = 0; i < ccnt; ++i)
1906 p[i] = getc(fd); /* <repfrom> or <repto> */
1907 p[i] = NUL;
1908 if (rr == 1)
1909 ftp->ft_from = p;
1910 else
1911 ftp->ft_to = p;
1912 }
1913 }
1914
1915 /* Fill the first-index table. */
1916 first = lp->sl_rep_first;
1917 for (i = 0; i < 256; ++i)
1918 first[i] = -1;
1919 for (i = 0; i < gap->ga_len; ++i)
1920 {
1921 ftp = &((fromto_T *)gap->ga_data)[i];
1922 if (first[*ftp->ft_from] == -1)
1923 first[*ftp->ft_from] = i;
1924 }
1925
1926 i = getc(fd); /* <salflags> */
1927 if (i & SAL_F0LLOWUP)
1928 lp->sl_followup = TRUE;
1929 if (i & SAL_COLLAPSE)
1930 lp->sl_collapse = TRUE;
1931 if (i & SAL_REM_ACCENTS)
1932 lp->sl_rem_accents = TRUE;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001933 if (i & SAL_SOFO)
1934 lp->sl_sofo = TRUE;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001935
1936 cnt = (getc(fd) << 8) + getc(fd); /* <salcount> */
1937 if (cnt < 0)
1938 goto formerr;
1939
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001940 if (lp->sl_sofo)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001941 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001942 /*
1943 * SOFOFROM and SOFOTO items come in one <salfrom> and <salto>
1944 */
1945 if (cnt != 1)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001946 goto formerr;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001947
1948 cnt = (getc(fd) << 8) + getc(fd); /* <salfromlen> */
1949 if (cnt < 0)
1950 goto formerr;
1951 if ((bp = alloc(cnt + 1)) == NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001952 goto endFAIL;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001953 for (i = 0; i < cnt; ++i)
1954 bp[i] = getc(fd); /* <salfrom> */
1955 bp[i] = NUL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001956
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001957 ccnt = (getc(fd) << 8) + getc(fd); /* <saltolen> */
1958 if (ccnt < 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001959 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001960 vim_free(bp);
1961 goto formerr;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001962 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001963 if ((fol = alloc(ccnt + 1)) == NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001964 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001965 vim_free(bp);
1966 goto endFAIL;
1967 }
1968 for (i = 0; i < ccnt; ++i)
1969 fol[i] = getc(fd); /* <salto> */
1970 fol[i] = NUL;
1971
1972#ifdef FEAT_MBYTE
1973 if (has_mbyte)
1974 {
1975 char_u *s;
1976
1977 /* Use "sl_sal" as an array with 256 pointers to a list of wide
1978 * characters. The index is the low byte of the character.
1979 * The list contains from-to pairs with a terminating NUL.
1980 * sl_sal_first[] is used for latin1 "from" characters. */
1981 gap = &lp->sl_sal;
1982 ga_init2(gap, sizeof(int *), 1);
1983 if (ga_grow(gap, 256) == FAIL)
1984 {
1985sofoFAIL:
1986 vim_free(bp);
1987 vim_free(fol);
1988 goto endFAIL;
1989 }
1990 vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
1991 gap->ga_len = 256;
1992
1993 /* First count the number of items for each list. Temporarily use
1994 * sl_sal_first[] for this. */
1995 for (p = bp, s = fol; *p != NUL && *s != NUL; )
1996 {
1997 c = mb_ptr2char_adv(&p);
1998 mb_ptr_adv(s);
1999 if (c >= 256)
2000 ++lp->sl_sal_first[c & 0xff];
2001 }
2002 if (*p != NUL || *s != NUL) /* lengths differ */
2003 goto sofoerr;
2004
2005 /* Allocate the lists. */
2006 for (i = 0; i < 256; ++i)
2007 if (lp->sl_sal_first[i] > 0)
2008 {
2009 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
2010 if (p == NULL)
2011 goto sofoFAIL;
2012 ((int **)gap->ga_data)[i] = (int *)p;
2013 *(int *)p = 0;
2014 }
2015
2016 /* Put the characters in sl_sal_first[] or a sl_sal list. */
2017 vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
2018 for (p = bp, s = fol; *p != NUL && *s != NUL; )
2019 {
2020 c = mb_ptr2char_adv(&p);
2021 i = mb_ptr2char_adv(&s);
2022 if (c >= 256)
2023 {
2024 int *inp;
2025
2026 /* Append the from-to chars at the end of the list with
2027 * the low byte. */
2028 inp = ((int **)gap->ga_data)[c & 0xff];
2029 while (*inp != 0)
2030 ++inp;
2031 *inp++ = c; /* from char */
2032 *inp++ = i; /* to char */
2033 *inp++ = NUL; /* NUL at the end */
2034 }
2035 else
2036 /* mapping byte to char is done in sl_sal_first[] */
2037 lp->sl_sal_first[c] = i;
2038 }
2039 }
2040 else
2041#endif
2042 {
2043 /* mapping bytes to bytes is done in sl_sal_first[] */
2044 if (cnt != ccnt)
2045 {
2046#ifdef FEAT_MBYTE
2047sofoerr:
2048#endif
2049 vim_free(bp);
2050 vim_free(fol);
2051 goto formerr;
2052 }
2053 for (i = 0; i < cnt; ++i)
2054 lp->sl_sal_first[bp[i]] = fol[i];
2055 lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
2056 }
2057 vim_free(bp);
2058 vim_free(fol);
2059 }
2060 else
2061 {
2062 /*
2063 * SAL items
2064 */
2065 gap = &lp->sl_sal;
2066 if (ga_grow(gap, cnt) == FAIL)
2067 goto endFAIL;
2068
2069 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
2070 for (; gap->ga_len < cnt; ++gap->ga_len)
2071 {
2072 smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
2073 ccnt = getc(fd); /* <salfromlen> */
2074 if (ccnt < 0)
2075 goto formerr;
2076 if ((p = alloc(ccnt + 2)) == NULL)
2077 goto endFAIL;
2078 smp->sm_lead = p;
2079
2080 /* Read up to the first special char into sm_lead. */
2081 for (i = 0; i < ccnt; ++i)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002082 {
2083 c = getc(fd); /* <salfrom> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002084 if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002085 break;
2086 *p++ = c;
2087 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002088 smp->sm_leadlen = p - smp->sm_lead;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002089 *p++ = NUL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002090
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002091 /* Put (abc) chars in sm_oneof, if any. */
2092 if (c == '(')
2093 {
2094 smp->sm_oneof = p;
2095 for (++i; i < ccnt; ++i)
2096 {
2097 c = getc(fd); /* <salfrom> */
2098 if (c == ')')
2099 break;
2100 *p++ = c;
2101 }
2102 *p++ = NUL;
2103 if (++i < ccnt)
2104 c = getc(fd);
2105 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002106 else
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002107 smp->sm_oneof = NULL;
2108
2109 /* Any following chars go in sm_rules. */
2110 smp->sm_rules = p;
2111 if (i < ccnt)
2112 /* store the char we got while checking for end of sm_lead */
2113 *p++ = c;
2114 for (++i; i < ccnt; ++i)
2115 *p++ = getc(fd); /* <salfrom> */
2116 *p++ = NUL;
2117
2118 ccnt = getc(fd); /* <saltolen> */
2119 if (ccnt < 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002120 {
2121 vim_free(smp->sm_lead);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002122 goto formerr;
2123 }
2124 if ((p = alloc(ccnt + 1)) == NULL)
2125 {
2126 vim_free(smp->sm_lead);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002127 goto endFAIL;
2128 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002129 smp->sm_to = p;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002130
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002131 for (i = 0; i < ccnt; ++i)
2132 *p++ = getc(fd); /* <salto> */
2133 *p++ = NUL;
2134
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002135#ifdef FEAT_MBYTE
2136 if (has_mbyte)
2137 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002138 /* convert the multi-byte strings to wide char strings */
2139 smp->sm_lead_w = mb_str2wide(smp->sm_lead);
2140 smp->sm_leadlen = mb_charlen(smp->sm_lead);
2141 if (smp->sm_oneof == NULL)
2142 smp->sm_oneof_w = NULL;
2143 else
2144 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
2145 smp->sm_to_w = mb_str2wide(smp->sm_to);
2146 if (smp->sm_lead_w == NULL
2147 || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
2148 || smp->sm_to_w == NULL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002149 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002150 vim_free(smp->sm_lead);
2151 vim_free(smp->sm_to);
2152 vim_free(smp->sm_lead_w);
2153 vim_free(smp->sm_oneof_w);
2154 vim_free(smp->sm_to_w);
2155 goto endFAIL;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002156 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002157 }
2158#endif
2159 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002160
2161 /* Fill the first-index table. */
2162 sfirst = lp->sl_sal_first;
2163 for (i = 0; i < 256; ++i)
2164 sfirst[i] = -1;
2165 smp = (salitem_T *)gap->ga_data;
2166 for (i = 0; i < gap->ga_len; ++i)
2167 {
2168#ifdef FEAT_MBYTE
2169 if (has_mbyte)
2170 /* Use the lowest byte of the first character. For latin1 it's
2171 * the character, for other encodings it should differ for most
2172 * characters. */
2173 c = *smp[i].sm_lead_w & 0xff;
2174 else
2175#endif
2176 c = *smp[i].sm_lead;
2177 if (sfirst[c] == -1)
2178 {
2179 sfirst[c] = i;
2180#ifdef FEAT_MBYTE
2181 if (has_mbyte)
2182 {
2183 /* Make sure all entries with this byte are following each
2184 * other. Move the ones that are in the wrong position. Do
2185 * keep the same ordering! */
2186 while (i + 1 < gap->ga_len
2187 && (*smp[i + 1].sm_lead_w & 0xff) == c)
2188 /* Skip over entry with same index byte. */
2189 ++i;
2190
2191 for (n = 1; i + n < gap->ga_len; ++n)
2192 if ((*smp[i + n].sm_lead_w & 0xff) == c)
2193 {
2194 salitem_T tsal;
2195
2196 /* Move entry with same index byte after the entries
2197 * we already found. */
2198 ++i;
2199 --n;
2200 tsal = smp[i + n];
2201 mch_memmove(smp + i + 1, smp + i,
2202 sizeof(salitem_T) * n);
2203 smp[i] = tsal;
2204 }
2205 }
2206#endif
2207 }
2208 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002209 }
2210
2211 cnt = (getc(fd) << 8) + getc(fd); /* <maplen> */
2212 if (cnt < 0)
2213 goto formerr;
2214 p = alloc(cnt + 1);
2215 if (p == NULL)
2216 goto endFAIL;
2217 for (i = 0; i < cnt; ++i)
2218 p[i] = getc(fd); /* <mapstr> */
2219 p[i] = NUL;
Bram Moolenaarea424162005-06-16 21:51:00 +00002220 set_map_str(lp, p);
2221 vim_free(p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002222
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002223
Bram Moolenaar51485f02005-06-04 21:55:20 +00002224 /* round 1: <LWORDTREE>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002225 * round 2: <KWORDTREE>
2226 * round 3: <PREFIXTREE> */
2227 for (round = 1; round <= 3; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002228 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002229 /* The tree size was computed when writing the file, so that we can
2230 * allocate it as one long block. <nodecount> */
2231 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
2232 if (len < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002233 goto truncerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002234 if (len > 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002235 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002236 /* Allocate the byte array. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002237 bp = lalloc((long_u)len, TRUE);
2238 if (bp == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002239 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002240 if (round == 1)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002241 lp->sl_fbyts = bp;
2242 else if (round == 2)
2243 lp->sl_kbyts = bp;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002244 else
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002245 lp->sl_pbyts = bp;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002246
2247 /* Allocate the index array. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002248 ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
2249 if (ip == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002250 goto endFAIL;
2251 if (round == 1)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002252 lp->sl_fidxs = ip;
2253 else if (round == 2)
2254 lp->sl_kidxs = ip;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002255 else
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002256 lp->sl_pidxs = ip;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002257
2258 /* Read the tree and store it in the array. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002259 idx = read_tree(fd, bp, ip, len, 0, round == 3, lp->sl_prefixcnt);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002260 if (idx == -1)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002261 goto truncerr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002262 if (idx < 0)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002263 goto formerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002264 }
2265 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002266
Bram Moolenaarb765d632005-06-07 21:00:02 +00002267 /* For a new file link it in the list of spell files. */
2268 if (old_lp == NULL)
2269 {
2270 lp->sl_next = first_lang;
2271 first_lang = lp;
2272 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002273
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002274 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002275
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002276endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00002277 if (lang != NULL)
2278 /* truncating the name signals the error to spell_load_lang() */
2279 *lang = NUL;
2280 if (lp != NULL && old_lp == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002281 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002282 slang_free(lp);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002283 lp = NULL;
2284 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002285
2286endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002287 if (fd != NULL)
2288 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002289 sourcing_name = save_sourcing_name;
2290 sourcing_lnum = save_sourcing_lnum;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002291
2292 return lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002293}
2294
Bram Moolenaara1ba8112005-06-28 23:23:32 +00002295#ifdef FEAT_MBYTE
2296/*
2297 * Turn a multi-byte string into a wide character string.
2298 * Return it in allocated memory (NULL for out-of-memory)
2299 */
2300 static int *
2301mb_str2wide(s)
2302 char_u *s;
2303{
2304 int *res;
2305 char_u *p;
2306 int i = 0;
2307
2308 res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
2309 if (res != NULL)
2310 {
2311 for (p = s; *p != NUL; )
2312 res[i++] = mb_ptr2char_adv(&p);
2313 res[i] = NUL;
2314 }
2315 return res;
2316}
2317#endif
2318
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002319/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002320 * Read one row of siblings from the spell file and store it in the byte array
2321 * "byts" and index array "idxs". Recursively read the children.
2322 *
Bram Moolenaar0c405862005-06-22 22:26:26 +00002323 * NOTE: The code here must match put_node().
Bram Moolenaar51485f02005-06-04 21:55:20 +00002324 *
2325 * Returns the index follosing the siblings.
2326 * Returns -1 if the file is shorter than expected.
2327 * Returns -2 if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002328 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002329 static idx_T
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002330read_tree(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002331 FILE *fd;
2332 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002333 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002334 int maxidx; /* size of arrays */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002335 idx_T startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002336 int prefixtree; /* TRUE for reading PREFIXTREE */
2337 int maxprefcondnr; /* maximum for <prefcondnr> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002338{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002339 int len;
2340 int i;
2341 int n;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002342 idx_T idx = startidx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002343 int c;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002344 int c2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002345#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002346
Bram Moolenaar51485f02005-06-04 21:55:20 +00002347 len = getc(fd); /* <siblingcount> */
2348 if (len <= 0)
2349 return -1;
2350
2351 if (startidx + len >= maxidx)
2352 return -2;
2353 byts[idx++] = len;
2354
2355 /* Read the byte values, flag/region bytes and shared indexes. */
2356 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002357 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002358 c = getc(fd); /* <byte> */
2359 if (c < 0)
2360 return -1;
2361 if (c <= BY_SPECIAL)
2362 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002363 if (c == BY_NOFLAGS && !prefixtree)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002364 {
2365 /* No flags, all regions. */
2366 idxs[idx] = 0;
2367 c = 0;
2368 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002369 else if (c == BY_FLAGS || c == BY_NOFLAGS)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002370 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002371 if (prefixtree)
2372 {
2373 /* Read the prefix ID and the condition nr. In idxs[]
2374 * store the prefix ID in the low byte, the condition
2375 * index shifted up 8 bits. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002376 c2 = getc(fd); /* <prefixID> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002377 n = (getc(fd) << 8) + getc(fd); /* <prefcondnr> */
2378 if (n >= maxprefcondnr)
2379 return -2;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002380 c2 += (n << 8);
2381 if (c == BY_NOFLAGS)
2382 c = c2;
2383 else
2384 c = c2 | WF_RAREPFX;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002385 }
2386 else
2387 {
2388 /* Read flags and optional region and prefix ID. In
2389 * idxs[] the flags go in the low byte, region above that
2390 * and prefix ID above the region. */
2391 c = getc(fd); /* <flags> */
2392 if (c & WF_REGION)
2393 c = (getc(fd) << 8) + c; /* <region> */
2394 if (c & WF_PFX)
2395 c = (getc(fd) << 16) + c; /* <prefixID> */
2396 }
2397
Bram Moolenaar51485f02005-06-04 21:55:20 +00002398 idxs[idx] = c;
2399 c = 0;
2400 }
2401 else /* c == BY_INDEX */
2402 {
2403 /* <nodeidx> */
2404 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
2405 if (n < 0 || n >= maxidx)
2406 return -2;
2407 idxs[idx] = n + SHARED_MASK;
2408 c = getc(fd); /* <xbyte> */
2409 }
2410 }
2411 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002412 }
2413
Bram Moolenaar51485f02005-06-04 21:55:20 +00002414 /* Recursively read the children for non-shared siblings.
2415 * Skip the end-of-word ones (zero byte value) and the shared ones (and
2416 * remove SHARED_MASK) */
2417 for (i = 1; i <= len; ++i)
2418 if (byts[startidx + i] != 0)
2419 {
2420 if (idxs[startidx + i] & SHARED_MASK)
2421 idxs[startidx + i] &= ~SHARED_MASK;
2422 else
2423 {
2424 idxs[startidx + i] = idx;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002425 idx = read_tree(fd, byts, idxs, maxidx, idx,
2426 prefixtree, maxprefcondnr);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002427 if (idx < 0)
2428 break;
2429 }
2430 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002431
Bram Moolenaar51485f02005-06-04 21:55:20 +00002432 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002433}
2434
2435/*
2436 * Parse 'spelllang' and set buf->b_langp accordingly.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002437 * Returns NULL if it's OK, an error message otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002438 */
2439 char_u *
2440did_set_spelllang(buf)
2441 buf_T *buf;
2442{
2443 garray_T ga;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002444 char_u *splp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002445 char_u *region;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002446 int filename;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002447 int region_mask;
2448 slang_T *lp;
2449 int c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002450 char_u lang[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002451 char_u spf_name[MAXPATHL];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002452 int load_spf;
2453 int len;
2454 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002455
2456 ga_init2(&ga, sizeof(langp_T), 2);
2457
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002458 /* Make the name of the .spl file associated with 'spellfile'. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002459 if (*buf->b_p_spf == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002460 load_spf = FALSE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002461 else
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002462 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002463 vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
2464 buf->b_p_spf);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002465 load_spf = TRUE;
2466 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002467
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002468 /* loop over comma separated language names. */
2469 for (splp = buf->b_p_spl; *splp != NUL; )
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002470 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002471 /* Get one language name. */
2472 copy_option_part(&splp, lang, MAXWLEN, ",");
2473
Bram Moolenaar5482f332005-04-17 20:18:43 +00002474 region = NULL;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002475 len = STRLEN(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002476
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002477 /* If the name ends in ".spl" use it as the name of the spell file.
2478 * If there is a region name let "region" point to it and remove it
2479 * from the name. */
2480 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
2481 {
2482 filename = TRUE;
2483
2484 /* Check if we loaded this language before. */
2485 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
2486 if (fullpathcmp(lang, lp->sl_fname, FALSE) == FPC_SAME)
2487 break;
2488 }
2489 else
2490 {
2491 filename = FALSE;
2492 if (len > 3 && lang[len - 3] == '_')
2493 {
2494 region = lang + len - 2;
2495 len -= 3;
2496 lang[len] = NUL;
2497 }
2498
2499 /* Check if we loaded this language before. */
2500 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
2501 if (STRICMP(lang, lp->sl_name) == 0)
2502 break;
2503 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002504
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002505 /* If not found try loading the language now. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002506 if (lp == NULL)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002507 {
2508 if (filename)
2509 (void)spell_load_file(lang, lang, NULL, FALSE);
2510 else
2511 spell_load_lang(lang);
2512 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002513
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002514 /*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002515 * Loop over the languages, there can be several files for "lang".
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002516 */
2517 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002518 if (filename ? fullpathcmp(lang, lp->sl_fname, FALSE) == FPC_SAME
2519 : STRICMP(lang, lp->sl_name) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002520 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00002521 region_mask = REGION_ALL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002522 if (!filename && region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002523 {
2524 /* find region in sl_regions */
2525 c = find_region(lp->sl_regions, region);
2526 if (c == REGION_ALL)
2527 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00002528 if (!lp->sl_add)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002529 smsg((char_u *)
2530 _("Warning: region %s not supported"),
2531 region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002532 }
2533 else
2534 region_mask = 1 << c;
2535 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002536
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002537 if (ga_grow(&ga, 1) == FAIL)
2538 {
2539 ga_clear(&ga);
2540 return e_outofmem;
2541 }
2542 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
2543 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2544 ++ga.ga_len;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002545
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002546 /* Check if this is the spell file related to 'spellfile'. */
2547 if (load_spf && fullpathcmp(spf_name, lp->sl_fname, FALSE)
2548 == FPC_SAME)
2549 load_spf = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002550 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002551 }
2552
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002553 /*
2554 * Make sure the 'spellfile' file is loaded. It may be in 'runtimepath',
2555 * then it's probably loaded above already. Otherwise load it here.
2556 */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002557 if (load_spf)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002558 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002559 /* Check if it was loaded already. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002560 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
2561 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
2562 break;
2563 if (lp == NULL)
2564 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002565 /* Not loaded, try loading it now. The language name includes the
2566 * region name, the region is ignored otherwise. */
2567 vim_strncpy(lang, gettail(buf->b_p_spf), MAXWLEN);
2568 p = vim_strchr(lang, '.');
2569 if (p != NULL)
2570 *p = NUL; /* truncate at ".encoding.add" */
2571 lp = spell_load_file(spf_name, lang, NULL, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002572 }
2573 if (lp != NULL && ga_grow(&ga, 1) == OK)
2574 {
2575 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
2576 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL;
2577 ++ga.ga_len;
2578 }
2579 }
2580
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002581 /* Add a NULL entry to mark the end of the list. */
2582 if (ga_grow(&ga, 1) == FAIL)
2583 {
2584 ga_clear(&ga);
2585 return e_outofmem;
2586 }
2587 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
2588 ++ga.ga_len;
2589
2590 /* Everything is fine, store the new b_langp value. */
2591 ga_clear(&buf->b_langp);
2592 buf->b_langp = ga;
2593
2594 return NULL;
2595}
2596
2597/*
2598 * Find the region "region[2]" in "rp" (points to "sl_regions").
2599 * Each region is simply stored as the two characters of it's name.
2600 * Returns the index if found, REGION_ALL if not found.
2601 */
2602 static int
2603find_region(rp, region)
2604 char_u *rp;
2605 char_u *region;
2606{
2607 int i;
2608
2609 for (i = 0; ; i += 2)
2610 {
2611 if (rp[i] == NUL)
2612 return REGION_ALL;
2613 if (rp[i] == region[0] && rp[i + 1] == region[1])
2614 break;
2615 }
2616 return i / 2;
2617}
2618
2619/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002620 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002621 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00002622 * Word WF_ONECAP
2623 * W WORD WF_ALLCAP
2624 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002625 */
2626 static int
2627captype(word, end)
2628 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002629 char_u *end; /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002630{
2631 char_u *p;
2632 int c;
2633 int firstcap;
2634 int allcap;
2635 int past_second = FALSE; /* past second word char */
2636
2637 /* find first letter */
Bram Moolenaarea408852005-06-25 22:49:46 +00002638 for (p = word; !spell_iswordp(p); mb_ptr_adv(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002639 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002640 return 0; /* only non-word characters, illegal word */
2641#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00002642 if (has_mbyte)
2643 c = mb_ptr2char_adv(&p);
2644 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002645#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00002646 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002647 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002648
2649 /*
2650 * Need to check all letters to find a word with mixed upper/lower.
2651 * But a word with an upper char only at start is a ONECAP.
2652 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002653 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
Bram Moolenaarea408852005-06-25 22:49:46 +00002654 if (spell_iswordp(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002655 {
2656#ifdef FEAT_MBYTE
2657 c = mb_ptr2char(p);
2658#else
2659 c = *p;
2660#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002661 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002662 {
2663 /* UUl -> KEEPCAP */
2664 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002665 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002666 allcap = FALSE;
2667 }
2668 else if (!allcap)
2669 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002670 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002671 past_second = TRUE;
2672 }
2673
2674 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002675 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002676 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002677 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002678 return 0;
2679}
2680
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002681# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
2682/*
2683 * Free all languages.
2684 */
2685 void
2686spell_free_all()
2687{
2688 slang_T *lp;
2689 buf_T *buf;
2690
2691 /* Go through all buffers and handle 'spelllang'. */
2692 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
2693 ga_clear(&buf->b_langp);
2694
2695 while (first_lang != NULL)
2696 {
2697 lp = first_lang;
2698 first_lang = lp->sl_next;
2699 slang_free(lp);
2700 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002701
2702 init_spell_chartab();
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002703}
2704# endif
2705
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002706# if defined(FEAT_MBYTE) || defined(PROTO)
2707/*
2708 * Clear all spelling tables and reload them.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002709 * Used after 'encoding' is set and when ":mkspell" was used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002710 */
2711 void
2712spell_reload()
2713{
2714 buf_T *buf;
Bram Moolenaar3982c542005-06-08 21:56:31 +00002715 win_T *wp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002716
Bram Moolenaarea408852005-06-25 22:49:46 +00002717 /* Initialize the table for spell_iswordp(). */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002718 init_spell_chartab();
2719
2720 /* Unload all allocated memory. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002721 spell_free_all();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002722
2723 /* Go through all buffers and handle 'spelllang'. */
2724 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
2725 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00002726 /* Only load the wordlists when 'spelllang' is set and there is a
2727 * window for this buffer in which 'spell' is set. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002728 if (*buf->b_p_spl != NUL)
Bram Moolenaar3982c542005-06-08 21:56:31 +00002729 {
2730 FOR_ALL_WINDOWS(wp)
2731 if (wp->w_buffer == buf && wp->w_p_spell)
2732 {
2733 (void)did_set_spelllang(buf);
2734# ifdef FEAT_WINDOWS
2735 break;
2736# endif
2737 }
2738 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002739 }
2740}
2741# endif
2742
Bram Moolenaarb765d632005-06-07 21:00:02 +00002743/*
2744 * Reload the spell file "fname" if it's loaded.
2745 */
2746 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002747spell_reload_one(fname, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002748 char_u *fname;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002749 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002750{
2751 slang_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002752 int didit = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002753
Bram Moolenaarb765d632005-06-07 21:00:02 +00002754 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
2755 if (fullpathcmp(fname, lp->sl_fname, FALSE) == FPC_SAME)
2756 {
2757 slang_clear(lp);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002758 (void)spell_load_file(fname, NULL, lp, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002759 redraw_all_later(NOT_VALID);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002760 didit = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00002761 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002762
2763 /* When "zg" was used and the file wasn't loaded yet, should redo
2764 * 'spelllang' to get it loaded. */
2765 if (added_word && !didit)
2766 did_set_spelllang(curbuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002767}
2768
2769
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002770/*
2771 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002772 */
2773
Bram Moolenaar51485f02005-06-04 21:55:20 +00002774#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002775 and .dic file. */
2776/*
2777 * Main structure to store the contents of a ".aff" file.
2778 */
2779typedef struct afffile_S
2780{
2781 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002782 int af_rar; /* RAR ID for rare word */
2783 int af_kep; /* KEP ID for keep-case word */
Bram Moolenaar0c405862005-06-22 22:26:26 +00002784 int af_bad; /* BAD ID for banned word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002785 int af_pfxpostpone; /* postpone prefixes without chop string */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002786 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
2787 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002788} afffile_T;
2789
2790typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002791/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
2792struct affentry_S
2793{
2794 affentry_T *ae_next; /* next affix with same name/number */
2795 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
2796 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002797 char_u *ae_cond; /* condition (NULL for ".") */
2798 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002799 int ae_rare; /* rare affix */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002800};
2801
2802/* Affix header from ".aff" file. Used for af_pref and af_suff. */
2803typedef struct affheader_S
2804{
2805 char_u ah_key[2]; /* key for hashtable == name of affix entry */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002806 int ah_newID; /* prefix ID after renumbering */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002807 int ah_combine; /* suffix may combine with prefix */
2808 affentry_T *ah_first; /* first affix entry */
2809} affheader_T;
2810
2811#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
2812
2813/*
2814 * Structure that is used to store the items in the word tree. This avoids
2815 * the need to keep track of each allocated thing, it's freed all at once
2816 * after ":mkspell" is done.
2817 */
2818#define SBLOCKSIZE 16000 /* size of sb_data */
2819typedef struct sblock_S sblock_T;
2820struct sblock_S
2821{
2822 sblock_T *sb_next; /* next block in list */
2823 int sb_used; /* nr of bytes already in use */
2824 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002825};
2826
2827/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002828 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002829 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002830typedef struct wordnode_S wordnode_T;
2831struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002832{
Bram Moolenaar0c405862005-06-22 22:26:26 +00002833 union /* shared to save space */
2834 {
2835 char_u hashkey[6]; /* room for the hash key */
2836 int index; /* index in written nodes (valid after first
2837 round) */
2838 } wn_u1;
2839 union /* shared to save space */
2840 {
2841 wordnode_T *next; /* next node with same hash key */
2842 wordnode_T *wnode; /* parent node that will write this node */
2843 } wn_u2;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002844 wordnode_T *wn_child; /* child (next byte in word) */
2845 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
2846 always sorted) */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002847 char_u wn_byte; /* Byte for this node. NUL for word end */
2848 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002849 short wn_region; /* when wn_byte is NUL: region mask; for
2850 PREFIXTREE it's the prefcondnr */
2851 char_u wn_prefixID; /* supported/required prefix ID or 0 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002852};
2853
Bram Moolenaar51485f02005-06-04 21:55:20 +00002854#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002855
Bram Moolenaar51485f02005-06-04 21:55:20 +00002856/*
2857 * Info used while reading the spell files.
2858 */
2859typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002860{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002861 wordnode_T *si_foldroot; /* tree with case-folded words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00002862 long si_foldwcount; /* nr of words in si_foldroot */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002863 wordnode_T *si_keeproot; /* tree with keep-case words */
Bram Moolenaar8db73182005-06-17 21:51:16 +00002864 long si_keepwcount; /* nr of words in si_keeproot */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002865 wordnode_T *si_prefroot; /* tree with postponed prefixes */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002866 sblock_T *si_blocks; /* memory blocks used */
2867 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002868 int si_add; /* addition file */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002869 int si_clear_chartab; /* when TRUE clear char tables */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002870 int si_region; /* region mask */
2871 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00002872 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002873 int si_verbose; /* verbose messages */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002874 int si_region_count; /* number of regions supported (1 when there
2875 are no regions) */
2876 char_u si_region_name[16]; /* region names (if count > 1) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002877
2878 garray_T si_rep; /* list of fromto_T entries from REP lines */
2879 garray_T si_sal; /* list of fromto_T entries from SAL lines */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002880 char_u *si_sofofr; /* SOFOFROM text */
2881 char_u *si_sofoto; /* SOFOTO text */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002882 int si_followup; /* soundsalike: ? */
2883 int si_collapse; /* soundsalike: ? */
2884 int si_rem_accents; /* soundsalike: remove accents */
2885 garray_T si_map; /* MAP info concatenated */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002886 char_u *si_midword; /* MIDWORD chars, alloc'ed string or NULL */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002887 garray_T si_prefcond; /* table with conditions for postponed
2888 * prefixes, each stored as a string */
2889 int si_newID; /* current value for ah_newID */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002890} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002891
Bram Moolenaar51485f02005-06-04 21:55:20 +00002892static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002893static int str_equal __ARGS((char_u *s1, char_u *s2));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002894static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
2895static int sal_to_bool __ARGS((char_u *s));
Bram Moolenaar5482f332005-04-17 20:18:43 +00002896static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00002897static void spell_free_aff __ARGS((afffile_T *aff));
2898static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002899static char_u *get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, sblock_T **blp));
2900static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist));
Bram Moolenaar51485f02005-06-04 21:55:20 +00002901static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
2902static void *getroom __ARGS((sblock_T **blp, size_t len));
2903static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
2904static void free_blocks __ARGS((sblock_T *bl));
2905static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002906static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region, char_u *pfxlist));
2907static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, int prefixID, sblock_T **blp));
Bram Moolenaarb765d632005-06-07 21:00:02 +00002908static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
Bram Moolenaar51485f02005-06-04 21:55:20 +00002909static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
2910static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
Bram Moolenaar3982c542005-06-08 21:56:31 +00002911static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar0c405862005-06-22 22:26:26 +00002912static void clear_node __ARGS((wordnode_T *node));
2913static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002914static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
Bram Moolenaarb765d632005-06-07 21:00:02 +00002915static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002916
2917/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002918 * Read the affix file "fname".
Bram Moolenaar3982c542005-06-08 21:56:31 +00002919 * Returns an afffile_T, NULL for complete failure.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002920 */
2921 static afffile_T *
Bram Moolenaar51485f02005-06-04 21:55:20 +00002922spell_read_aff(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002923 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002924 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002925{
2926 FILE *fd;
2927 afffile_T *aff;
2928 char_u rline[MAXLINELEN];
2929 char_u *line;
2930 char_u *pc = NULL;
Bram Moolenaar8db73182005-06-17 21:51:16 +00002931#define MAXITEMCNT 7
2932 char_u *(items[MAXITEMCNT]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002933 int itemcnt;
2934 char_u *p;
2935 int lnum = 0;
2936 affheader_T *cur_aff = NULL;
2937 int aff_todo = 0;
2938 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002939 char_u *low = NULL;
2940 char_u *fol = NULL;
2941 char_u *upp = NULL;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002942 static char *e_affname = N_("Affix name too long in %s line %d: %s");
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002943 int do_rep;
2944 int do_sal;
2945 int do_map;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002946 int do_midword;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002947 int do_sofo;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002948 int found_map = FALSE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002949 hashitem_T *hi;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002950
Bram Moolenaar51485f02005-06-04 21:55:20 +00002951 /*
2952 * Open the file.
2953 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002954 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002955 if (fd == NULL)
2956 {
2957 EMSG2(_(e_notopen), fname);
2958 return NULL;
2959 }
2960
Bram Moolenaarb765d632005-06-07 21:00:02 +00002961 if (spin->si_verbose || p_verbose > 2)
2962 {
2963 if (!spin->si_verbose)
2964 verbose_enter();
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002965 smsg((char_u *)_("Reading affix file %s ..."), fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002966 out_flush();
2967 if (!spin->si_verbose)
2968 verbose_leave();
2969 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002970
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002971 /* Only do REP lines when not done in another .aff file already. */
2972 do_rep = spin->si_rep.ga_len == 0;
2973
2974 /* Only do SAL lines when not done in another .aff file already. */
2975 do_sal = spin->si_sal.ga_len == 0;
2976
2977 /* Only do MAP lines when not done in another .aff file already. */
2978 do_map = spin->si_map.ga_len == 0;
2979
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00002980 /* Only do MIDWORD line when not done in another .aff file already */
2981 do_midword = spin->si_midword == NULL;
2982
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002983 /* Only do SOFOFROM and SOFOTO when not done in another .aff file already */
2984 do_sofo = spin->si_sofofr == NULL;
2985
Bram Moolenaar51485f02005-06-04 21:55:20 +00002986 /*
2987 * Allocate and init the afffile_T structure.
2988 */
2989 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002990 if (aff == NULL)
2991 return NULL;
2992 hash_init(&aff->af_pref);
2993 hash_init(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002994
2995 /*
2996 * Read all the lines in the file one by one.
2997 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002998 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002999 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003000 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003001 ++lnum;
3002
3003 /* Skip comment lines. */
3004 if (*rline == '#')
3005 continue;
3006
3007 /* Convert from "SET" to 'encoding' when needed. */
3008 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003009#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00003010 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003011 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003012 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003013 if (pc == NULL)
3014 {
3015 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
3016 fname, lnum, rline);
3017 continue;
3018 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003019 line = pc;
3020 }
3021 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00003022#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003023 {
3024 pc = NULL;
3025 line = rline;
3026 }
3027
3028 /* Split the line up in white separated items. Put a NUL after each
3029 * item. */
3030 itemcnt = 0;
3031 for (p = line; ; )
3032 {
3033 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
3034 ++p;
3035 if (*p == NUL)
3036 break;
Bram Moolenaar8db73182005-06-17 21:51:16 +00003037 if (itemcnt == MAXITEMCNT) /* too many items */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003038 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003039 items[itemcnt++] = p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003040 while (*p > ' ') /* skip until white space or CR/NL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003041 ++p;
3042 if (*p == NUL)
3043 break;
3044 *p++ = NUL;
3045 }
3046
3047 /* Handle non-empty lines. */
3048 if (itemcnt > 0)
3049 {
3050 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
3051 && aff->af_enc == NULL)
3052 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003053#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00003054 /* Setup for conversion from "ENC" to 'encoding'. */
3055 aff->af_enc = enc_canonize(items[1]);
3056 if (aff->af_enc != NULL && !spin->si_ascii
3057 && convert_setup(&spin->si_conv, aff->af_enc,
3058 p_enc) == FAIL)
3059 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
3060 fname, aff->af_enc, p_enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003061 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003062#else
3063 smsg((char_u *)_("Conversion in %s not supported"), fname);
3064#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003065 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003066 else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2)
3067 {
3068 if (do_midword)
3069 spin->si_midword = vim_strsave(items[1]);
3070 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00003071 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
3072 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003073 /* ignored, we always split */
Bram Moolenaar50cde822005-06-05 21:54:54 +00003074 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003075 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003076 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003077 /* ignored, we look in the tree for what chars may appear */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003078 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003079 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
3080 && aff->af_rar == 0)
3081 {
3082 aff->af_rar = items[1][0];
3083 if (items[1][1] != NUL)
3084 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
3085 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00003086 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
3087 && aff->af_kep == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003088 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003089 aff->af_kep = items[1][0];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003090 if (items[1][1] != NUL)
3091 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
3092 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00003093 else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
3094 && aff->af_bad == 0)
3095 {
3096 aff->af_bad = items[1][0];
3097 if (items[1][1] != NUL)
3098 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
3099 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003100 else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
3101 {
3102 aff->af_pfxpostpone = TRUE;
3103 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003104 else if ((STRCMP(items[0], "PFX") == 0
3105 || STRCMP(items[0], "SFX") == 0)
3106 && aff_todo == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00003107 && itemcnt >= 4)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003108 {
Bram Moolenaar8db73182005-06-17 21:51:16 +00003109 /* Myspell allows extra text after the item, but that might
3110 * mean mistakes go unnoticed. Require a comment-starter. */
3111 if (itemcnt > 4 && *items[4] != '#')
3112 smsg((char_u *)_("Trailing text in %s line %d: %s"),
3113 fname, lnum, items[4]);
3114
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003115 /* New affix letter. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003116 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
3117 sizeof(affheader_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003118 if (cur_aff == NULL)
3119 break;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003120 cur_aff->ah_key[0] = *items[1]; /* TODO: multi-byte? */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003121 cur_aff->ah_key[1] = NUL;
3122 if (items[1][1] != NUL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003123 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003124 if (*items[2] == 'Y')
3125 cur_aff->ah_combine = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003126 else if (*items[2] != 'N')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003127 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
3128 fname, lnum, items[2]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003129
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003130 if (*items[0] == 'P')
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003131 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003132 tp = &aff->af_pref;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003133 /* Use a new number in the .spl file later, to be able to
3134 * handle multiple .aff files. */
3135 if (aff->af_pfxpostpone)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003136 cur_aff->ah_newID = ++spin->si_newID;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003137 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003138 else
3139 tp = &aff->af_suff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003140 aff_todo = atoi((char *)items[3]);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003141 hi = hash_find(tp, cur_aff->ah_key);
3142 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar51485f02005-06-04 21:55:20 +00003143 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003144 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
3145 fname, lnum, items[1]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003146 aff_todo = 0;
3147 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003148 else
3149 hash_add(tp, cur_aff->ah_key);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003150 }
3151 else if ((STRCMP(items[0], "PFX") == 0
3152 || STRCMP(items[0], "SFX") == 0)
3153 && aff_todo > 0
3154 && STRCMP(cur_aff->ah_key, items[1]) == 0
Bram Moolenaar8db73182005-06-17 21:51:16 +00003155 && itemcnt >= 5)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003156 {
3157 affentry_T *aff_entry;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003158 int rare = FALSE;
3159 int lasti = 5;
3160
3161 /* Check for "rare" after the other info. */
3162 if (itemcnt > 5 && STRICMP(items[5], "rare") == 0)
3163 {
3164 rare = TRUE;
3165 lasti = 6;
3166 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003167
Bram Moolenaar8db73182005-06-17 21:51:16 +00003168 /* Myspell allows extra text after the item, but that might
3169 * mean mistakes go unnoticed. Require a comment-starter. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003170 if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar8db73182005-06-17 21:51:16 +00003171 smsg((char_u *)_("Trailing text in %s line %d: %s"),
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003172 fname, lnum, items[lasti]);
Bram Moolenaar8db73182005-06-17 21:51:16 +00003173
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003174 /* New item for an affix letter. */
3175 --aff_todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003176 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
3177 sizeof(affentry_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003178 if (aff_entry == NULL)
3179 break;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003180 aff_entry->ae_rare = rare;
Bram Moolenaar5482f332005-04-17 20:18:43 +00003181
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003182 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003183 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
3184 items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003185 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003186 aff_entry->ae_add = getroom_save(&spin->si_blocks,
3187 items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003188
Bram Moolenaar51485f02005-06-04 21:55:20 +00003189 /* Don't use an affix entry with non-ASCII characters when
3190 * "spin->si_ascii" is TRUE. */
3191 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00003192 || has_non_ascii(aff_entry->ae_add)))
3193 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00003194 aff_entry->ae_next = cur_aff->ah_first;
3195 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003196
3197 if (STRCMP(items[4], ".") != 0)
3198 {
3199 char_u buf[MAXLINELEN];
3200
3201 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
3202 items[4]);
3203 if (*items[0] == 'P')
3204 sprintf((char *)buf, "^%s", items[4]);
3205 else
3206 sprintf((char *)buf, "%s$", items[4]);
3207 aff_entry->ae_prog = vim_regcomp(buf,
3208 RE_MAGIC + RE_STRING);
3209 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003210
3211 /* For postponed prefixes we need an entry in si_prefcond
3212 * for the condition. Use an existing one if possible. */
3213 if (*items[0] == 'P' && aff->af_pfxpostpone
3214 && aff_entry->ae_chop == NULL)
3215 {
3216 int idx;
3217 char_u **pp;
3218
3219 for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
3220 --idx)
3221 {
3222 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
3223 if (str_equal(p, aff_entry->ae_cond))
3224 break;
3225 }
3226 if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
3227 {
3228 /* Not found, add a new condition. */
3229 idx = spin->si_prefcond.ga_len++;
3230 pp = ((char_u **)spin->si_prefcond.ga_data) + idx;
3231 if (aff_entry->ae_cond == NULL)
3232 *pp = NULL;
3233 else
3234 *pp = getroom_save(&spin->si_blocks,
3235 aff_entry->ae_cond);
3236 }
3237
3238 /* Add the prefix to the prefix tree. */
3239 if (aff_entry->ae_add == NULL)
3240 p = (char_u *)"";
3241 else
3242 p = aff_entry->ae_add;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003243 tree_add_word(p, spin->si_prefroot, rare ? -2 : -1,
3244 idx, cur_aff->ah_newID, &spin->si_blocks);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003245 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00003246 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003247 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003248 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
3249 {
3250 if (fol != NULL)
3251 smsg((char_u *)_("Duplicate FOL in %s line %d"),
3252 fname, lnum);
3253 else
3254 fol = vim_strsave(items[1]);
3255 }
3256 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
3257 {
3258 if (low != NULL)
3259 smsg((char_u *)_("Duplicate LOW in %s line %d"),
3260 fname, lnum);
3261 else
3262 low = vim_strsave(items[1]);
3263 }
3264 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
3265 {
3266 if (upp != NULL)
3267 smsg((char_u *)_("Duplicate UPP in %s line %d"),
3268 fname, lnum);
3269 else
3270 upp = vim_strsave(items[1]);
3271 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003272 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003273 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003274 /* Ignore REP count */;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003275 if (!isdigit(*items[1]))
3276 smsg((char_u *)_("Expected REP count in %s line %d"),
3277 fname, lnum);
3278 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003279 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
3280 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003281 /* REP item */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003282 if (do_rep)
3283 add_fromto(spin, &spin->si_rep, items[1], items[2]);
3284 }
3285 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
3286 {
3287 /* MAP item or count */
3288 if (!found_map)
3289 {
3290 /* First line contains the count. */
3291 found_map = TRUE;
3292 if (!isdigit(*items[1]))
3293 smsg((char_u *)_("Expected MAP count in %s line %d"),
3294 fname, lnum);
3295 }
3296 else if (do_map)
3297 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00003298 int c;
3299
3300 /* Check that every character appears only once. */
3301 for (p = items[1]; *p != NUL; )
3302 {
3303#ifdef FEAT_MBYTE
3304 c = mb_ptr2char_adv(&p);
3305#else
3306 c = *p++;
3307#endif
3308 if ((spin->si_map.ga_len > 0
3309 && vim_strchr(spin->si_map.ga_data, c)
3310 != NULL)
3311 || vim_strchr(p, c) != NULL)
3312 smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
3313 fname, lnum);
3314 }
3315
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003316 /* We simply concatenate all the MAP strings, separated by
3317 * slashes. */
3318 ga_concat(&spin->si_map, items[1]);
3319 ga_append(&spin->si_map, '/');
3320 }
3321 }
3322 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
3323 {
3324 if (do_sal)
3325 {
3326 /* SAL item (sounds-a-like)
3327 * Either one of the known keys or a from-to pair. */
3328 if (STRCMP(items[1], "followup") == 0)
3329 spin->si_followup = sal_to_bool(items[2]);
3330 else if (STRCMP(items[1], "collapse_result") == 0)
3331 spin->si_collapse = sal_to_bool(items[2]);
3332 else if (STRCMP(items[1], "remove_accents") == 0)
3333 spin->si_rem_accents = sal_to_bool(items[2]);
3334 else
3335 /* when "to" is "_" it means empty */
3336 add_fromto(spin, &spin->si_sal, items[1],
3337 STRCMP(items[2], "_") == 0 ? (char_u *)""
3338 : items[2]);
3339 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003340 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003341 else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
3342 && (!do_sofo || spin->si_sofofr == NULL))
3343 {
3344 if (do_sofo)
3345 spin->si_sofofr = vim_strsave(items[1]);
3346 }
3347 else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
3348 && (!do_sofo || spin->si_sofoto == NULL))
3349 {
3350 if (do_sofo)
3351 spin->si_sofoto = vim_strsave(items[1]);
3352 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003353 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003354 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
3355 fname, lnum, items[0]);
3356 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003357 }
3358
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003359 if (do_sofo && (spin->si_sofofr == NULL) != (spin->si_sofoto == NULL))
3360 smsg((char_u *)_("Missing SOFO%s line in %s"),
3361 spin->si_sofofr == NULL ? "FROM" : "TO", fname);
3362 if (spin->si_sofofr != NULL && spin->si_sal.ga_len > 0)
3363 smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
3364
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003365 if (fol != NULL || low != NULL || upp != NULL)
3366 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003367 if (spin->si_clear_chartab)
3368 {
3369 /* Clear the char type tables, don't want to use any of the
3370 * currently used spell properties. */
3371 init_spell_chartab();
3372 spin->si_clear_chartab = FALSE;
3373 }
3374
Bram Moolenaar3982c542005-06-08 21:56:31 +00003375 /*
3376 * Don't write a word table for an ASCII file, so that we don't check
3377 * for conflicts with a word table that matches 'encoding'.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003378 * Don't write one for utf-8 either, we use utf_*() and
Bram Moolenaar3982c542005-06-08 21:56:31 +00003379 * mb_get_class(), the list of chars in the file will be incomplete.
3380 */
3381 if (!spin->si_ascii
3382#ifdef FEAT_MBYTE
3383 && !enc_utf8
3384#endif
3385 )
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00003386 {
3387 if (fol == NULL || low == NULL || upp == NULL)
3388 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
3389 else
Bram Moolenaar3982c542005-06-08 21:56:31 +00003390 (void)set_spell_chartab(fol, low, upp);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00003391 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003392
3393 vim_free(fol);
3394 vim_free(low);
3395 vim_free(upp);
3396 }
3397
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003398 vim_free(pc);
3399 fclose(fd);
3400 return aff;
3401}
3402
3403/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003404 * Return TRUE if strings "s1" and "s2" are equal. Also consider both being
3405 * NULL as equal.
3406 */
3407 static int
3408str_equal(s1, s2)
3409 char_u *s1;
3410 char_u *s2;
3411{
3412 if (s1 == NULL || s2 == NULL)
3413 return s1 == s2;
3414 return STRCMP(s1, s2) == 0;
3415}
3416
3417/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003418 * Add a from-to item to "gap". Used for REP and SAL items.
3419 * They are stored case-folded.
3420 */
3421 static void
3422add_fromto(spin, gap, from, to)
3423 spellinfo_T *spin;
3424 garray_T *gap;
3425 char_u *from;
3426 char_u *to;
3427{
3428 fromto_T *ftp;
3429 char_u word[MAXWLEN];
3430
3431 if (ga_grow(gap, 1) == OK)
3432 {
3433 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
3434 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
3435 ftp->ft_from = getroom_save(&spin->si_blocks, word);
3436 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
3437 ftp->ft_to = getroom_save(&spin->si_blocks, word);
3438 ++gap->ga_len;
3439 }
3440}
3441
3442/*
3443 * Convert a boolean argument in a SAL line to TRUE or FALSE;
3444 */
3445 static int
3446sal_to_bool(s)
3447 char_u *s;
3448{
3449 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
3450}
3451
3452/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00003453 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
3454 * When "s" is NULL FALSE is returned.
3455 */
3456 static int
3457has_non_ascii(s)
3458 char_u *s;
3459{
3460 char_u *p;
3461
3462 if (s != NULL)
3463 for (p = s; *p != NUL; ++p)
3464 if (*p >= 128)
3465 return TRUE;
3466 return FALSE;
3467}
3468
3469/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003470 * Free the structure filled by spell_read_aff().
3471 */
3472 static void
3473spell_free_aff(aff)
3474 afffile_T *aff;
3475{
3476 hashtab_T *ht;
3477 hashitem_T *hi;
3478 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003479 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003480 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003481
3482 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003483
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003484 /* All this trouble to free the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003485 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
3486 {
3487 todo = ht->ht_used;
3488 for (hi = ht->ht_array; todo > 0; ++hi)
3489 {
3490 if (!HASHITEM_EMPTY(hi))
3491 {
3492 --todo;
3493 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003494 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
3495 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003496 }
3497 }
3498 if (ht == &aff->af_suff)
3499 break;
3500 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003501
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003502 hash_clear(&aff->af_pref);
3503 hash_clear(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003504}
3505
3506/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003507 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003508 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003509 */
3510 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00003511spell_read_dic(fname, spin, affile)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003512 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003513 spellinfo_T *spin;
3514 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003515{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003516 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003517 char_u line[MAXLINELEN];
Bram Moolenaar51485f02005-06-04 21:55:20 +00003518 char_u *afflist;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003519 char_u *pfxlist;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003520 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003521 char_u *pc;
3522 char_u *w;
3523 int l;
3524 hash_T hash;
3525 hashitem_T *hi;
3526 FILE *fd;
3527 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003528 int non_ascii = 0;
3529 int retval = OK;
3530 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003531 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003532
Bram Moolenaar51485f02005-06-04 21:55:20 +00003533 /*
3534 * Open the file.
3535 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003536 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003537 if (fd == NULL)
3538 {
3539 EMSG2(_(e_notopen), fname);
3540 return FAIL;
3541 }
3542
Bram Moolenaar51485f02005-06-04 21:55:20 +00003543 /* The hashtable is only used to detect duplicated words. */
3544 hash_init(&ht);
3545
Bram Moolenaar8db73182005-06-17 21:51:16 +00003546 spin->si_foldwcount = 0;
3547 spin->si_keepwcount = 0;
3548
Bram Moolenaarb765d632005-06-07 21:00:02 +00003549 if (spin->si_verbose || p_verbose > 2)
3550 {
3551 if (!spin->si_verbose)
3552 verbose_enter();
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003553 smsg((char_u *)_("Reading dictionary file %s ..."), fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003554 out_flush();
3555 if (!spin->si_verbose)
3556 verbose_leave();
3557 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003558
3559 /* Read and ignore the first line: word count. */
3560 (void)vim_fgets(line, MAXLINELEN, fd);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003561 if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003562 EMSG2(_("E760: No word count in %s"), fname);
3563
3564 /*
3565 * Read all the lines in the file one by one.
3566 * The words are converted to 'encoding' here, before being added to
3567 * the hashtable.
3568 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003569 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003570 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003571 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003572 ++lnum;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003573 if (line[0] == '#')
3574 continue; /* comment line */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003575
Bram Moolenaar51485f02005-06-04 21:55:20 +00003576 /* Remove CR, LF and white space from the end. White space halfway
3577 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003578 l = STRLEN(line);
3579 while (l > 0 && line[l - 1] <= ' ')
3580 --l;
3581 if (l == 0)
3582 continue; /* empty line */
3583 line[l] = NUL;
3584
Bram Moolenaar51485f02005-06-04 21:55:20 +00003585 /* Find the optional affix names. */
3586 afflist = vim_strchr(line, '/');
3587 if (afflist != NULL)
3588 *afflist++ = NUL;
3589
3590 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
3591 if (spin->si_ascii && has_non_ascii(line))
3592 {
3593 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00003594 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003595 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00003596
Bram Moolenaarb765d632005-06-07 21:00:02 +00003597#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003598 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003599 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003600 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003601 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003602 if (pc == NULL)
3603 {
3604 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
3605 fname, lnum, line);
3606 continue;
3607 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003608 w = pc;
3609 }
3610 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00003611#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003612 {
3613 pc = NULL;
3614 w = line;
3615 }
3616
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003617 /* This takes time, print a message now and then. */
3618 if (spin->si_verbose && (lnum & 0x3ff) == 0)
3619 {
3620 vim_snprintf((char *)message, sizeof(message),
3621 _("line %6d, word %6d - %s"),
3622 lnum, spin->si_foldwcount + spin->si_keepwcount, w);
3623 msg_start();
3624 msg_puts_long_attr(message, 0);
3625 msg_clr_eos();
3626 msg_didout = FALSE;
3627 msg_col = 0;
3628 out_flush();
3629 }
3630
Bram Moolenaar51485f02005-06-04 21:55:20 +00003631 /* Store the word in the hashtable to be able to find duplicates. */
3632 dw = (char_u *)getroom_save(&spin->si_blocks, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003633 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003634 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003635 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003636 if (retval == FAIL)
3637 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003638
Bram Moolenaar51485f02005-06-04 21:55:20 +00003639 hash = hash_hash(dw);
3640 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003641 if (!HASHITEM_EMPTY(hi))
3642 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003643 fname, lnum, dw);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003644 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00003645 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003646
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003647 flags = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003648 pfxlist = NULL;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003649 if (afflist != NULL)
3650 {
3651 /* Check for affix name that stands for keep-case word and stands
3652 * for rare word (if defined). */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003653 if (affile->af_kep != NUL
3654 && vim_strchr(afflist, affile->af_kep) != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003655 flags |= WF_KEEPCAP;
3656 if (affile->af_rar != NUL
3657 && vim_strchr(afflist, affile->af_rar) != NULL)
3658 flags |= WF_RARE;
Bram Moolenaar0c405862005-06-22 22:26:26 +00003659 if (affile->af_bad != NUL
3660 && vim_strchr(afflist, affile->af_bad) != NULL)
3661 flags |= WF_BANNED;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003662
3663 if (affile->af_pfxpostpone)
3664 /* Need to store the list of prefix IDs with the word. */
3665 pfxlist = get_pfxlist(affile, afflist, &spin->si_blocks);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003666 }
3667
Bram Moolenaar51485f02005-06-04 21:55:20 +00003668 /* Add the word to the word tree(s). */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003669 if (store_word(dw, spin, flags, spin->si_region, pfxlist) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003670 retval = FAIL;
3671
3672 if (afflist != NULL)
3673 {
3674 /* Find all matching suffixes and add the resulting words.
3675 * Additionally do matching prefixes that combine. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003676 if (store_aff_word(dw, spin, afflist, affile,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003677 &affile->af_suff, &affile->af_pref,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003678 FALSE, flags, pfxlist) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003679 retval = FAIL;
3680
3681 /* Find all matching prefixes and add the resulting words. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003682 if (store_aff_word(dw, spin, afflist, affile,
3683 &affile->af_pref, NULL,
3684 FALSE, flags, pfxlist) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003685 retval = FAIL;
3686 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003687 }
3688
Bram Moolenaar51485f02005-06-04 21:55:20 +00003689 if (spin->si_ascii && non_ascii > 0)
3690 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
3691 non_ascii);
3692 hash_clear(&ht);
3693
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003694 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003695 return retval;
3696}
3697
3698/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003699 * Get the list of prefix IDs from the affix list "afflist".
3700 * Used for PFXPOSTPONE.
3701 * Returns a string allocated with getroom(). NULL when there are no prefixes
3702 * or when out of memory.
3703 */
3704 static char_u *
3705get_pfxlist(affile, afflist, blp)
3706 afffile_T *affile;
3707 char_u *afflist;
3708 sblock_T **blp;
3709{
3710 char_u *p;
3711 int cnt;
3712 int round;
3713 char_u *res = NULL;
3714 char_u key[2];
3715 hashitem_T *hi;
3716
3717 key[1] = NUL;
3718
3719 /* round 1: count the number of prefix IDs.
3720 * round 2: move prefix IDs to "res" */
3721 for (round = 1; round <= 2; ++round)
3722 {
3723 cnt = 0;
3724 for (p = afflist; *p != NUL; ++p)
3725 {
3726 key[0] = *p;
3727 hi = hash_find(&affile->af_pref, key);
3728 if (!HASHITEM_EMPTY(hi))
3729 {
3730 /* This is a prefix ID, use the new number. */
3731 if (round == 2)
3732 res[cnt] = HI2AH(hi)->ah_newID;
3733 ++cnt;
3734 }
3735 }
3736 if (round == 1 && cnt > 0)
3737 res = getroom(blp, cnt + 1);
3738 if (res == NULL)
3739 break;
3740 }
3741
3742 if (res != NULL)
3743 res[cnt] = NUL;
3744 return res;
3745}
3746
3747/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003748 * Apply affixes to a word and store the resulting words.
3749 * "ht" is the hashtable with affentry_T that need to be applied, either
3750 * prefixes or suffixes.
3751 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
3752 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003753 *
3754 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003755 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003756 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003757store_aff_word(word, spin, afflist, affile, ht, xht, comb, flags, pfxlist)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003758 char_u *word; /* basic word start */
3759 spellinfo_T *spin; /* spell info */
3760 char_u *afflist; /* list of names of supported affixes */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003761 afffile_T *affile;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003762 hashtab_T *ht;
3763 hashtab_T *xht;
3764 int comb; /* only use affixes that combine */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003765 int flags; /* flags for the word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003766 char_u *pfxlist; /* list of prefix IDs */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003767{
3768 int todo;
3769 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003770 affheader_T *ah;
3771 affentry_T *ae;
3772 regmatch_T regmatch;
3773 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003774 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003775 int i;
3776 char_u *p;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003777 int use_flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003778
Bram Moolenaar51485f02005-06-04 21:55:20 +00003779 todo = ht->ht_used;
3780 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003781 {
3782 if (!HASHITEM_EMPTY(hi))
3783 {
3784 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003785 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00003786
Bram Moolenaar51485f02005-06-04 21:55:20 +00003787 /* Check that the affix combines, if required, and that the word
3788 * supports this affix. */
3789 if ((!comb || ah->ah_combine)
3790 && vim_strchr(afflist, *ah->ah_key) != NULL)
Bram Moolenaar5482f332005-04-17 20:18:43 +00003791 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003792 /* Loop over all affix entries with this name. */
3793 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003794 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003795 /* Check the condition. It's not logical to match case
3796 * here, but it is required for compatibility with
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003797 * Myspell.
3798 * For prefixes, when "PFXPOSTPONE" was used, only do
3799 * prefixes with a chop string. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003800 regmatch.regprog = ae->ae_prog;
3801 regmatch.rm_ic = FALSE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003802 if ((xht != NULL || !affile->af_pfxpostpone
3803 || ae->ae_chop != NULL)
3804 && (ae->ae_prog == NULL
3805 || vim_regexec(&regmatch, word, (colnr_T)0)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003806 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003807 /* Match. Remove the chop and add the affix. */
3808 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003809 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003810 /* prefix: chop/add at the start of the word */
3811 if (ae->ae_add == NULL)
3812 *newword = NUL;
3813 else
3814 STRCPY(newword, ae->ae_add);
3815 p = word;
3816 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003817 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003818 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003819#ifdef FEAT_MBYTE
3820 if (has_mbyte)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003821 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003822 i = mb_charlen(ae->ae_chop);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003823 for ( ; i > 0; --i)
3824 mb_ptr_adv(p);
3825 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00003826 else
3827#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003828 p += STRLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003829 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003830 STRCAT(newword, p);
3831 }
3832 else
3833 {
3834 /* suffix: chop/add at the end of the word */
3835 STRCPY(newword, word);
3836 if (ae->ae_chop != NULL)
3837 {
3838 /* Remove chop string. */
3839 p = newword + STRLEN(newword);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003840#ifdef FEAT_MBYTE
3841 if (has_mbyte)
3842 i = mb_charlen(ae->ae_chop);
3843 else
3844#endif
3845 i = STRLEN(ae->ae_chop);
3846 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003847 mb_ptr_back(newword, p);
3848 *p = NUL;
3849 }
3850 if (ae->ae_add != NULL)
3851 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003852 }
3853
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003854 /* Obey the "rare" flag of the affix. */
3855 if (ae->ae_rare)
3856 use_flags = flags | WF_RARE;
3857 else
3858 use_flags = flags;
3859
Bram Moolenaar51485f02005-06-04 21:55:20 +00003860 /* Store the modified word. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003861 if (store_word(newword, spin, use_flags,
3862 spin->si_region, pfxlist) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003863 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003864
Bram Moolenaar51485f02005-06-04 21:55:20 +00003865 /* When added a suffix and combining is allowed also
3866 * try adding prefixes additionally. */
3867 if (xht != NULL && ah->ah_combine)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00003868 if (store_aff_word(newword, spin, afflist, affile,
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003869 xht, NULL, TRUE, use_flags, pfxlist)
3870 == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003871 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003872 }
3873 }
3874 }
3875 }
3876 }
3877
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003878 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003879}
3880
3881/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003882 * Read a file with a list of words.
3883 */
3884 static int
3885spell_read_wordfile(fname, spin)
3886 char_u *fname;
3887 spellinfo_T *spin;
3888{
3889 FILE *fd;
3890 long lnum = 0;
3891 char_u rline[MAXLINELEN];
3892 char_u *line;
3893 char_u *pc = NULL;
3894 int l;
3895 int retval = OK;
3896 int did_word = FALSE;
3897 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003898 int flags;
Bram Moolenaar3982c542005-06-08 21:56:31 +00003899 int regionmask;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003900
3901 /*
3902 * Open the file.
3903 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003904 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00003905 if (fd == NULL)
3906 {
3907 EMSG2(_(e_notopen), fname);
3908 return FAIL;
3909 }
3910
Bram Moolenaarb765d632005-06-07 21:00:02 +00003911 if (spin->si_verbose || p_verbose > 2)
3912 {
3913 if (!spin->si_verbose)
3914 verbose_enter();
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003915 smsg((char_u *)_("Reading word file %s ..."), fname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003916 out_flush();
3917 if (!spin->si_verbose)
3918 verbose_leave();
3919 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003920
3921 /*
3922 * Read all the lines in the file one by one.
3923 */
3924 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
3925 {
3926 line_breakcheck();
3927 ++lnum;
3928
3929 /* Skip comment lines. */
3930 if (*rline == '#')
3931 continue;
3932
3933 /* Remove CR, LF and white space from the end. */
3934 l = STRLEN(rline);
3935 while (l > 0 && rline[l - 1] <= ' ')
3936 --l;
3937 if (l == 0)
3938 continue; /* empty or blank line */
3939 rline[l] = NUL;
3940
3941 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
3942 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003943#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00003944 if (spin->si_conv.vc_type != CONV_NONE)
3945 {
3946 pc = string_convert(&spin->si_conv, rline, NULL);
3947 if (pc == NULL)
3948 {
3949 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
3950 fname, lnum, rline);
3951 continue;
3952 }
3953 line = pc;
3954 }
3955 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00003956#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00003957 {
3958 pc = NULL;
3959 line = rline;
3960 }
3961
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003962 flags = 0;
Bram Moolenaar3982c542005-06-08 21:56:31 +00003963 regionmask = spin->si_region;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003964
3965 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00003966 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003967 ++line;
Bram Moolenaar3982c542005-06-08 21:56:31 +00003968
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003969 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003970 {
3971 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar3982c542005-06-08 21:56:31 +00003972 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
3973 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003974 else if (did_word)
Bram Moolenaar3982c542005-06-08 21:56:31 +00003975 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
3976 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003977 else
3978 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003979#ifdef FEAT_MBYTE
3980 char_u *enc;
3981
Bram Moolenaar51485f02005-06-04 21:55:20 +00003982 /* Setup for conversion to 'encoding'. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00003983 line += 10;
3984 enc = enc_canonize(line);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003985 if (enc != NULL && !spin->si_ascii
3986 && convert_setup(&spin->si_conv, enc,
3987 p_enc) == FAIL)
3988 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar3982c542005-06-08 21:56:31 +00003989 fname, line, p_enc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003990 vim_free(enc);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003991 spin->si_conv.vc_fail = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003992#else
3993 smsg((char_u *)_("Conversion in %s not supported"), fname);
3994#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00003995 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003996 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003997 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003998
Bram Moolenaar3982c542005-06-08 21:56:31 +00003999 if (STRNCMP(line, "regions=", 8) == 0)
4000 {
4001 if (spin->si_region_count > 1)
4002 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
4003 fname, lnum, line);
4004 else
4005 {
4006 line += 8;
4007 if (STRLEN(line) > 16)
4008 smsg((char_u *)_("Too many regions in %s line %d: %s"),
4009 fname, lnum, line);
4010 else
4011 {
4012 spin->si_region_count = STRLEN(line) / 2;
4013 STRCPY(spin->si_region_name, line);
4014 }
4015 }
4016 continue;
4017 }
4018
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004019 if (*line == '=')
4020 {
4021 /* keep-case word */
4022 flags |= WF_KEEPCAP;
4023 ++line;
4024 }
4025
4026 if (*line == '!')
4027 {
4028 /* Bad, bad, wicked word. */
4029 flags |= WF_BANNED;
4030 ++line;
4031 }
4032 else if (*line == '?')
4033 {
4034 /* Rare word. */
4035 flags |= WF_RARE;
4036 ++line;
4037 }
4038
Bram Moolenaar3982c542005-06-08 21:56:31 +00004039 if (VIM_ISDIGIT(*line))
4040 {
4041 /* region number(s) */
4042 regionmask = 0;
4043 while (VIM_ISDIGIT(*line))
4044 {
4045 l = *line - '0';
4046 if (l > spin->si_region_count)
4047 {
4048 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
4049 fname, lnum, line);
4050 break;
4051 }
4052 regionmask |= 1 << (l - 1);
4053 ++line;
4054 }
4055 flags |= WF_REGION;
4056 }
4057
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004058 if (flags == 0)
4059 {
4060 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00004061 fname, lnum, line);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004062 continue;
4063 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004064 }
4065
4066 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
4067 if (spin->si_ascii && has_non_ascii(line))
4068 {
4069 ++non_ascii;
4070 continue;
4071 }
4072
4073 /* Normal word: store it. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004074 if (store_word(line, spin, flags, regionmask, NULL) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004075 {
4076 retval = FAIL;
4077 break;
4078 }
4079 did_word = TRUE;
4080 }
4081
4082 vim_free(pc);
4083 fclose(fd);
4084
Bram Moolenaarb765d632005-06-07 21:00:02 +00004085 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
4086 {
4087 if (p_verbose > 2)
4088 verbose_enter();
Bram Moolenaar51485f02005-06-04 21:55:20 +00004089 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
4090 non_ascii);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004091 if (p_verbose > 2)
4092 verbose_leave();
4093 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004094 return retval;
4095}
4096
4097/*
4098 * Get part of an sblock_T, "len" bytes long.
4099 * This avoids calling free() for every little struct we use.
4100 * The memory is cleared to all zeros.
4101 * Returns NULL when out of memory.
4102 */
4103 static void *
4104getroom(blp, len)
4105 sblock_T **blp;
4106 size_t len; /* length needed */
4107{
4108 char_u *p;
4109 sblock_T *bl = *blp;
4110
4111 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
4112 {
4113 /* Allocate a block of memory. This is not freed until much later. */
4114 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
4115 if (bl == NULL)
4116 return NULL;
4117 bl->sb_next = *blp;
4118 *blp = bl;
4119 bl->sb_used = 0;
4120 }
4121
4122 p = bl->sb_data + bl->sb_used;
4123 bl->sb_used += len;
4124
4125 return p;
4126}
4127
4128/*
4129 * Make a copy of a string into memory allocated with getroom().
4130 */
4131 static char_u *
4132getroom_save(blp, s)
4133 sblock_T **blp;
4134 char_u *s;
4135{
4136 char_u *sc;
4137
4138 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
4139 if (sc != NULL)
4140 STRCPY(sc, s);
4141 return sc;
4142}
4143
4144
4145/*
4146 * Free the list of allocated sblock_T.
4147 */
4148 static void
4149free_blocks(bl)
4150 sblock_T *bl;
4151{
4152 sblock_T *next;
4153
4154 while (bl != NULL)
4155 {
4156 next = bl->sb_next;
4157 vim_free(bl);
4158 bl = next;
4159 }
4160}
4161
4162/*
4163 * Allocate the root of a word tree.
4164 */
4165 static wordnode_T *
4166wordtree_alloc(blp)
4167 sblock_T **blp;
4168{
4169 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
4170}
4171
4172/*
4173 * Store a word in the tree(s).
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004174 * Always store it in the case-folded tree. A keep-case word can also be used
4175 * with all caps.
Bram Moolenaar51485f02005-06-04 21:55:20 +00004176 * For a keep-case word also store it in the keep-case tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004177 * When "pfxlist" is not NULL store the word for each prefix ID.
Bram Moolenaar51485f02005-06-04 21:55:20 +00004178 */
4179 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004180store_word(word, spin, flags, region, pfxlist)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004181 char_u *word;
4182 spellinfo_T *spin;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004183 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar3982c542005-06-08 21:56:31 +00004184 int region; /* supported region(s) */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004185 char_u *pfxlist; /* list of prefix IDs or NULL */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004186{
4187 int len = STRLEN(word);
4188 int ct = captype(word, word + len);
4189 char_u foldword[MAXWLEN];
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004190 int res = OK;
4191 char_u *p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004192
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004193 (void)spell_casefold(word, len, foldword, MAXWLEN);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004194 for (p = pfxlist; res == OK; ++p)
4195 {
4196 res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
4197 region, p == NULL ? 0 : *p, &spin->si_blocks);
4198 if (p == NULL || *p == NUL)
4199 break;
4200 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00004201 ++spin->si_foldwcount;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004202
4203 if (res == OK && (ct == WF_KEEPCAP || flags & WF_KEEPCAP))
Bram Moolenaar8db73182005-06-17 21:51:16 +00004204 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004205 for (p = pfxlist; res == OK; ++p)
4206 {
4207 res = tree_add_word(word, spin->si_keeproot, flags,
4208 region, p == NULL ? 0 : *p, &spin->si_blocks);
4209 if (p == NULL || *p == NUL)
4210 break;
4211 }
Bram Moolenaar8db73182005-06-17 21:51:16 +00004212 ++spin->si_keepwcount;
4213 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004214 return res;
4215}
4216
4217/*
4218 * Add word "word" to a word tree at "root".
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004219 * When "flags" < 0 we are adding to the prefix tree where flags is used for
4220 * "rare" and "region" is the condition nr.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004221 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004222 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004223 static int
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004224tree_add_word(word, root, flags, region, prefixID, blp)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004225 char_u *word;
4226 wordnode_T *root;
4227 int flags;
4228 int region;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004229 int prefixID;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004230 sblock_T **blp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004231{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004232 wordnode_T *node = root;
4233 wordnode_T *np;
4234 wordnode_T **prev = NULL;
4235 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004236
Bram Moolenaar51485f02005-06-04 21:55:20 +00004237 /* Add each byte of the word to the tree, including the NUL at the end. */
4238 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004239 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004240 /* Look for the sibling that has the same character. They are sorted
4241 * on byte value, thus stop searching when a sibling is found with a
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004242 * higher byte value. For zero bytes (end of word) the sorting is
4243 * done on flags and then on prefixID
Bram Moolenaar51485f02005-06-04 21:55:20 +00004244 */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004245 while (node != NULL
4246 && (node->wn_byte < word[i]
4247 || (node->wn_byte == NUL
4248 && (flags < 0
4249 ? node->wn_prefixID < prefixID
4250 : node->wn_flags < (flags & 0xff)
4251 || (node->wn_flags == (flags & 0xff)
4252 && node->wn_prefixID < prefixID)))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004253 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004254 prev = &node->wn_sibling;
4255 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004256 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004257 if (node == NULL
4258 || node->wn_byte != word[i]
4259 || (word[i] == NUL
4260 && (flags < 0
4261 || node->wn_flags != (flags & 0xff)
4262 || node->wn_prefixID != prefixID)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004263 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004264 /* Allocate a new node. */
4265 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
4266 if (np == NULL)
4267 return FAIL;
4268 np->wn_byte = word[i];
4269 *prev = np;
4270 np->wn_sibling = node;
4271 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004272 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004273
Bram Moolenaar51485f02005-06-04 21:55:20 +00004274 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004275 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004276 node->wn_flags = flags;
4277 node->wn_region |= region;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004278 node->wn_prefixID = prefixID;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004279 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00004280 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004281 prev = &node->wn_child;
4282 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004283 }
4284
4285 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004286}
4287
4288/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004289 * Compress a tree: find tails that are identical and can be shared.
4290 */
4291 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00004292wordtree_compress(root, spin)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004293 wordnode_T *root;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004294 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004295{
4296 hashtab_T ht;
4297 int n;
4298 int tot = 0;
4299
4300 if (root != NULL)
4301 {
4302 hash_init(&ht);
4303 n = node_compress(root, &ht, &tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004304 if (spin->si_verbose || p_verbose > 2)
4305 {
4306 if (!spin->si_verbose)
4307 verbose_enter();
4308 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00004309 n, tot, (tot - n) * 100 / tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004310 if (p_verbose > 2)
4311 verbose_leave();
4312 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004313 hash_clear(&ht);
4314 }
4315}
4316
4317/*
4318 * Compress a node, its siblings and its children, depth first.
4319 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004320 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004321 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00004322node_compress(node, ht, tot)
4323 wordnode_T *node;
4324 hashtab_T *ht;
4325 int *tot; /* total count of nodes before compressing,
4326 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004327{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004328 wordnode_T *np;
4329 wordnode_T *tp;
4330 wordnode_T *child;
4331 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004332 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004333 int len = 0;
4334 unsigned nr, n;
4335 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004336
Bram Moolenaar51485f02005-06-04 21:55:20 +00004337 /*
4338 * Go through the list of siblings. Compress each child and then try
4339 * finding an identical child to replace it.
4340 * Note that with "child" we mean not just the node that is pointed to,
4341 * but the whole list of siblings, of which the node is the first.
4342 */
4343 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004344 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004345 ++len;
4346 if ((child = np->wn_child) != NULL)
4347 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00004348 /* Compress the child. This fills hashkey. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004349 compressed += node_compress(child, ht, tot);
4350
4351 /* Try to find an identical child. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004352 hash = hash_hash(child->wn_u1.hashkey);
4353 hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004354 tp = NULL;
4355 if (!HASHITEM_EMPTY(hi))
4356 {
4357 /* There are children with an identical hash value. Now check
4358 * if there is one that is really identical. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004359 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004360 if (node_equal(child, tp))
4361 {
4362 /* Found one! Now use that child in place of the
4363 * current one. This means the current child is
4364 * dropped from the tree. */
4365 np->wn_child = tp;
4366 ++compressed;
4367 break;
4368 }
4369 if (tp == NULL)
4370 {
4371 /* No other child with this hash value equals the child of
4372 * the node, add it to the linked list after the first
4373 * item. */
4374 tp = HI2WN(hi);
Bram Moolenaar0c405862005-06-22 22:26:26 +00004375 child->wn_u2.next = tp->wn_u2.next;
4376 tp->wn_u2.next = child;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004377 }
4378 }
4379 else
4380 /* No other child has this hash value, add it to the
4381 * hashtable. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004382 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004383 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004384 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004385 *tot += len;
4386
4387 /*
4388 * Make a hash key for the node and its siblings, so that we can quickly
4389 * find a lookalike node. This must be done after compressing the sibling
4390 * list, otherwise the hash key would become invalid by the compression.
4391 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004392 node->wn_u1.hashkey[0] = len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004393 nr = 0;
4394 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004395 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004396 if (np->wn_byte == NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004397 /* end node: use wn_flags, wn_region and wn_prefixID */
4398 n = np->wn_flags + (np->wn_region << 8) + (np->wn_prefixID << 16);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004399 else
4400 /* byte node: use the byte value and the child pointer */
4401 n = np->wn_byte + ((long_u)np->wn_child << 8);
4402 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004403 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004404
4405 /* Avoid NUL bytes, it terminates the hash key. */
4406 n = nr & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004407 node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004408 n = (nr >> 8) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004409 node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004410 n = (nr >> 16) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004411 node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004412 n = (nr >> 24) & 0xff;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004413 node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
4414 node->wn_u1.hashkey[5] = NUL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004415
4416 return compressed;
4417}
4418
4419/*
4420 * Return TRUE when two nodes have identical siblings and children.
4421 */
4422 static int
4423node_equal(n1, n2)
4424 wordnode_T *n1;
4425 wordnode_T *n2;
4426{
4427 wordnode_T *p1;
4428 wordnode_T *p2;
4429
4430 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
4431 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
4432 if (p1->wn_byte != p2->wn_byte
4433 || (p1->wn_byte == NUL
4434 ? (p1->wn_flags != p2->wn_flags
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004435 || p1->wn_region != p2->wn_region
4436 || p1->wn_prefixID != p2->wn_prefixID)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004437 : (p1->wn_child != p2->wn_child)))
4438 break;
4439
4440 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004441}
4442
4443/*
4444 * Write a number to file "fd", MSB first, in "len" bytes.
4445 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004446 void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004447put_bytes(fd, nr, len)
4448 FILE *fd;
4449 long_u nr;
4450 int len;
4451{
4452 int i;
4453
4454 for (i = len - 1; i >= 0; --i)
4455 putc((int)(nr >> (i * 8)), fd);
4456}
4457
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004458static int
4459#ifdef __BORLANDC__
4460_RTLENTRYF
4461#endif
4462rep_compare __ARGS((const void *s1, const void *s2));
4463
4464/*
4465 * Function given to qsort() to sort the REP items on "from" string.
4466 */
4467 static int
4468#ifdef __BORLANDC__
4469_RTLENTRYF
4470#endif
4471rep_compare(s1, s2)
4472 const void *s1;
4473 const void *s2;
4474{
4475 fromto_T *p1 = (fromto_T *)s1;
4476 fromto_T *p2 = (fromto_T *)s2;
4477
4478 return STRCMP(p1->ft_from, p2->ft_from);
4479}
4480
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004481/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004482 * Write the Vim spell file "fname".
4483 */
4484 static void
Bram Moolenaar3982c542005-06-08 21:56:31 +00004485write_vim_spell(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004486 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004487 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004488{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004489 FILE *fd;
4490 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004491 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004492 wordnode_T *tree;
4493 int nodecount;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004494 int i;
4495 int l;
4496 garray_T *gap;
4497 fromto_T *ftp;
4498 char_u *p;
4499 int rr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004500
Bram Moolenaarb765d632005-06-07 21:00:02 +00004501 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00004502 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004503 {
4504 EMSG2(_(e_notopen), fname);
4505 return;
4506 }
4507
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004508 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004509 * <charflagslen> <charflags>
4510 * <fcharslen> <fchars>
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004511 * <midwordlen> <midword>
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004512 * <prefcondcnt> <prefcond> ... */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004513
4514 /* <fileID> */
4515 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
4516 EMSG(_(e_write));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004517
4518 /* write the region names if there is more than one */
Bram Moolenaar3982c542005-06-08 21:56:31 +00004519 if (spin->si_region_count > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004520 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00004521 putc(spin->si_region_count, fd); /* <regioncnt> <regionname> ... */
4522 fwrite(spin->si_region_name, (size_t)(spin->si_region_count * 2),
4523 (size_t)1, fd);
4524 regionmask = (1 << spin->si_region_count) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004525 }
4526 else
4527 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004528 putc(0, fd);
4529 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004530 }
4531
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004532 /*
4533 * Write the table with character flags and table for case folding.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00004534 * <charflagslen> <charflags> <fcharlen> <fchars>
4535 * Skip this for ASCII, the table may conflict with the one used for
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004536 * 'encoding'.
4537 * Also skip this for an .add.spl file, the main spell file must contain
4538 * the table (avoids that it conflicts). File is shorter too.
4539 */
4540 if (spin->si_ascii || spin->si_add)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00004541 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004542 putc(0, fd);
4543 putc(0, fd);
4544 putc(0, fd);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00004545 }
4546 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00004547 write_spell_chartab(fd);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004548
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004549
4550 if (spin->si_midword == NULL)
4551 put_bytes(fd, 0L, 2); /* <midwordlen> */
4552 else
4553 {
4554 i = STRLEN(spin->si_midword);
4555 put_bytes(fd, (long_u)i, 2); /* <midwordlen> */
4556 fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
4557 }
4558
4559
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004560 /* Write the prefix conditions. */
4561 write_spell_prefcond(fd, &spin->si_prefcond);
4562
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004563 /* <SUGGEST> : <repcount> <rep> ...
4564 * <salflags> <salcount> <sal> ...
4565 * <maplen> <mapstr> */
4566
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004567 /* Sort the REP items. */
4568 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
4569 sizeof(fromto_T), rep_compare);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004570
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004571 /* round 1: REP items
4572 * round 2: SAL items (unless SOFO is used) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004573 for (round = 1; round <= 2; ++round)
4574 {
4575 if (round == 1)
4576 gap = &spin->si_rep;
4577 else
4578 {
4579 gap = &spin->si_sal;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004580
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004581 i = 0;
4582 if (spin->si_followup)
4583 i |= SAL_F0LLOWUP;
4584 if (spin->si_collapse)
4585 i |= SAL_COLLAPSE;
4586 if (spin->si_rem_accents)
4587 i |= SAL_REM_ACCENTS;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004588 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
4589 i |= SAL_SOFO;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004590 putc(i, fd); /* <salflags> */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004591 if (i & SAL_SOFO)
4592 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004593 }
4594
4595 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
4596 for (i = 0; i < gap->ga_len; ++i)
4597 {
4598 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
4599 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
4600 ftp = &((fromto_T *)gap->ga_data)[i];
4601 for (rr = 1; rr <= 2; ++rr)
4602 {
4603 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
4604 l = STRLEN(p);
4605 putc(l, fd);
4606 fwrite(p, l, (size_t)1, fd);
4607 }
4608 }
4609 }
4610
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004611 /* SOFOFROM and SOFOTO */
4612 if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
4613 {
4614 put_bytes(fd, 1L, 2); /* <salcount> */
4615
4616 l = STRLEN(spin->si_sofofr);
4617 put_bytes(fd, (long_u)l, 2); /* <salfromlen> */
4618 fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <salfrom> */
4619
4620 l = STRLEN(spin->si_sofoto);
4621 put_bytes(fd, (long_u)l, 2); /* <saltolen> */
4622 fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <salto> */
4623 }
4624
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004625 put_bytes(fd, (long_u)spin->si_map.ga_len, 2); /* <maplen> */
4626 if (spin->si_map.ga_len > 0) /* <mapstr> */
4627 fwrite(spin->si_map.ga_data, (size_t)spin->si_map.ga_len,
4628 (size_t)1, fd);
Bram Moolenaar50cde822005-06-05 21:54:54 +00004629
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004630 /*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004631 * <LWORDTREE> <KWORDTREE> <PREFIXTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004632 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004633 spin->si_memtot = 0;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004634 for (round = 1; round <= 3; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004635 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004636 if (round == 1)
4637 tree = spin->si_foldroot;
4638 else if (round == 2)
4639 tree = spin->si_keeproot;
4640 else
4641 tree = spin->si_prefroot;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004642
Bram Moolenaar0c405862005-06-22 22:26:26 +00004643 /* Clear the index and wnode fields in the tree. */
4644 clear_node(tree);
4645
Bram Moolenaar51485f02005-06-04 21:55:20 +00004646 /* Count the number of nodes. Needed to be able to allocate the
Bram Moolenaar0c405862005-06-22 22:26:26 +00004647 * memory when reading the nodes. Also fills in index for shared
Bram Moolenaar51485f02005-06-04 21:55:20 +00004648 * nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004649 nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004650
Bram Moolenaar51485f02005-06-04 21:55:20 +00004651 /* number of nodes in 4 bytes */
4652 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00004653 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004654
Bram Moolenaar51485f02005-06-04 21:55:20 +00004655 /* Write the nodes. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004656 (void)put_node(fd, tree, 0, regionmask, round == 3);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004657 }
4658
Bram Moolenaar51485f02005-06-04 21:55:20 +00004659 fclose(fd);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00004660}
4661
4662/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00004663 * Clear the index and wnode fields of "node", it siblings and its
4664 * children. This is needed because they are a union with other items to save
4665 * space.
4666 */
4667 static void
4668clear_node(node)
4669 wordnode_T *node;
4670{
4671 wordnode_T *np;
4672
4673 if (node != NULL)
4674 for (np = node; np != NULL; np = np->wn_sibling)
4675 {
4676 np->wn_u1.index = 0;
4677 np->wn_u2.wnode = NULL;
4678
4679 if (np->wn_byte != NUL)
4680 clear_node(np->wn_child);
4681 }
4682}
4683
4684
4685/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00004686 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004687 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00004688 * This first writes the list of possible bytes (siblings). Then for each
4689 * byte recursively write the children.
4690 *
4691 * NOTE: The code here must match the code in read_tree(), since assumptions
4692 * are made about the indexes (so that we don't have to write them in the
4693 * file).
4694 *
4695 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004696 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004697 static int
Bram Moolenaar0c405862005-06-22 22:26:26 +00004698put_node(fd, node, index, regionmask, prefixtree)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004699 FILE *fd; /* NULL when only counting */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004700 wordnode_T *node;
4701 int index;
4702 int regionmask;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004703 int prefixtree; /* TRUE for PREFIXTREE */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004704{
Bram Moolenaar51485f02005-06-04 21:55:20 +00004705 int newindex = index;
4706 int siblingcount = 0;
4707 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004708 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004709
Bram Moolenaar51485f02005-06-04 21:55:20 +00004710 /* If "node" is zero the tree is empty. */
4711 if (node == NULL)
4712 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004713
Bram Moolenaar51485f02005-06-04 21:55:20 +00004714 /* Store the index where this node is written. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004715 node->wn_u1.index = index;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004716
4717 /* Count the number of siblings. */
4718 for (np = node; np != NULL; np = np->wn_sibling)
4719 ++siblingcount;
4720
4721 /* Write the sibling count. */
4722 if (fd != NULL)
4723 putc(siblingcount, fd); /* <siblingcount> */
4724
4725 /* Write each sibling byte and optionally extra info. */
4726 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004727 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004728 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00004729 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004730 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004731 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004732 /* For a NUL byte (end of word) write the flags etc. */
4733 if (prefixtree)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00004734 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004735 /* In PREFIXTREE write the required prefixID and the
4736 * associated condition nr (stored in wn_region). */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004737 if (np->wn_flags == (char_u)-2)
4738 putc(BY_FLAGS, fd); /* <byte> rare */
4739 else
4740 putc(BY_NOFLAGS, fd); /* <byte> */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004741 putc(np->wn_prefixID, fd); /* <prefixID> */
4742 put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00004743 }
4744 else
4745 {
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004746 /* For word trees we write the flag/region items. */
4747 flags = np->wn_flags;
4748 if (regionmask != 0 && np->wn_region != regionmask)
4749 flags |= WF_REGION;
4750 if (np->wn_prefixID != 0)
4751 flags |= WF_PFX;
4752 if (flags == 0)
4753 {
4754 /* word without flags or region */
4755 putc(BY_NOFLAGS, fd); /* <byte> */
4756 }
4757 else
4758 {
4759 putc(BY_FLAGS, fd); /* <byte> */
4760 putc(flags, fd); /* <flags> */
4761 if (flags & WF_REGION)
4762 putc(np->wn_region, fd); /* <region> */
4763 if (flags & WF_PFX)
4764 putc(np->wn_prefixID, fd); /* <prefixID> */
4765 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00004766 }
4767 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00004768 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004769 else
4770 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00004771 if (np->wn_child->wn_u1.index != 0
4772 && np->wn_child->wn_u2.wnode != node)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004773 {
4774 /* The child is written elsewhere, write the reference. */
4775 if (fd != NULL)
4776 {
4777 putc(BY_INDEX, fd); /* <byte> */
4778 /* <nodeidx> */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004779 put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004780 }
4781 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00004782 else if (np->wn_child->wn_u2.wnode == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004783 /* We will write the child below and give it an index. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004784 np->wn_child->wn_u2.wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004785
Bram Moolenaar51485f02005-06-04 21:55:20 +00004786 if (fd != NULL)
4787 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
4788 {
4789 EMSG(_(e_write));
4790 return 0;
4791 }
4792 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004793 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00004794
4795 /* Space used in the array when reading: one for each sibling and one for
4796 * the count. */
4797 newindex += siblingcount + 1;
4798
4799 /* Recursively dump the children of each sibling. */
4800 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar0c405862005-06-22 22:26:26 +00004801 if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
4802 newindex = put_node(fd, np->wn_child, newindex, regionmask,
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004803 prefixtree);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004804
4805 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004806}
4807
4808
4809/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00004810 * ":mkspell [-ascii] outfile infile ..."
4811 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004812 */
4813 void
4814ex_mkspell(eap)
4815 exarg_T *eap;
4816{
4817 int fcount;
4818 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004819 char_u *arg = eap->arg;
4820 int ascii = FALSE;
4821
4822 if (STRNCMP(arg, "-ascii", 6) == 0)
4823 {
4824 ascii = TRUE;
4825 arg = skipwhite(arg + 6);
4826 }
4827
4828 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
4829 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
4830 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004831 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004832 FreeWild(fcount, fnames);
4833 }
4834}
4835
4836/*
4837 * Create a Vim spell file from one or more word lists.
4838 * "fnames[0]" is the output file name.
4839 * "fnames[fcount - 1]" is the last input file name.
4840 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
4841 * and ".spl" is appended to make the output file name.
4842 */
4843 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004844mkspell(fcount, fnames, ascii, overwrite, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00004845 int fcount;
4846 char_u **fnames;
4847 int ascii; /* -ascii argument given */
4848 int overwrite; /* overwrite existing output file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004849 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004850{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004851 char_u fname[MAXPATHL];
4852 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00004853 char_u **innames;
4854 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004855 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004856 int i;
4857 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004858 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00004859 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004860 spellinfo_T spin;
4861
4862 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004863 spin.si_verbose = !added_word;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004864 spin.si_ascii = ascii;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004865 spin.si_followup = TRUE;
4866 spin.si_rem_accents = TRUE;
4867 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
4868 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
4869 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004870 ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004871
Bram Moolenaarb765d632005-06-07 21:00:02 +00004872 /* default: fnames[0] is output file, following are input files */
4873 innames = &fnames[1];
4874 incount = fcount - 1;
4875
4876 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00004877 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004878 len = STRLEN(fnames[0]);
4879 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
4880 {
4881 /* For ":mkspell path/en.latin1.add" output file is
4882 * "path/en.latin1.add.spl". */
4883 innames = &fnames[0];
4884 incount = 1;
4885 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
4886 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004887 else if (fcount == 1)
4888 {
4889 /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
4890 innames = &fnames[0];
4891 incount = 1;
4892 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
4893 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
4894 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00004895 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
4896 {
4897 /* Name ends in ".spl", use as the file name. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004898 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
Bram Moolenaarb765d632005-06-07 21:00:02 +00004899 }
4900 else
4901 /* Name should be language, make the file name from it. */
4902 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
4903 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
4904
4905 /* Check for .ascii.spl. */
4906 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
4907 spin.si_ascii = TRUE;
4908
4909 /* Check for .add.spl. */
4910 if (strstr((char *)gettail(wfname), ".add.") != NULL)
4911 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00004912 }
4913
Bram Moolenaarb765d632005-06-07 21:00:02 +00004914 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004915 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004916 else if (vim_strchr(gettail(wfname), '_') != NULL)
4917 EMSG(_("E751: Output file name must not have region name"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00004918 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004919 EMSG(_("E754: Only up to 8 regions supported"));
4920 else
4921 {
4922 /* Check for overwriting before doing things that may take a lot of
4923 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004924 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004925 {
4926 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00004927 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004928 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00004929 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004930 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004931 EMSG2(_(e_isadir2), wfname);
4932 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004933 }
4934
4935 /*
4936 * Init the aff and dic pointers.
4937 * Get the region names if there are more than 2 arguments.
4938 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004939 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004940 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004941 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004942
Bram Moolenaar3982c542005-06-08 21:56:31 +00004943 if (incount > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004944 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004945 len = STRLEN(innames[i]);
4946 if (STRLEN(gettail(innames[i])) < 5
4947 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004948 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00004949 EMSG2(_("E755: Invalid region in %s"), innames[i]);
4950 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004951 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00004952 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
4953 spin.si_region_name[i * 2 + 1] =
4954 TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004955 }
4956 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00004957 spin.si_region_count = incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004958
Bram Moolenaar51485f02005-06-04 21:55:20 +00004959 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
4960 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00004961 spin.si_prefroot = wordtree_alloc(&spin.si_blocks);
4962 if (spin.si_foldroot == NULL
4963 || spin.si_keeproot == NULL
4964 || spin.si_prefroot == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004965 {
4966 error = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004967 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00004968 }
4969
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00004970 /* When not producing a .add.spl file clear the character table when
4971 * we encounter one in the .aff file. This means we dump the current
4972 * one in the .spl file if the .aff file doesn't define one. That's
4973 * better than guessing the contents, the table will match a
4974 * previously loaded spell file. */
4975 if (!spin.si_add)
4976 spin.si_clear_chartab = TRUE;
4977
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004978 /*
4979 * Read all the .aff and .dic files.
4980 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00004981 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004982 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004983 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004984 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00004985 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00004986 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00004987
Bram Moolenaarb765d632005-06-07 21:00:02 +00004988 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00004989 if (mch_stat((char *)fname, &st) >= 0)
4990 {
4991 /* Read the .aff file. Will init "spin->si_conv" based on the
4992 * "SET" line. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00004993 afile[i] = spell_read_aff(fname, &spin);
4994 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00004995 error = TRUE;
4996 else
4997 {
4998 /* Read the .dic file and store the words in the trees. */
4999 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00005000 innames[i]);
5001 if (spell_read_dic(fname, &spin, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00005002 error = TRUE;
5003 }
5004 }
5005 else
5006 {
5007 /* No .aff file, try reading the file as a word list. Store
5008 * the words in the trees. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005009 if (spell_read_wordfile(innames[i], &spin) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00005010 error = TRUE;
5011 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005012
Bram Moolenaarb765d632005-06-07 21:00:02 +00005013#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005014 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00005015 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005016#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005017 }
5018
Bram Moolenaar51485f02005-06-04 21:55:20 +00005019 if (!error)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005020 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00005021 /*
5022 * Remove the dummy NUL from the start of the tree root.
5023 */
5024 spin.si_foldroot = spin.si_foldroot->wn_sibling;
5025 spin.si_keeproot = spin.si_keeproot->wn_sibling;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005026 spin.si_prefroot = spin.si_prefroot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005027
5028 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00005029 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005030 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005031 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005032 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005033 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005034 verbose_enter();
5035 MSG(_("Compressing word tree..."));
5036 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005037 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005038 verbose_leave();
5039 }
5040 wordtree_compress(spin.si_foldroot, &spin);
5041 wordtree_compress(spin.si_keeproot, &spin);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005042 wordtree_compress(spin.si_prefroot, &spin);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005043 }
5044
Bram Moolenaar51485f02005-06-04 21:55:20 +00005045 if (!error)
5046 {
5047 /*
5048 * Write the info in the spell file.
5049 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005050 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005051 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005052 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005053 verbose_enter();
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005054 smsg((char_u *)_("Writing spell file %s ..."), wfname);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005055 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005056 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005057 verbose_leave();
5058 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00005059
Bram Moolenaar3982c542005-06-08 21:56:31 +00005060 write_vim_spell(wfname, &spin);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005061
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005062 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005063 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005064 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005065 verbose_enter();
5066 MSG(_("Done!"));
5067 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
Bram Moolenaar50cde822005-06-05 21:54:54 +00005068 spin.si_memtot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005069 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005070 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00005071 verbose_leave();
5072 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005073
Bram Moolenaarb765d632005-06-07 21:00:02 +00005074 /* If the file is loaded need to reload it. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005075 spell_reload_one(wfname, added_word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005076 }
5077
5078 /* Free the allocated memory. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005079 ga_clear(&spin.si_rep);
5080 ga_clear(&spin.si_sal);
5081 ga_clear(&spin.si_map);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005082 ga_clear(&spin.si_prefcond);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005083 vim_free(spin.si_midword);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005084 vim_free(spin.si_sofofr);
5085 vim_free(spin.si_sofoto);
Bram Moolenaar51485f02005-06-04 21:55:20 +00005086
5087 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005088 for (i = 0; i < incount; ++i)
5089 if (afile[i] != NULL)
5090 spell_free_aff(afile[i]);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005091
5092 /* Free all the bits and pieces at once. */
5093 free_blocks(spin.si_blocks);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005094 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005095}
5096
Bram Moolenaarb765d632005-06-07 21:00:02 +00005097
5098/*
5099 * ":spellgood {word}"
5100 * ":spellwrong {word}"
5101 */
5102 void
5103ex_spell(eap)
5104 exarg_T *eap;
5105{
5106 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong);
5107}
5108
5109/*
5110 * Add "word[len]" to 'spellfile' as a good or bad word.
5111 */
5112 void
5113spell_add_word(word, len, bad)
5114 char_u *word;
5115 int len;
5116 int bad;
5117{
5118 FILE *fd;
5119 buf_T *buf;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005120 int new_spf = FALSE;
5121 struct stat st;
Bram Moolenaarb765d632005-06-07 21:00:02 +00005122
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005123 /* If 'spellfile' isn't set figure out a good default value. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005124 if (*curbuf->b_p_spf == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005125 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00005126 init_spellfile();
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005127 new_spf = TRUE;
5128 }
5129
Bram Moolenaarb765d632005-06-07 21:00:02 +00005130 if (*curbuf->b_p_spf == NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005131 EMSG(_("E764: 'spellfile' is not set"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00005132 else
5133 {
5134 /* Check that the user isn't editing the .add file somewhere. */
5135 buf = buflist_findname_exp(curbuf->b_p_spf);
5136 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
5137 buf = NULL;
5138 if (buf != NULL && bufIsChanged(buf))
5139 EMSG(_(e_bufloaded));
5140 else
5141 {
5142 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005143 if (fd == NULL && new_spf)
5144 {
5145 /* We just initialized the 'spellfile' option and can't open
5146 * the file. We may need to create the "spell" directory
5147 * first. We already checked the runtime directory is
5148 * writable in init_spellfile(). */
5149 STRCPY(NameBuff, curbuf->b_p_spf);
5150 *gettail_sep(NameBuff) = NUL;
5151 if (mch_stat((char *)NameBuff, &st) < 0)
5152 {
5153 /* The directory doesn't exist. Try creating it and
5154 * opening the file again. */
5155 vim_mkdir(NameBuff, 0755);
5156 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
5157 }
5158 }
5159
Bram Moolenaarb765d632005-06-07 21:00:02 +00005160 if (fd == NULL)
5161 EMSG2(_(e_notopen), curbuf->b_p_spf);
5162 else
5163 {
5164 if (bad)
5165 fprintf(fd, "/!%.*s\n", len, word);
5166 else
5167 fprintf(fd, "%.*s\n", len, word);
5168 fclose(fd);
5169
5170 /* Update the .add.spl file. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005171 mkspell(1, &curbuf->b_p_spf, FALSE, TRUE, TRUE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005172
5173 /* If the .add file is edited somewhere, reload it. */
5174 if (buf != NULL)
5175 buf_reload(buf);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005176
5177 redraw_all_later(NOT_VALID);
Bram Moolenaarb765d632005-06-07 21:00:02 +00005178 }
5179 }
5180 }
5181}
5182
5183/*
5184 * Initialize 'spellfile' for the current buffer.
5185 */
5186 static void
5187init_spellfile()
5188{
5189 char_u buf[MAXPATHL];
5190 int l;
5191 slang_T *sl;
5192 char_u *rtp;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005193 char_u *lend;
Bram Moolenaarb765d632005-06-07 21:00:02 +00005194
5195 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
5196 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005197 /* Find the end of the language name. Exclude the region. */
5198 for (lend = curbuf->b_p_spl; *lend != NUL
5199 && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
5200 ;
5201
5202 /* Loop over all entries in 'runtimepath'. Use the first one where we
5203 * are allowed to write. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005204 rtp = p_rtp;
5205 while (*rtp != NUL)
5206 {
5207 /* Copy the path from 'runtimepath' to buf[]. */
5208 copy_option_part(&rtp, buf, MAXPATHL, ",");
5209 if (filewritable(buf) == 2)
5210 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00005211 /* Use the first language name from 'spelllang' and the
5212 * encoding used in the first loaded .spl file. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00005213 sl = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang;
5214 l = STRLEN(buf);
5215 vim_snprintf((char *)buf + l, MAXPATHL - l,
Bram Moolenaar3982c542005-06-08 21:56:31 +00005216 "/spell/%.*s.%s.add",
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005217 (int)(lend - curbuf->b_p_spl), curbuf->b_p_spl,
Bram Moolenaarb765d632005-06-07 21:00:02 +00005218 strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
5219 ? (char_u *)"ascii" : spell_enc());
5220 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
5221 break;
5222 }
5223 }
5224 }
5225}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00005226
Bram Moolenaar51485f02005-06-04 21:55:20 +00005227
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005228/*
5229 * Init the chartab used for spelling for ASCII.
5230 * EBCDIC is not supported!
5231 */
5232 static void
5233clear_spell_chartab(sp)
5234 spelltab_T *sp;
5235{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005236 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005237
5238 /* Init everything to FALSE. */
5239 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
5240 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
5241 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005242 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005243 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005244 sp->st_upper[i] = i;
5245 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005246
5247 /* We include digits. A word shouldn't start with a digit, but handling
5248 * that is done separately. */
5249 for (i = '0'; i <= '9'; ++i)
5250 sp->st_isw[i] = TRUE;
5251 for (i = 'A'; i <= 'Z'; ++i)
5252 {
5253 sp->st_isw[i] = TRUE;
5254 sp->st_isu[i] = TRUE;
5255 sp->st_fold[i] = i + 0x20;
5256 }
5257 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005258 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005259 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005260 sp->st_upper[i] = i - 0x20;
5261 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005262}
5263
5264/*
5265 * Init the chartab used for spelling. Only depends on 'encoding'.
5266 * Called once while starting up and when 'encoding' changes.
5267 * The default is to use isalpha(), but the spell file should define the word
5268 * characters to make it possible that 'encoding' differs from the current
5269 * locale.
5270 */
5271 void
5272init_spell_chartab()
5273{
5274 int i;
5275
5276 did_set_spelltab = FALSE;
5277 clear_spell_chartab(&spelltab);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005278 vim_memset(spell_ismw, FALSE, sizeof(spell_ismw));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005279#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005280 vim_free(spell_ismw_mb);
5281 spell_ismw_mb = NULL;
5282
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005283 if (enc_dbcs)
5284 {
5285 /* DBCS: assume double-wide characters are word characters. */
5286 for (i = 128; i <= 255; ++i)
5287 if (MB_BYTE2LEN(i) == 2)
5288 spelltab.st_isw[i] = TRUE;
5289 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005290 else if (enc_utf8)
5291 {
5292 for (i = 128; i < 256; ++i)
5293 {
5294 spelltab.st_isu[i] = utf_isupper(i);
5295 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
5296 spelltab.st_fold[i] = utf_fold(i);
5297 spelltab.st_upper[i] = utf_toupper(i);
5298 }
5299 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005300 else
5301#endif
5302 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005303 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005304 for (i = 128; i < 256; ++i)
5305 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005306 if (MB_ISUPPER(i))
5307 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005308 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005309 spelltab.st_isu[i] = TRUE;
5310 spelltab.st_fold[i] = MB_TOLOWER(i);
5311 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005312 else if (MB_ISLOWER(i))
5313 {
5314 spelltab.st_isw[i] = TRUE;
5315 spelltab.st_upper[i] = MB_TOUPPER(i);
5316 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005317 }
5318 }
5319}
5320
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005321static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
5322static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
5323
5324/*
5325 * Set the spell character tables from strings in the affix file.
5326 */
5327 static int
5328set_spell_chartab(fol, low, upp)
5329 char_u *fol;
5330 char_u *low;
5331 char_u *upp;
5332{
5333 /* We build the new tables here first, so that we can compare with the
5334 * previous one. */
5335 spelltab_T new_st;
5336 char_u *pf = fol, *pl = low, *pu = upp;
5337 int f, l, u;
5338
5339 clear_spell_chartab(&new_st);
5340
5341 while (*pf != NUL)
5342 {
5343 if (*pl == NUL || *pu == NUL)
5344 {
5345 EMSG(_(e_affform));
5346 return FAIL;
5347 }
5348#ifdef FEAT_MBYTE
5349 f = mb_ptr2char_adv(&pf);
5350 l = mb_ptr2char_adv(&pl);
5351 u = mb_ptr2char_adv(&pu);
5352#else
5353 f = *pf++;
5354 l = *pl++;
5355 u = *pu++;
5356#endif
5357 /* Every character that appears is a word character. */
5358 if (f < 256)
5359 new_st.st_isw[f] = TRUE;
5360 if (l < 256)
5361 new_st.st_isw[l] = TRUE;
5362 if (u < 256)
5363 new_st.st_isw[u] = TRUE;
5364
5365 /* if "LOW" and "FOL" are not the same the "LOW" char needs
5366 * case-folding */
5367 if (l < 256 && l != f)
5368 {
5369 if (f >= 256)
5370 {
5371 EMSG(_(e_affrange));
5372 return FAIL;
5373 }
5374 new_st.st_fold[l] = f;
5375 }
5376
5377 /* if "UPP" and "FOL" are not the same the "UPP" char needs
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005378 * case-folding, it's upper case and the "UPP" is the upper case of
5379 * "FOL" . */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005380 if (u < 256 && u != f)
5381 {
5382 if (f >= 256)
5383 {
5384 EMSG(_(e_affrange));
5385 return FAIL;
5386 }
5387 new_st.st_fold[u] = f;
5388 new_st.st_isu[u] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005389 new_st.st_upper[f] = u;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005390 }
5391 }
5392
5393 if (*pl != NUL || *pu != NUL)
5394 {
5395 EMSG(_(e_affform));
5396 return FAIL;
5397 }
5398
5399 return set_spell_finish(&new_st);
5400}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005401
5402/*
5403 * Set the spell character tables from strings in the .spl file.
5404 */
5405 static int
5406set_spell_charflags(flags, cnt, upp)
5407 char_u *flags;
5408 int cnt;
5409 char_u *upp;
5410{
5411 /* We build the new tables here first, so that we can compare with the
5412 * previous one. */
5413 spelltab_T new_st;
5414 int i;
5415 char_u *p = upp;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005416 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005417
5418 clear_spell_chartab(&new_st);
5419
5420 for (i = 0; i < cnt; ++i)
5421 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005422 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
5423 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005424
5425 if (*p == NUL)
5426 return FAIL;
5427#ifdef FEAT_MBYTE
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005428 c = mb_ptr2char_adv(&p);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005429#else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005430 c = *p++;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005431#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005432 new_st.st_fold[i + 128] = c;
5433 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
5434 new_st.st_upper[c] = i + 128;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005435 }
5436
5437 return set_spell_finish(&new_st);
5438}
5439
5440 static int
5441set_spell_finish(new_st)
5442 spelltab_T *new_st;
5443{
5444 int i;
5445
5446 if (did_set_spelltab)
5447 {
5448 /* check that it's the same table */
5449 for (i = 0; i < 256; ++i)
5450 {
5451 if (spelltab.st_isw[i] != new_st->st_isw[i]
5452 || spelltab.st_isu[i] != new_st->st_isu[i]
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005453 || spelltab.st_fold[i] != new_st->st_fold[i]
5454 || spelltab.st_upper[i] != new_st->st_upper[i])
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005455 {
5456 EMSG(_("E763: Word characters differ between spell files"));
5457 return FAIL;
5458 }
5459 }
5460 }
5461 else
5462 {
5463 /* copy the new spelltab into the one being used */
5464 spelltab = *new_st;
5465 did_set_spelltab = TRUE;
5466 }
5467
5468 return OK;
5469}
5470
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005471/*
Bram Moolenaarea408852005-06-25 22:49:46 +00005472 * Return TRUE if "p" points to a word character.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005473 * As a special case we see "midword" characters as word character when it is
Bram Moolenaarea408852005-06-25 22:49:46 +00005474 * followed by a word character. This finds they'there but not 'they there'.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005475 * Thus this only works properly when past the first character of the word.
Bram Moolenaarea408852005-06-25 22:49:46 +00005476 */
5477 static int
5478spell_iswordp(p)
5479 char_u *p;
5480{
Bram Moolenaarea408852005-06-25 22:49:46 +00005481#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005482 char_u *s;
5483 int l;
5484 int c;
5485
5486 if (has_mbyte)
5487 {
5488 l = MB_BYTE2LEN(*p);
5489 s = p;
5490 if (l == 1)
5491 {
5492 /* be quick for ASCII */
5493 if (spell_ismw[*p])
5494 {
5495 s = p + 1; /* skip a mid-word character */
5496 l = MB_BYTE2LEN(*s);
5497 }
5498 }
5499 else
5500 {
5501 c = mb_ptr2char(p);
5502 if (c < 256 ? spell_ismw[c] : (spell_ismw_mb != NULL
5503 && vim_strchr(spell_ismw_mb, c) != NULL))
5504 {
5505 s = p + l;
5506 l = MB_BYTE2LEN(*s);
5507 }
5508 }
5509
5510 if (l > 1)
5511 return mb_get_class(s) >= 2;
5512 return spelltab.st_isw[*s];
5513 }
Bram Moolenaarea408852005-06-25 22:49:46 +00005514#endif
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00005515
5516 return spelltab.st_isw[spell_ismw[*p] ? p[1] : p[0]];
Bram Moolenaarea408852005-06-25 22:49:46 +00005517}
5518
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array of characters
 * instead of bytes.
 */
    static int
spell_iswordp_w(p)
    int		*p;
{
    int		*s = p;
    int		midword;

    /* A mid-word character is skipped, the character after it decides. */
    if (*p < 256)
        midword = spell_ismw[*p];
    else
        midword = (spell_ismw_mb != NULL
                               && vim_strchr(spell_ismw_mb, *p) != NULL);
    if (midword)
        s = p + 1;

    /* A character that takes one byte is looked up in the table. */
    if (mb_char2len(*s) <= 1)
        return spelltab.st_isw[*s];

    /* Multi-byte character: use the character class of the encoding. */
    if (enc_utf8)
        return utf_class(*s) >= 2;
    if (enc_dbcs)
        return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
    return 0;
}
#endif
5547
Bram Moolenaarea408852005-06-25 22:49:46 +00005548/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00005549 * Write the table with prefix conditions to the .spl file.
5550 */
5551 static void
5552write_spell_prefcond(fd, gap)
5553 FILE *fd;
5554 garray_T *gap;
5555{
5556 int i;
5557 char_u *p;
5558 int len;
5559
5560 put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
5561
5562 for (i = 0; i < gap->ga_len; ++i)
5563 {
5564 /* <prefcond> : <condlen> <condstr> */
5565 p = ((char_u **)gap->ga_data)[i];
5566 if (p == NULL)
5567 fputc(0, fd);
5568 else
5569 {
5570 len = STRLEN(p);
5571 fputc(len, fd);
5572 fwrite(p, (size_t)len, (size_t)1, fd);
5573 }
5574 }
5575}
5576
5577/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005578 * Write the current tables into the .spl file.
5579 * This makes sure the same characters are recognized as word characters when
5580 * generating an when using a spell file.
5581 */
5582 static void
5583write_spell_chartab(fd)
5584 FILE *fd;
5585{
5586 char_u charbuf[256 * 4];
5587 int len = 0;
5588 int flags;
5589 int i;
5590
5591 fputc(128, fd); /* <charflagslen> */
5592 for (i = 128; i < 256; ++i)
5593 {
5594 flags = 0;
5595 if (spelltab.st_isw[i])
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005596 flags |= CF_WORD;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005597 if (spelltab.st_isu[i])
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005598 flags |= CF_UPPER;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005599 fputc(flags, fd); /* <charflags> */
5600
Bram Moolenaarb765d632005-06-07 21:00:02 +00005601#ifdef FEAT_MBYTE
5602 if (has_mbyte)
5603 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
5604 else
5605#endif
5606 charbuf[len++] = spelltab.st_fold[i];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005607 }
5608
5609 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
5610 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
5611}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005612
5613/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005614 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
5615 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005616 * When using a multi-byte 'encoding' the length may change!
5617 * Returns FAIL when something wrong.
5618 */
5619 static int
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005620spell_casefold(str, len, buf, buflen)
5621 char_u *str;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005622 int len;
5623 char_u *buf;
5624 int buflen;
5625{
5626 int i;
5627
5628 if (len >= buflen)
5629 {
5630 buf[0] = NUL;
5631 return FAIL; /* result will not fit */
5632 }
5633
5634#ifdef FEAT_MBYTE
5635 if (has_mbyte)
5636 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005637 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005638 char_u *p;
5639 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005640
5641 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005642 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005643 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005644 if (outi + MB_MAXBYTES > buflen)
5645 {
5646 buf[outi] = NUL;
5647 return FAIL;
5648 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005649 c = mb_ptr2char_adv(&p);
5650 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005651 }
5652 buf[outi] = NUL;
5653 }
5654 else
5655#endif
5656 {
5657 /* Be quick for non-multibyte encodings. */
5658 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005659 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00005660 buf[i] = NUL;
5661 }
5662
5663 return OK;
5664}
5665
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005666#define SPS_BEST 1
5667#define SPS_FAST 2
5668#define SPS_DOUBLE 4
5669
5670static int sps_flags = SPS_BEST;
5671
5672/*
5673 * Check the 'spellsuggest' option. Return FAIL if it's wrong.
5674 * Sets "sps_flags".
5675 */
5676 int
5677spell_check_sps()
5678{
5679 char_u *p;
5680 char_u buf[MAXPATHL];
5681 int f;
5682
5683 sps_flags = 0;
5684
5685 for (p = p_sps; *p != NUL; )
5686 {
5687 copy_option_part(&p, buf, MAXPATHL, ",");
5688
5689 f = 0;
5690 if (STRCMP(buf, "best") == 0)
5691 f = SPS_BEST;
5692 else if (STRCMP(buf, "fast") == 0)
5693 f = SPS_FAST;
5694 else if (STRCMP(buf, "double") == 0)
5695 f = SPS_DOUBLE;
5696 else if (STRNCMP(buf, "expr:", 5) != 0
5697 && STRNCMP(buf, "file:", 5) != 0)
5698 f = -1;
5699
5700 if (f == -1 || (sps_flags != 0 && f != 0))
5701 {
5702 sps_flags = SPS_BEST;
5703 return FAIL;
5704 }
5705 if (f != 0)
5706 sps_flags = f;
5707 }
5708
5709 if (sps_flags == 0)
5710 sps_flags = SPS_BEST;
5711
5712 return OK;
5713}
5714
5715/* Remember what "z?" replaced. */
5716static char_u *repl_from = NULL;
5717static char_u *repl_to = NULL;
5718
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005719/*
5720 * "z?": Find badly spelled word under or after the cursor.
5721 * Give suggestions for the properly spelled word.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005722 */
5723 void
5724spell_suggest()
5725{
5726 char_u *line;
5727 pos_T prev_cursor = curwin->w_cursor;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005728 char_u wcopy[MAXWLEN + 2];
5729 char_u *p;
5730 int i;
5731 int c;
5732 suginfo_T sug;
5733 suggest_T *stp;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005734 int mouse_used;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005735
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005736 /* Find the start of the badly spelled word. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00005737 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL
5738 || curwin->w_cursor.col > prev_cursor.col)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005739 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00005740 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
5741 return;
5742
5743 /* No bad word or it starts after the cursor: use the word under the
5744 * cursor. */
5745 curwin->w_cursor = prev_cursor;
5746 line = ml_get_curline();
5747 p = line + curwin->w_cursor.col;
5748 /* Backup to before start of word. */
5749 while (p > line && SPELL_ISWORDP(p))
5750 mb_ptr_back(line, p);
5751 /* Forward to start of word. */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005752 while (*p != NUL && !SPELL_ISWORDP(p))
Bram Moolenaar0c405862005-06-22 22:26:26 +00005753 mb_ptr_adv(p);
5754
5755 if (!SPELL_ISWORDP(p)) /* No word found. */
5756 {
5757 beep_flush();
5758 return;
5759 }
5760 curwin->w_cursor.col = p - line;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005761 }
5762
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005763 /* Get the word and its length. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005764 line = ml_get_curline();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005765
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005766 /* Get the list of suggestions */
Bram Moolenaarea408852005-06-25 22:49:46 +00005767 spell_find_suggest(line + curwin->w_cursor.col, &sug, (int)Rows - 2, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005768
5769 if (sug.su_ga.ga_len == 0)
5770 MSG(_("Sorry, no suggestions"));
5771 else
5772 {
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005773 vim_free(repl_from);
5774 repl_from = NULL;
5775 vim_free(repl_to);
5776 repl_to = NULL;
5777
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005778 /* List the suggestions. */
5779 msg_start();
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005780 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005781 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
5782 sug.su_badlen, sug.su_badptr);
5783 msg_puts(IObuff);
5784 msg_clr_eos();
5785 msg_putchar('\n');
Bram Moolenaar0c405862005-06-22 22:26:26 +00005786
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005787 msg_scroll = TRUE;
5788 for (i = 0; i < sug.su_ga.ga_len; ++i)
5789 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005790 stp = &SUG(sug.su_ga, i);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005791
5792 /* The suggested word may replace only part of the bad word, add
5793 * the not replaced part. */
5794 STRCPY(wcopy, stp->st_word);
5795 if (sug.su_badlen > stp->st_orglen)
5796 vim_strncpy(wcopy + STRLEN(wcopy),
5797 sug.su_badptr + stp->st_orglen,
5798 sug.su_badlen - stp->st_orglen);
Bram Moolenaar0c405862005-06-22 22:26:26 +00005799 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""), i + 1, wcopy);
5800 msg_puts(IObuff);
5801
5802 /* The word may replace more than "su_badlen". */
5803 if (sug.su_badlen < stp->st_orglen)
5804 {
5805 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
5806 stp->st_orglen, sug.su_badptr);
5807 msg_puts(IObuff);
5808 }
5809
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005810 if (p_verbose > 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005811 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00005812 /* Add the score. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00005813 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
Bram Moolenaar0c405862005-06-22 22:26:26 +00005814 vim_snprintf((char *)IObuff, IOSIZE, _(" (%s%d - %d)"),
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005815 stp->st_salscore ? "s " : "",
5816 stp->st_score, stp->st_altscore);
5817 else
Bram Moolenaar0c405862005-06-22 22:26:26 +00005818 vim_snprintf((char *)IObuff, IOSIZE, _(" (%d)"),
5819 stp->st_score);
5820 msg_advance(30);
5821 msg_puts(IObuff);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005822 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005823 msg_putchar('\n');
5824 }
5825
5826 /* Ask for choice. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005827 i = prompt_for_number(&mouse_used);
5828 if (mouse_used)
5829 i -= lines_left;
5830
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005831 if (i > 0 && i <= sug.su_ga.ga_len && u_save_cursor() == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005832 {
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005833 /* Save the from and to text for :spellrepall. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005834 stp = &SUG(sug.su_ga, i - 1);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005835 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
5836 repl_to = vim_strsave(stp->st_word);
5837
5838 /* Replace the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005839 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1);
5840 if (p != NULL)
5841 {
5842 c = sug.su_badptr - line;
5843 mch_memmove(p, line, c);
5844 STRCPY(p + c, stp->st_word);
5845 STRCAT(p, sug.su_badptr + stp->st_orglen);
5846 ml_replace(curwin->w_cursor.lnum, p, FALSE);
5847 curwin->w_cursor.col = c;
5848 changed_bytes(curwin->w_cursor.lnum, c);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005849
5850 /* For redo we use a change-word command. */
5851 ResetRedobuff();
5852 AppendToRedobuff((char_u *)"ciw");
5853 AppendToRedobuff(stp->st_word);
5854 AppendCharToRedobuff(ESC);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005855 }
5856 }
5857 else
5858 curwin->w_cursor = prev_cursor;
5859 }
5860
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005861 spell_find_cleanup(&sug);
5862}
5863
5864/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005865 * ":spellrepall"
5866 */
5867/*ARGSUSED*/
5868 void
5869ex_spellrepall(eap)
5870 exarg_T *eap;
5871{
5872 pos_T pos = curwin->w_cursor;
5873 char_u *frompat;
5874 int addlen;
5875 char_u *line;
5876 char_u *p;
5877 int didone = FALSE;
5878 int save_ws = p_ws;
5879
5880 if (repl_from == NULL || repl_to == NULL)
5881 {
5882 EMSG(_("E752: No previous spell replacement"));
5883 return;
5884 }
5885 addlen = STRLEN(repl_to) - STRLEN(repl_from);
5886
5887 frompat = alloc(STRLEN(repl_from) + 7);
5888 if (frompat == NULL)
5889 return;
5890 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
5891 p_ws = FALSE;
5892
5893 curwin->w_cursor.lnum = 0;
5894 while (!got_int)
5895 {
5896 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0
5897 || u_save_cursor() == FAIL)
5898 break;
5899
5900 /* Only replace when the right word isn't there yet. This happens
5901 * when changing "etc" to "etc.". */
5902 line = ml_get_curline();
5903 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
5904 repl_to, STRLEN(repl_to)) != 0)
5905 {
5906 p = alloc(STRLEN(line) + addlen + 1);
5907 if (p == NULL)
5908 break;
5909 mch_memmove(p, line, curwin->w_cursor.col);
5910 STRCPY(p + curwin->w_cursor.col, repl_to);
5911 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
5912 ml_replace(curwin->w_cursor.lnum, p, FALSE);
5913 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
5914 didone = TRUE;
5915 }
5916 curwin->w_cursor.col += STRLEN(repl_to);
5917 }
5918
5919 p_ws = save_ws;
5920 curwin->w_cursor = pos;
5921 vim_free(frompat);
5922
5923 if (!didone)
5924 EMSG2(_("E753: Not found: %s"), repl_from);
5925}
5926
5927/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005928 * Find spell suggestions for "word". Return them in the growarray "*gap" as
5929 * a list of allocated strings.
5930 */
5931 void
5932spell_suggest_list(gap, word, maxcount)
5933 garray_T *gap;
5934 char_u *word;
5935 int maxcount; /* maximum nr of suggestions */
5936{
5937 suginfo_T sug;
5938 int i;
5939 suggest_T *stp;
5940 char_u *wcopy;
5941
Bram Moolenaarea408852005-06-25 22:49:46 +00005942 spell_find_suggest(word, &sug, maxcount, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005943
5944 /* Make room in "gap". */
5945 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
5946 if (ga_grow(gap, sug.su_ga.ga_len) == FAIL)
5947 return;
5948
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005949 for (i = 0; i < sug.su_ga.ga_len; ++i)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005950 {
5951 stp = &SUG(sug.su_ga, i);
5952
5953 /* The suggested word may replace only part of "word", add the not
5954 * replaced part. */
5955 wcopy = alloc(STRLEN(stp->st_word)
5956 + STRLEN(sug.su_badptr + stp->st_orglen) + 1);
5957 if (wcopy == NULL)
5958 break;
5959 STRCPY(wcopy, stp->st_word);
5960 STRCAT(wcopy, sug.su_badptr + stp->st_orglen);
5961 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
5962 }
5963
5964 spell_find_cleanup(&sug);
5965}
5966
5967/*
5968 * Find spell suggestions for the word at the start of "badptr".
5969 * Return the suggestions in "su->su_ga".
5970 * The maximum number of suggestions is "maxcount".
5971 * Note: does use info for the current window.
5972 * This is based on the mechanisms of Aspell, but completely reimplemented.
5973 */
5974 static void
Bram Moolenaarea408852005-06-25 22:49:46 +00005975spell_find_suggest(badptr, su, maxcount, banbadword)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005976 char_u *badptr;
5977 suginfo_T *su;
5978 int maxcount;
Bram Moolenaarea408852005-06-25 22:49:46 +00005979 int banbadword; /* don't include badword in suggestions */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005980{
5981 int attr;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00005982 char_u buf[MAXPATHL];
5983 char_u *p;
5984 int do_combine = FALSE;
5985 char_u *sps_copy;
5986#ifdef FEAT_EVAL
5987 static int expr_busy = FALSE;
5988#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005989
5990 /*
5991 * Set the info in "*su".
5992 */
5993 vim_memset(su, 0, sizeof(suginfo_T));
5994 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
5995 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00005996 if (*badptr == NUL)
5997 return;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005998 hash_init(&su->su_banned);
5999
6000 su->su_badptr = badptr;
6001 su->su_badlen = spell_check(curwin, su->su_badptr, &attr);
6002 su->su_maxcount = maxcount;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006003 su->su_maxscore = SCORE_MAXINIT;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006004
6005 if (su->su_badlen >= MAXWLEN)
6006 su->su_badlen = MAXWLEN - 1; /* just in case */
6007 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
6008 (void)spell_casefold(su->su_badptr, su->su_badlen,
6009 su->su_fbadword, MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +00006010 /* get caps flags for bad word */
6011 su->su_badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006012
6013 /* Ban the bad word itself. It may appear in another region. */
Bram Moolenaarea408852005-06-25 22:49:46 +00006014 if (banbadword)
6015 add_banned(su, su->su_badword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006016
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006017 /* Make a copy of 'spellsuggest', because the expression may change it. */
6018 sps_copy = vim_strsave(p_sps);
6019 if (sps_copy == NULL)
6020 return;
6021
6022 /* Loop over the items in 'spellsuggest'. */
6023 for (p = sps_copy; *p != NUL; )
6024 {
6025 copy_option_part(&p, buf, MAXPATHL, ",");
6026
6027 if (STRNCMP(buf, "expr:", 5) == 0)
6028 {
6029#ifdef FEAT_EVAL
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006030 /* Evaluate an expression. Skip this when called recursively,
6031 * when using spellsuggest() in the expression. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006032 if (!expr_busy)
6033 {
6034 expr_busy = TRUE;
6035 spell_suggest_expr(su, buf + 5);
6036 expr_busy = FALSE;
6037 }
6038#endif
6039 }
6040 else if (STRNCMP(buf, "file:", 5) == 0)
6041 /* Use list of suggestions in a file. */
6042 spell_suggest_file(su, buf + 5);
6043 else
6044 {
6045 /* Use internal method. */
6046 spell_suggest_intern(su);
6047 if (sps_flags & SPS_DOUBLE)
6048 do_combine = TRUE;
6049 }
6050 }
6051
6052 vim_free(sps_copy);
6053
6054 if (do_combine)
6055 /* Combine the two list of suggestions. This must be done last,
6056 * because sorting changes the order again. */
6057 score_combine(su);
6058}
6059
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * Each item of the resulting list that is itself a list is passed to
 * get_spellword() to obtain a word and a score; items for which a negative
 * score is returned are skipped.
 */
    static void
spell_suggest_expr(su, expr)
    suginfo_T	*su;
    char_u	*expr;
{
    list_T	*result;
    listitem_T	*item;
    char_u	*word;
    int		score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
        /* Go over the items in the list. */
        item = result->lv_first;
        while (item != NULL)
        {
            if (item->li_tv.v_type == VAR_LIST)
            {
                /* Get the word and the score from the item. */
                score = get_spellword(item->li_tv.vval.v_list, &word);
                if (score >= 0)
                    add_suggestion(su, &su->su_ga, word,
                                               su->su_badlen, score, 0, TRUE);
            }
            item = item->li_next;
        }
        list_unref(result);
    }

    /* Sort the suggestions and truncate at "maxcount". */
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
6097
6098/*
6099 * Find suggestions a file "fname".
6100 */
6101 static void
6102spell_suggest_file(su, fname)
6103 suginfo_T *su;
6104 char_u *fname;
6105{
6106 FILE *fd;
6107 char_u line[MAXWLEN * 2];
6108 char_u *p;
6109 int len;
6110 char_u cword[MAXWLEN];
6111
6112 /* Open the file. */
6113 fd = mch_fopen((char *)fname, "r");
6114 if (fd == NULL)
6115 {
6116 EMSG2(_(e_notopen), fname);
6117 return;
6118 }
6119
6120 /* Read it line by line. */
6121 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
6122 {
6123 line_breakcheck();
6124
6125 p = vim_strchr(line, '/');
6126 if (p == NULL)
6127 continue; /* No Tab found, just skip the line. */
6128 *p++ = NUL;
6129 if (STRICMP(su->su_badword, line) == 0)
6130 {
6131 /* Match! Isolate the good word, until CR or NL. */
6132 for (len = 0; p[len] >= ' '; ++len)
6133 ;
6134 p[len] = NUL;
6135
6136 /* If the suggestion doesn't have specific case duplicate the case
6137 * of the bad word. */
6138 if (captype(p, NULL) == 0)
6139 {
6140 make_case_word(p, cword, su->su_badflags);
6141 p = cword;
6142 }
6143
6144 add_suggestion(su, &su->su_ga, p, su->su_badlen,
6145 SCORE_FILE, 0, TRUE);
6146 }
6147 }
6148
6149 fclose(fd);
6150
6151 /* Sort the suggestions and truncate at "maxcount". */
6152 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
6153}
6154
6155/*
6156 * Find suggestions for the internal method indicated by "sps_flags".
6157 */
6158 static void
6159spell_suggest_intern(su)
6160 suginfo_T *su;
6161{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006162 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +00006163 * 1. Try special cases, such as repeating a word: "the the" -> "the".
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006164 *
6165 * Set a maximum score to limit the combination of operations that is
6166 * tried.
6167 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00006168 suggest_try_special(su);
6169
6170 /*
6171 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
6172 * from the .aff file and inserting a space (split the word).
6173 */
6174 suggest_try_change(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006175
6176 /* For the resulting top-scorers compute the sound-a-like score. */
6177 if (sps_flags & SPS_DOUBLE)
6178 score_comp_sal(su);
6179
6180 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +00006181 * 3. Try finding sound-a-like words.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006182 *
6183 * Only do this when we don't have a lot of suggestions yet, because it's
6184 * very slow and often doesn't find new suggestions.
6185 */
6186 if ((sps_flags & SPS_DOUBLE)
6187 || (!(sps_flags & SPS_FAST)
6188 && su->su_ga.ga_len < SUG_CLEAN_COUNT(su)))
6189 {
6190 /* Allow a higher score now. */
6191 su->su_maxscore = SCORE_MAXMAX;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006192 suggest_try_soundalike(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006193 }
6194
6195 /* When CTRL-C was hit while searching do show the results. */
6196 ui_breakcheck();
6197 if (got_int)
6198 {
6199 (void)vgetc();
6200 got_int = FALSE;
6201 }
6202
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006203 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006204 {
6205 if (sps_flags & SPS_BEST)
6206 /* Adjust the word score for how it sounds like. */
6207 rescore_suggestions(su);
6208
6209 /* Sort the suggestions and truncate at "maxcount". */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006210 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006211 }
6212}
6213
6214/*
6215 * Free the info put in "*su" by spell_find_suggest().
6216 */
6217 static void
6218spell_find_cleanup(su)
6219 suginfo_T *su;
6220{
6221 int i;
6222
6223 /* Free the suggestions. */
6224 for (i = 0; i < su->su_ga.ga_len; ++i)
6225 vim_free(SUG(su->su_ga, i).st_word);
6226 ga_clear(&su->su_ga);
6227 for (i = 0; i < su->su_sga.ga_len; ++i)
6228 vim_free(SUG(su->su_sga, i).st_word);
6229 ga_clear(&su->su_sga);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006230
6231 /* Free the banned words. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006232 free_banned(su);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006233}
6234
6235/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006236 * Make a copy of "word", with the first letter upper or lower cased, to
6237 * "wcopy[MAXWLEN]". "word" must not be empty.
6238 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006239 */
6240 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006241onecap_copy(word, wcopy, upper)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006242 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006243 char_u *wcopy;
6244 int upper; /* TRUE: first letter made upper case */
6245{
6246 char_u *p;
6247 int c;
6248 int l;
6249
6250 p = word;
6251#ifdef FEAT_MBYTE
6252 if (has_mbyte)
6253 c = mb_ptr2char_adv(&p);
6254 else
6255#endif
6256 c = *p++;
6257 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006258 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006259 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006260 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006261#ifdef FEAT_MBYTE
6262 if (has_mbyte)
6263 l = mb_char2bytes(c, wcopy);
6264 else
6265#endif
6266 {
6267 l = 1;
6268 wcopy[0] = c;
6269 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006270 vim_strncpy(wcopy + l, p, MAXWLEN - l);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006271}
6272
6273/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006274 * Make a copy of "word" with all the letters upper cased into
6275 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006276 */
6277 static void
6278allcap_copy(word, wcopy)
6279 char_u *word;
6280 char_u *wcopy;
6281{
6282 char_u *s;
6283 char_u *d;
6284 int c;
6285
6286 d = wcopy;
6287 for (s = word; *s != NUL; )
6288 {
6289#ifdef FEAT_MBYTE
6290 if (has_mbyte)
6291 c = mb_ptr2char_adv(&s);
6292 else
6293#endif
6294 c = *s++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006295 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006296
6297#ifdef FEAT_MBYTE
6298 if (has_mbyte)
6299 {
6300 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
6301 break;
6302 d += mb_char2bytes(c, d);
6303 }
6304 else
6305#endif
6306 {
6307 if (d - wcopy >= MAXWLEN - 1)
6308 break;
6309 *d++ = c;
6310 }
6311 }
6312 *d = NUL;
6313}
6314
6315/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00006316 * Try finding suggestions by recognizing specific situations.
6317 */
6318 static void
6319suggest_try_special(su)
6320 suginfo_T *su;
6321{
6322 char_u *p;
6323 int len;
6324 int c;
6325 char_u word[MAXWLEN];
6326
6327 /*
6328 * Recognize a word that is repeated: "the the".
6329 */
6330 p = skiptowhite(su->su_fbadword);
6331 len = p - su->su_fbadword;
6332 p = skipwhite(p);
6333 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
6334 {
6335 /* Include badflags: if the badword is onecap or allcap
6336 * use that for the goodword too: "The the" -> "The". */
6337 c = su->su_fbadword[len];
6338 su->su_fbadword[len] = NUL;
6339 make_case_word(su->su_fbadword, word, su->su_badflags);
6340 su->su_fbadword[len] = c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006341 add_suggestion(su, &su->su_ga, word, su->su_badlen, SCORE_DEL, 0, TRUE);
Bram Moolenaar0c405862005-06-22 22:26:26 +00006342 }
6343}
6344
6345/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006346 * Try finding suggestions by adding/removing/swapping letters.
Bram Moolenaarea424162005-06-16 21:51:00 +00006347 *
6348 * This uses a state machine. At each node in the tree we try various
6349 * operations. When trying if an operation works "depth" is increased and the
6350 * stack[] is used to store info. This allows combinations, thus insert one
6351 * character, replace one and delete another. The number of changes is
6352 * limited by su->su_maxscore, checked in try_deeper().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006353 */
6354 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +00006355suggest_try_change(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006356 suginfo_T *su;
6357{
6358 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
6359 char_u tword[MAXWLEN]; /* good word collected so far */
6360 trystate_T stack[MAXWLEN];
6361 char_u preword[MAXWLEN * 3]; /* word found with proper case (appended
6362 * to for word split) */
6363 char_u prewordlen = 0; /* length of word in "preword" */
6364 int splitoff = 0; /* index in tword after last split */
6365 trystate_T *sp;
6366 int newscore;
6367 langp_T *lp;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006368 char_u *byts, *fbyts, *pbyts;
6369 idx_T *idxs, *fidxs, *pidxs;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006370 int depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00006371 int c, c2, c3;
6372 int n = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006373 int flags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006374 garray_T *gap;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006375 idx_T arridx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006376 int len;
6377 char_u *p;
6378 fromto_T *ftp;
Bram Moolenaarea424162005-06-16 21:51:00 +00006379 int fl = 0, tl;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006380 int repextra = 0; /* extra bytes in fword[] from REP item */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006381
6382 /* We make a copy of the case-folded bad word, so that we can modify it
Bram Moolenaar0c405862005-06-22 22:26:26 +00006383 * to find matches (esp. REP items). Append some more text, changing
6384 * chars after the bad word may help. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006385 STRCPY(fword, su->su_fbadword);
Bram Moolenaar0c405862005-06-22 22:26:26 +00006386 n = STRLEN(fword);
6387 p = su->su_badptr + su->su_badlen;
6388 (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006389
6390 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
6391 lp->lp_slang != NULL; ++lp)
6392 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006393 /*
6394 * Go through the whole case-fold tree, try changes at each node.
6395 * "tword[]" contains the word collected from nodes in the tree.
6396 * "fword[]" the word we are trying to match with (initially the bad
6397 * word).
6398 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006399 depth = 0;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006400 sp = &stack[0];
6401 sp->ts_state = STATE_START;
6402 sp->ts_score = 0;
6403 sp->ts_curi = 1;
6404 sp->ts_fidx = 0;
6405 sp->ts_fidxtry = 0;
6406 sp->ts_twordlen = 0;
6407 sp->ts_arridx = 0;
Bram Moolenaarea424162005-06-16 21:51:00 +00006408#ifdef FEAT_MBYTE
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006409 sp->ts_tcharlen = 0;
Bram Moolenaarea424162005-06-16 21:51:00 +00006410#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006411
Bram Moolenaarea424162005-06-16 21:51:00 +00006412 /*
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006413 * When there are postponed prefixes we need to use these first. At
6414 * the end of the prefix we continue in the case-fold tree.
6415 */
6416 fbyts = lp->lp_slang->sl_fbyts;
6417 fidxs = lp->lp_slang->sl_fidxs;
6418 pbyts = lp->lp_slang->sl_pbyts;
6419 pidxs = lp->lp_slang->sl_pidxs;
6420 if (pbyts != NULL)
6421 {
6422 byts = pbyts;
6423 idxs = pidxs;
6424 sp->ts_prefixdepth = PREFIXTREE;
6425 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */
6426 }
6427 else
6428 {
6429 byts = fbyts;
6430 idxs = fidxs;
6431 sp->ts_prefixdepth = NOPREFIX;
6432 }
6433
6434 /*
Bram Moolenaarea424162005-06-16 21:51:00 +00006435 * Loop to find all suggestions. At each round we either:
6436 * - For the current state try one operation, advance "ts_curi",
6437 * increase "depth".
6438 * - When a state is done go to the next, set "ts_state".
6439 * - When all states are tried decrease "depth".
6440 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006441 while (depth >= 0 && !got_int)
6442 {
6443 sp = &stack[depth];
6444 switch (sp->ts_state)
6445 {
6446 case STATE_START:
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006447 case STATE_NOPREFIX:
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006448 /*
6449 * Start of node: Deal with NUL bytes, which means
6450 * tword[] may end here.
6451 */
6452 arridx = sp->ts_arridx; /* current node in the tree */
6453 len = byts[arridx]; /* bytes in this node */
6454 arridx += sp->ts_curi; /* index of current byte */
6455
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006456 if (sp->ts_prefixdepth == PREFIXTREE)
6457 {
6458 /* Skip over the NUL bytes, we use them later. */
6459 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
6460 ;
6461 sp->ts_curi += n;
6462
6463 /* At end of a prefix or at start of prefixtree: check for
6464 * following word. */
6465 if (byts[arridx] == 0 || sp->ts_state == STATE_NOPREFIX)
6466 {
6467 sp->ts_state = STATE_START;
6468 ++depth;
6469 stack[depth] = stack[depth - 1];
6470 sp = &stack[depth];
6471 sp->ts_prefixdepth = depth - 1;
6472 byts = fbyts;
6473 idxs = fidxs;
6474 sp->ts_state = STATE_START;
6475 sp->ts_curi = 1; /* start just after length byte */
6476 sp->ts_arridx = 0;
6477
6478 /* Move the prefix to preword[] so that
6479 * find_keepcap_word() works. */
6480 prewordlen = splitoff = sp->ts_twordlen;
6481 mch_memmove(preword, tword, splitoff);
6482 break;
6483 }
6484
6485 /* Always past NUL bytes now. */
6486 sp->ts_state = STATE_ENDNUL;
6487 break;
6488 }
6489
Bram Moolenaar0c405862005-06-22 22:26:26 +00006490 if (sp->ts_curi > len || byts[arridx] != 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006491 {
6492 /* Past bytes in node and/or past NUL bytes. */
6493 sp->ts_state = STATE_ENDNUL;
6494 break;
6495 }
6496
6497 /*
6498 * End of word in tree.
6499 */
6500 ++sp->ts_curi; /* eat one NUL byte */
6501
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006502 flags = (int)idxs[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006503
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006504 if (sp->ts_prefixdepth < MAXWLEN)
6505 {
6506 /* There was a prefix before the word. Check that the
6507 * prefix can be used with this word. */
6508 /* Count the length of the NULs in the prefix. If there
6509 * are none this must be the first try without a prefix.
6510 */
6511 n = stack[sp->ts_prefixdepth].ts_arridx;
6512 len = pbyts[n++];
6513 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
6514 ;
6515 if (c > 0)
6516 {
6517 /* The prefix ID is stored two bytes above the flags. */
6518 c = valid_word_prefix(c, n, (unsigned)flags >> 16,
6519 tword + splitoff, lp->lp_slang);
6520 if (c == 0)
6521 break;
6522
6523 /* Use the WF_RARE flag for a rare prefix. */
6524 if (c & WF_RAREPFX)
6525 flags |= WF_RARE;
6526 }
6527 }
6528
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006529 /*
6530 * Form the word with proper case in preword.
6531 * If there is a word from a previous split, append.
6532 */
6533 tword[sp->ts_twordlen] = NUL;
6534 if (flags & WF_KEEPCAP)
6535 /* Must find the word in the keep-case tree. */
6536 find_keepcap_word(lp->lp_slang, tword + splitoff,
6537 preword + prewordlen);
6538 else
Bram Moolenaar0c405862005-06-22 22:26:26 +00006539 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006540 /* Include badflags: if the badword is onecap or allcap
Bram Moolenaar0c405862005-06-22 22:26:26 +00006541 * use that for the goodword too. But if the badword is
6542 * allcap and it's only one char long use onecap. */
6543 c = su->su_badflags;
6544 if ((c & WF_ALLCAP)
6545#ifdef FEAT_MBYTE
6546 && su->su_badlen == mb_ptr2len_check(su->su_badptr)
6547#else
6548 && su->su_badlen == 1
6549#endif
6550 )
6551 c = WF_ONECAP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006552 make_case_word(tword + splitoff,
Bram Moolenaar0c405862005-06-22 22:26:26 +00006553 preword + prewordlen, flags | c);
6554 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006555
6556 /* Don't use a banned word. It may appear again as a good
6557 * word, thus remember it. */
6558 if (flags & WF_BANNED)
6559 {
6560 add_banned(su, preword + prewordlen);
6561 break;
6562 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +00006563 if (was_banned(su, preword + prewordlen)
6564 || was_banned(su, preword))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006565 break;
6566
6567 newscore = 0;
6568 if ((flags & WF_REGION)
6569 && (((unsigned)flags >> 8) & lp->lp_region) == 0)
6570 newscore += SCORE_REGION;
6571 if (flags & WF_RARE)
6572 newscore += SCORE_RARE;
6573
Bram Moolenaar0c405862005-06-22 22:26:26 +00006574 if (!spell_valid_case(su->su_badflags,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006575 captype(preword + prewordlen, NULL)))
6576 newscore += SCORE_ICASE;
6577
Bram Moolenaar0c405862005-06-22 22:26:26 +00006578 if ((fword[sp->ts_fidx] == NUL
Bram Moolenaarea408852005-06-25 22:49:46 +00006579 || !spell_iswordp(fword + sp->ts_fidx))
Bram Moolenaar0c405862005-06-22 22:26:26 +00006580 && sp->ts_fidx >= sp->ts_fidxtry)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006581 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00006582 /* The badword also ends: add suggestions. Give a penalty
6583 * when changing non-word char to word char, e.g., "thes,"
6584 * -> "these". */
6585 p = fword + sp->ts_fidx;
6586#ifdef FEAT_MBYTE
6587 if (has_mbyte)
6588 mb_ptr_back(fword, p);
6589 else
6590#endif
6591 --p;
6592 if (!spell_iswordp(p))
6593 {
6594 p = preword + STRLEN(preword);
6595#ifdef FEAT_MBYTE
6596 if (has_mbyte)
6597 mb_ptr_back(preword, p);
6598 else
6599#endif
6600 --p;
6601 if (spell_iswordp(p))
6602 newscore += SCORE_NONWORD;
6603 }
6604
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006605 add_suggestion(su, &su->su_ga, preword,
Bram Moolenaar0c405862005-06-22 22:26:26 +00006606 sp->ts_fidx - repextra,
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006607 sp->ts_score + newscore, 0, FALSE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006608 }
Bram Moolenaarea424162005-06-16 21:51:00 +00006609 else if (sp->ts_fidx >= sp->ts_fidxtry
6610#ifdef FEAT_MBYTE
6611 /* Don't split halfway a character. */
6612 && (!has_mbyte || sp->ts_tcharlen == 0)
6613#endif
6614 )
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006615 {
6616 /* The word in the tree ends but the badword
6617 * continues: try inserting a space and check that a valid
6618 * words starts at fword[sp->ts_fidx]. */
6619 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT))
6620 {
6621 /* Save things to be restored at STATE_SPLITUNDO. */
6622 sp->ts_save_prewordlen = prewordlen;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006623 sp->ts_save_badflags = su->su_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006624 sp->ts_save_splitoff = splitoff;
6625
6626 /* Append a space to preword. */
6627 STRCAT(preword, " ");
6628 prewordlen = STRLEN(preword);
6629 splitoff = sp->ts_twordlen;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006630#ifdef FEAT_MBYTE
6631 if (has_mbyte)
6632 {
6633 int i = 0;
6634
6635 /* Case-folding may change the number of bytes:
6636 * Count nr of chars in fword[sp->ts_fidx] and
6637 * advance that many chars in su->su_badptr. */
6638 for (p = fword; p < fword + sp->ts_fidx;
6639 mb_ptr_adv(p))
6640 ++i;
6641 for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
6642 --i;
6643 }
6644 else
6645#endif
6646 p = su->su_badptr + sp->ts_fidx;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006647 su->su_badflags = captype(p, su->su_badptr
6648 + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006649
6650 sp->ts_state = STATE_SPLITUNDO;
6651 ++depth;
6652 /* Restart at top of the tree. */
6653 stack[depth].ts_arridx = 0;
6654 }
6655 }
6656 break;
6657
6658 case STATE_SPLITUNDO:
Bram Moolenaar0c405862005-06-22 22:26:26 +00006659 /* Undo the changes done for word split. */
6660 su->su_badflags = sp->ts_save_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006661 splitoff = sp->ts_save_splitoff;
6662 prewordlen = sp->ts_save_prewordlen;
6663
6664 /* Continue looking for NUL bytes. */
6665 sp->ts_state = STATE_START;
6666 break;
6667
6668 case STATE_ENDNUL:
6669 /* Past the NUL bytes in the node. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00006670 if (fword[sp->ts_fidx] == NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006671 {
6672 /* The badword ends, can't use the bytes in this node. */
6673 sp->ts_state = STATE_DEL;
6674 break;
6675 }
6676 sp->ts_state = STATE_PLAIN;
6677 /*FALLTHROUGH*/
6678
6679 case STATE_PLAIN:
6680 /*
6681 * Go over all possible bytes at this node, add each to
6682 * tword[] and use child node. "ts_curi" is the index.
6683 */
6684 arridx = sp->ts_arridx;
6685 if (sp->ts_curi > byts[arridx])
6686 {
6687 /* Done all bytes at this node, do next state. When still
6688 * at already changed bytes skip the other tricks. */
6689 if (sp->ts_fidx >= sp->ts_fidxtry)
6690 sp->ts_state = STATE_DEL;
6691 else
6692 sp->ts_state = STATE_FINAL;
6693 }
6694 else
6695 {
6696 arridx += sp->ts_curi++;
6697 c = byts[arridx];
6698
6699 /* Normal byte, go one level deeper. If it's not equal to
6700 * the byte in the bad word adjust the score. But don't
6701 * even try when the byte was already changed. */
Bram Moolenaarea424162005-06-16 21:51:00 +00006702 if (c == fword[sp->ts_fidx]
6703#ifdef FEAT_MBYTE
6704 || (sp->ts_tcharlen > 0
6705 && sp->ts_isdiff != DIFF_NONE)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006706#endif
Bram Moolenaarea424162005-06-16 21:51:00 +00006707 )
6708 newscore = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006709 else
6710 newscore = SCORE_SUBST;
6711 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
6712 && try_deeper(su, stack, depth, newscore))
6713 {
6714 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00006715 sp = &stack[depth];
6716 ++sp->ts_fidx;
6717 tword[sp->ts_twordlen++] = c;
6718 sp->ts_arridx = idxs[arridx];
6719#ifdef FEAT_MBYTE
6720 if (newscore == SCORE_SUBST)
6721 sp->ts_isdiff = DIFF_YES;
6722 if (has_mbyte)
6723 {
6724 /* Multi-byte characters are a bit complicated to
6725 * handle: They differ when any of the bytes
6726 * differ and then their length may also differ. */
6727 if (sp->ts_tcharlen == 0)
6728 {
6729 /* First byte. */
6730 sp->ts_tcharidx = 0;
6731 sp->ts_tcharlen = MB_BYTE2LEN(c);
6732 sp->ts_fcharstart = sp->ts_fidx - 1;
6733 sp->ts_isdiff = (newscore != 0)
6734 ? DIFF_YES : DIFF_NONE;
6735 }
6736 else if (sp->ts_isdiff == DIFF_INSERT)
6737 /* When inserting trail bytes don't advance in
6738 * the bad word. */
6739 --sp->ts_fidx;
6740 if (++sp->ts_tcharidx == sp->ts_tcharlen)
6741 {
6742 /* Last byte of character. */
6743 if (sp->ts_isdiff == DIFF_YES)
6744 {
6745 /* Correct ts_fidx for the byte length of
6746 * the character (we didn't check that
6747 * before). */
6748 sp->ts_fidx = sp->ts_fcharstart
6749 + MB_BYTE2LEN(
6750 fword[sp->ts_fcharstart]);
6751
6752 /* For a similar character adjust score
6753 * from SCORE_SUBST to SCORE_SIMILAR. */
6754 if (lp->lp_slang->sl_has_map
6755 && similar_chars(lp->lp_slang,
6756 mb_ptr2char(tword
6757 + sp->ts_twordlen
6758 - sp->ts_tcharlen),
6759 mb_ptr2char(fword
6760 + sp->ts_fcharstart)))
6761 sp->ts_score -=
6762 SCORE_SUBST - SCORE_SIMILAR;
6763 }
Bram Moolenaarea408852005-06-25 22:49:46 +00006764 else if (sp->ts_isdiff == DIFF_INSERT
6765 && sp->ts_twordlen > sp->ts_tcharlen)
6766 {
6767 /* If the previous character was the same,
6768 * thus doubling a character, give a bonus
6769 * to the score. */
6770 p = tword + sp->ts_twordlen
6771 - sp->ts_tcharlen;
6772 c = mb_ptr2char(p);
6773 mb_ptr_back(tword, p);
6774 if (c == mb_ptr2char(p))
6775 sp->ts_score -= SCORE_INS
6776 - SCORE_INSDUP;
6777 }
Bram Moolenaarea424162005-06-16 21:51:00 +00006778
6779 /* Starting a new char, reset the length. */
6780 sp->ts_tcharlen = 0;
6781 }
6782 }
6783 else
6784#endif
6785 {
6786 /* If we found a similar char adjust the score.
6787 * We do this after calling try_deeper() because
6788 * it's slow. */
6789 if (newscore != 0
6790 && lp->lp_slang->sl_has_map
6791 && similar_chars(lp->lp_slang,
6792 c, fword[sp->ts_fidx - 1]))
6793 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
6794 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006795 }
6796 }
6797 break;
6798
6799 case STATE_DEL:
Bram Moolenaarea424162005-06-16 21:51:00 +00006800#ifdef FEAT_MBYTE
6801 /* When past the first byte of a multi-byte char don't try
6802 * delete/insert/swap a character. */
6803 if (has_mbyte && sp->ts_tcharlen > 0)
6804 {
6805 sp->ts_state = STATE_FINAL;
6806 break;
6807 }
6808#endif
6809 /*
6810 * Try skipping one character in the bad word (delete it).
6811 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006812 sp->ts_state = STATE_INS;
6813 sp->ts_curi = 1;
6814 if (fword[sp->ts_fidx] != NUL
6815 && try_deeper(su, stack, depth, SCORE_DEL))
6816 {
6817 ++depth;
Bram Moolenaarea408852005-06-25 22:49:46 +00006818
6819 /* Advance over the character in fword[]. Give a bonus to
6820 * the score if the same character is following "nn" ->
6821 * "n". */
Bram Moolenaarea424162005-06-16 21:51:00 +00006822#ifdef FEAT_MBYTE
6823 if (has_mbyte)
Bram Moolenaarea408852005-06-25 22:49:46 +00006824 {
6825 c = mb_ptr2char(fword + sp->ts_fidx);
Bram Moolenaarea424162005-06-16 21:51:00 +00006826 stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
Bram Moolenaarea408852005-06-25 22:49:46 +00006827 if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
6828 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
6829 }
Bram Moolenaarea424162005-06-16 21:51:00 +00006830 else
6831#endif
Bram Moolenaarea408852005-06-25 22:49:46 +00006832 {
Bram Moolenaarea424162005-06-16 21:51:00 +00006833 ++stack[depth].ts_fidx;
Bram Moolenaarea408852005-06-25 22:49:46 +00006834 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
6835 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
6836 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006837 break;
6838 }
6839 /*FALLTHROUGH*/
6840
6841 case STATE_INS:
Bram Moolenaarea424162005-06-16 21:51:00 +00006842 /* Insert one byte. Do this for each possible byte at this
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006843 * node. */
6844 n = sp->ts_arridx;
6845 if (sp->ts_curi > byts[n])
6846 {
6847 /* Done all bytes at this node, do next state. */
6848 sp->ts_state = STATE_SWAP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006849 }
6850 else
6851 {
Bram Moolenaarea424162005-06-16 21:51:00 +00006852 /* Do one more byte at this node. Skip NUL bytes. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006853 n += sp->ts_curi++;
6854 c = byts[n];
6855 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
6856 {
6857 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00006858 sp = &stack[depth];
6859 tword[sp->ts_twordlen++] = c;
6860 sp->ts_arridx = idxs[n];
6861#ifdef FEAT_MBYTE
6862 if (has_mbyte)
6863 {
6864 fl = MB_BYTE2LEN(c);
6865 if (fl > 1)
6866 {
6867 /* There are following bytes for the same
6868 * character. We must find all bytes before
6869 * trying delete/insert/swap/etc. */
6870 sp->ts_tcharlen = fl;
6871 sp->ts_tcharidx = 1;
6872 sp->ts_isdiff = DIFF_INSERT;
6873 }
6874 }
Bram Moolenaarea408852005-06-25 22:49:46 +00006875 else
6876 fl = 1;
6877 if (fl == 1)
Bram Moolenaarea424162005-06-16 21:51:00 +00006878#endif
Bram Moolenaarea408852005-06-25 22:49:46 +00006879 {
6880 /* If the previous character was the same, thus
6881 * doubling a character, give a bonus to the
6882 * score. */
6883 if (sp->ts_twordlen >= 2
6884 && tword[sp->ts_twordlen - 2] == c)
6885 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
6886 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006887 }
6888 }
6889 break;
6890
6891 case STATE_SWAP:
Bram Moolenaarea424162005-06-16 21:51:00 +00006892 /*
6893 * Swap two bytes in the bad word: "12" -> "21".
6894 * We change "fword" here, it's changed back afterwards.
6895 */
6896 p = fword + sp->ts_fidx;
6897 c = *p;
6898 if (c == NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006899 {
Bram Moolenaarea424162005-06-16 21:51:00 +00006900 /* End of word, can't swap or replace. */
6901 sp->ts_state = STATE_FINAL;
6902 break;
6903 }
6904#ifdef FEAT_MBYTE
6905 if (has_mbyte)
6906 {
6907 n = mb_ptr2len_check(p);
6908 c = mb_ptr2char(p);
6909 c2 = mb_ptr2char(p + n);
6910 }
6911 else
6912#endif
6913 c2 = p[1];
6914 if (c == c2)
6915 {
6916 /* Characters are identical, swap won't do anything. */
6917 sp->ts_state = STATE_SWAP3;
6918 break;
6919 }
6920 if (c2 != NUL && try_deeper(su, stack, depth, SCORE_SWAP))
6921 {
6922 sp->ts_state = STATE_UNSWAP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006923 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00006924#ifdef FEAT_MBYTE
6925 if (has_mbyte)
6926 {
6927 fl = mb_char2len(c2);
6928 mch_memmove(p, p + n, fl);
6929 mb_char2bytes(c, p + fl);
6930 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
6931 }
6932 else
6933#endif
6934 {
6935 p[0] = c2;
6936 p[1] = c;
6937 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
6938 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006939 }
6940 else
6941 /* If this swap doesn't work then SWAP3 won't either. */
6942 sp->ts_state = STATE_REP_INI;
6943 break;
6944
Bram Moolenaarea424162005-06-16 21:51:00 +00006945 case STATE_UNSWAP:
6946 /* Undo the STATE_SWAP swap: "21" -> "12". */
6947 p = fword + sp->ts_fidx;
6948#ifdef FEAT_MBYTE
6949 if (has_mbyte)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006950 {
Bram Moolenaarea424162005-06-16 21:51:00 +00006951 n = MB_BYTE2LEN(*p);
6952 c = mb_ptr2char(p + n);
6953 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
6954 mb_char2bytes(c, p);
6955 }
6956 else
6957#endif
6958 {
6959 c = *p;
6960 *p = p[1];
6961 p[1] = c;
6962 }
6963 /*FALLTHROUGH*/
6964
6965 case STATE_SWAP3:
6966 /* Swap two bytes, skipping one: "123" -> "321". We change
6967 * "fword" here, it's changed back afterwards. */
6968 p = fword + sp->ts_fidx;
6969#ifdef FEAT_MBYTE
6970 if (has_mbyte)
6971 {
6972 n = mb_ptr2len_check(p);
6973 c = mb_ptr2char(p);
6974 fl = mb_ptr2len_check(p + n);
6975 c2 = mb_ptr2char(p + n);
6976 c3 = mb_ptr2char(p + n + fl);
6977 }
6978 else
6979#endif
6980 {
6981 c = *p;
6982 c2 = p[1];
6983 c3 = p[2];
6984 }
6985
6986 /* When characters are identical: "121" then SWAP3 result is
6987 * identical, ROT3L result is same as SWAP: "211", ROT3L
6988 * result is same as SWAP on next char: "112". Thus skip all
6989 * swapping. Also skip when c3 is NUL. */
6990 if (c == c3 || c3 == NUL)
6991 {
6992 sp->ts_state = STATE_REP_INI;
6993 break;
6994 }
6995 if (try_deeper(su, stack, depth, SCORE_SWAP3))
6996 {
6997 sp->ts_state = STATE_UNSWAP3;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006998 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00006999#ifdef FEAT_MBYTE
7000 if (has_mbyte)
7001 {
7002 tl = mb_char2len(c3);
7003 mch_memmove(p, p + n + fl, tl);
7004 mb_char2bytes(c2, p + tl);
7005 mb_char2bytes(c, p + fl + tl);
7006 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
7007 }
7008 else
7009#endif
7010 {
7011 p[0] = p[2];
7012 p[2] = c;
7013 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
7014 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007015 }
7016 else
7017 sp->ts_state = STATE_REP_INI;
7018 break;
7019
Bram Moolenaarea424162005-06-16 21:51:00 +00007020 case STATE_UNSWAP3:
7021 /* Undo STATE_SWAP3: "321" -> "123" */
7022 p = fword + sp->ts_fidx;
7023#ifdef FEAT_MBYTE
7024 if (has_mbyte)
7025 {
7026 n = MB_BYTE2LEN(*p);
7027 c2 = mb_ptr2char(p + n);
7028 fl = MB_BYTE2LEN(p[n]);
7029 c = mb_ptr2char(p + n + fl);
7030 tl = MB_BYTE2LEN(p[n + fl]);
7031 mch_memmove(p + fl + tl, p, n);
7032 mb_char2bytes(c, p);
7033 mb_char2bytes(c2, p + tl);
7034 }
7035 else
7036#endif
7037 {
7038 c = *p;
7039 *p = p[2];
7040 p[2] = c;
7041 }
Bram Moolenaarea424162005-06-16 21:51:00 +00007042
Bram Moolenaarea424162005-06-16 21:51:00 +00007043 /* Rotate three characters left: "123" -> "231". We change
7044 * "fword" here, it's changed back afterwards. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007045 if (try_deeper(su, stack, depth, SCORE_SWAP3))
7046 {
Bram Moolenaarea424162005-06-16 21:51:00 +00007047 sp->ts_state = STATE_UNROT3L;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007048 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00007049 p = fword + sp->ts_fidx;
7050#ifdef FEAT_MBYTE
7051 if (has_mbyte)
7052 {
7053 n = mb_ptr2len_check(p);
7054 c = mb_ptr2char(p);
7055 fl = mb_ptr2len_check(p + n);
7056 fl += mb_ptr2len_check(p + n + fl);
7057 mch_memmove(p, p + n, fl);
7058 mb_char2bytes(c, p + fl);
7059 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
7060 }
7061 else
7062#endif
7063 {
7064 c = *p;
7065 *p = p[1];
7066 p[1] = p[2];
7067 p[2] = c;
7068 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
7069 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007070 }
7071 else
7072 sp->ts_state = STATE_REP_INI;
7073 break;
7074
Bram Moolenaarea424162005-06-16 21:51:00 +00007075 case STATE_UNROT3L:
Bram Moolenaar0c405862005-06-22 22:26:26 +00007076 /* Undo ROT3L: "231" -> "123" */
Bram Moolenaarea424162005-06-16 21:51:00 +00007077 p = fword + sp->ts_fidx;
7078#ifdef FEAT_MBYTE
7079 if (has_mbyte)
7080 {
7081 n = MB_BYTE2LEN(*p);
7082 n += MB_BYTE2LEN(p[n]);
7083 c = mb_ptr2char(p + n);
7084 tl = MB_BYTE2LEN(p[n]);
7085 mch_memmove(p + tl, p, n);
7086 mb_char2bytes(c, p);
7087 }
7088 else
7089#endif
7090 {
7091 c = p[2];
7092 p[2] = p[1];
7093 p[1] = *p;
7094 *p = c;
7095 }
Bram Moolenaarea424162005-06-16 21:51:00 +00007096
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007097 /* Rotate three bytes right: "123" -> "312". We change
Bram Moolenaarea424162005-06-16 21:51:00 +00007098 * "fword" here, it's changed back afterwards. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007099 if (try_deeper(su, stack, depth, SCORE_SWAP3))
7100 {
Bram Moolenaarea424162005-06-16 21:51:00 +00007101 sp->ts_state = STATE_UNROT3R;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007102 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00007103 p = fword + sp->ts_fidx;
7104#ifdef FEAT_MBYTE
7105 if (has_mbyte)
7106 {
7107 n = mb_ptr2len_check(p);
7108 n += mb_ptr2len_check(p + n);
7109 c = mb_ptr2char(p + n);
7110 tl = mb_ptr2len_check(p + n);
7111 mch_memmove(p + tl, p, n);
7112 mb_char2bytes(c, p);
7113 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
7114 }
7115 else
7116#endif
7117 {
7118 c = p[2];
7119 p[2] = p[1];
7120 p[1] = *p;
7121 *p = c;
7122 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
7123 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007124 }
7125 else
7126 sp->ts_state = STATE_REP_INI;
7127 break;
7128
Bram Moolenaarea424162005-06-16 21:51:00 +00007129 case STATE_UNROT3R:
Bram Moolenaar0c405862005-06-22 22:26:26 +00007130 /* Undo ROT3R: "312" -> "123" */
Bram Moolenaarea424162005-06-16 21:51:00 +00007131 p = fword + sp->ts_fidx;
7132#ifdef FEAT_MBYTE
7133 if (has_mbyte)
7134 {
7135 c = mb_ptr2char(p);
7136 tl = MB_BYTE2LEN(*p);
7137 n = MB_BYTE2LEN(p[tl]);
7138 n += MB_BYTE2LEN(p[tl + n]);
7139 mch_memmove(p, p + tl, n);
7140 mb_char2bytes(c, p + n);
7141 }
7142 else
7143#endif
7144 {
7145 c = *p;
7146 *p = p[1];
7147 p[1] = p[2];
7148 p[2] = c;
7149 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007150 /*FALLTHROUGH*/
7151
7152 case STATE_REP_INI:
7153 /* Check if matching with REP items from the .aff file would
7154 * work. Quickly skip if there are no REP items or the score
7155 * is going to be too high anyway. */
7156 gap = &lp->lp_slang->sl_rep;
7157 if (gap->ga_len == 0
7158 || sp->ts_score + SCORE_REP >= su->su_maxscore)
7159 {
7160 sp->ts_state = STATE_FINAL;
7161 break;
7162 }
7163
7164 /* Use the first byte to quickly find the first entry that
Bram Moolenaarea424162005-06-16 21:51:00 +00007165 * may match. If the index is -1 there is none. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007166 sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
7167 if (sp->ts_curi < 0)
7168 {
7169 sp->ts_state = STATE_FINAL;
7170 break;
7171 }
7172
7173 sp->ts_state = STATE_REP;
7174 /*FALLTHROUGH*/
7175
7176 case STATE_REP:
7177 /* Try matching with REP items from the .aff file. For each
Bram Moolenaarea424162005-06-16 21:51:00 +00007178 * match replace the characters and check if the resulting
7179 * word is valid. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007180 p = fword + sp->ts_fidx;
7181
7182 gap = &lp->lp_slang->sl_rep;
7183 while (sp->ts_curi < gap->ga_len)
7184 {
7185 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
7186 if (*ftp->ft_from != *p)
7187 {
7188 /* past possible matching entries */
7189 sp->ts_curi = gap->ga_len;
7190 break;
7191 }
7192 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
7193 && try_deeper(su, stack, depth, SCORE_REP))
7194 {
7195 /* Need to undo this afterwards. */
7196 sp->ts_state = STATE_REP_UNDO;
7197
7198 /* Change the "from" to the "to" string. */
7199 ++depth;
7200 fl = STRLEN(ftp->ft_from);
7201 tl = STRLEN(ftp->ft_to);
7202 if (fl != tl)
Bram Moolenaar0c405862005-06-22 22:26:26 +00007203 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007204 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
Bram Moolenaar0c405862005-06-22 22:26:26 +00007205 repextra += tl - fl;
7206 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007207 mch_memmove(p, ftp->ft_to, tl);
7208 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
Bram Moolenaarea424162005-06-16 21:51:00 +00007209#ifdef FEAT_MBYTE
7210 stack[depth].ts_tcharlen = 0;
7211#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007212 break;
7213 }
7214 }
7215
7216 if (sp->ts_curi >= gap->ga_len)
7217 /* No (more) matches. */
7218 sp->ts_state = STATE_FINAL;
7219
7220 break;
7221
7222 case STATE_REP_UNDO:
7223 /* Undo a REP replacement and continue with the next one. */
7224 ftp = (fromto_T *)lp->lp_slang->sl_rep.ga_data
7225 + sp->ts_curi - 1;
7226 fl = STRLEN(ftp->ft_from);
7227 tl = STRLEN(ftp->ft_to);
7228 p = fword + sp->ts_fidx;
7229 if (fl != tl)
Bram Moolenaar0c405862005-06-22 22:26:26 +00007230 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007231 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
Bram Moolenaar0c405862005-06-22 22:26:26 +00007232 repextra -= tl - fl;
7233 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007234 mch_memmove(p, ftp->ft_from, fl);
7235 sp->ts_state = STATE_REP;
7236 break;
7237
7238 default:
7239 /* Did all possible states at this level, go up one level. */
7240 --depth;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007241
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007242 if (depth >= 0 && stack[depth].ts_prefixdepth == PREFIXTREE)
7243 {
7244 /* Continue in or go back to the prefix tree. */
7245 byts = pbyts;
7246 idxs = pidxs;
7247 splitoff = 0;
7248 }
7249
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007250 /* Don't check for CTRL-C too often, it takes time. */
7251 line_breakcheck();
7252 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007253 }
7254 }
7255}
7256
7257/*
7258 * Try going one level deeper in the tree.
7259 */
7260 static int
7261try_deeper(su, stack, depth, score_add)
7262 suginfo_T *su;
7263 trystate_T *stack;
7264 int depth;
7265 int score_add;
7266{
7267 int newscore;
7268
7269 /* Refuse to go deeper if the scrore is getting too big. */
7270 newscore = stack[depth].ts_score + score_add;
7271 if (newscore >= su->su_maxscore)
7272 return FALSE;
7273
Bram Moolenaarea424162005-06-16 21:51:00 +00007274 stack[depth + 1] = stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007275 stack[depth + 1].ts_state = STATE_START;
7276 stack[depth + 1].ts_score = newscore;
7277 stack[depth + 1].ts_curi = 1; /* start just after length byte */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007278 return TRUE;
7279}
7280
7281/*
7282 * "fword" is a good word with case folded. Find the matching keep-case
7283 * words and put it in "kword".
7284 * Theoretically there could be several keep-case words that result in the
7285 * same case-folded word, but we only find one...
7286 */
7287 static void
7288find_keepcap_word(slang, fword, kword)
7289 slang_T *slang;
7290 char_u *fword;
7291 char_u *kword;
7292{
7293 char_u uword[MAXWLEN]; /* "fword" in upper-case */
7294 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007295 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007296
7297 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007298 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007299 int round[MAXWLEN];
7300 int fwordidx[MAXWLEN];
7301 int uwordidx[MAXWLEN];
7302 int kwordlen[MAXWLEN];
7303
7304 int flen, ulen;
7305 int l;
7306 int len;
7307 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007308 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007309 char_u *p;
7310 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007311 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007312
7313 if (byts == NULL)
7314 {
7315 /* array is empty: "cannot happen" */
7316 *kword = NUL;
7317 return;
7318 }
7319
7320 /* Make an all-cap version of "fword". */
7321 allcap_copy(fword, uword);
7322
7323 /*
7324 * Each character needs to be tried both case-folded and upper-case.
7325 * All this gets very complicated if we keep in mind that changing case
7326 * may change the byte length of a multi-byte character...
7327 */
7328 depth = 0;
7329 arridx[0] = 0;
7330 round[0] = 0;
7331 fwordidx[0] = 0;
7332 uwordidx[0] = 0;
7333 kwordlen[0] = 0;
7334 while (depth >= 0)
7335 {
7336 if (fword[fwordidx[depth]] == NUL)
7337 {
7338 /* We are at the end of "fword". If the tree allows a word to end
7339 * here we have found a match. */
7340 if (byts[arridx[depth] + 1] == 0)
7341 {
7342 kword[kwordlen[depth]] = NUL;
7343 return;
7344 }
7345
7346 /* kword is getting too long, continue one level up */
7347 --depth;
7348 }
7349 else if (++round[depth] > 2)
7350 {
7351 /* tried both fold-case and upper-case character, continue one
7352 * level up */
7353 --depth;
7354 }
7355 else
7356 {
7357 /*
7358 * round[depth] == 1: Try using the folded-case character.
7359 * round[depth] == 2: Try using the upper-case character.
7360 */
7361#ifdef FEAT_MBYTE
7362 if (has_mbyte)
7363 {
7364 flen = mb_ptr2len_check(fword + fwordidx[depth]);
7365 ulen = mb_ptr2len_check(uword + uwordidx[depth]);
7366 }
7367 else
7368#endif
7369 ulen = flen = 1;
7370 if (round[depth] == 1)
7371 {
7372 p = fword + fwordidx[depth];
7373 l = flen;
7374 }
7375 else
7376 {
7377 p = uword + uwordidx[depth];
7378 l = ulen;
7379 }
7380
7381 for (tryidx = arridx[depth]; l > 0; --l)
7382 {
7383 /* Perform a binary search in the list of accepted bytes. */
7384 len = byts[tryidx++];
7385 c = *p++;
7386 lo = tryidx;
7387 hi = tryidx + len - 1;
7388 while (lo < hi)
7389 {
7390 m = (lo + hi) / 2;
7391 if (byts[m] > c)
7392 hi = m - 1;
7393 else if (byts[m] < c)
7394 lo = m + 1;
7395 else
7396 {
7397 lo = hi = m;
7398 break;
7399 }
7400 }
7401
7402 /* Stop if there is no matching byte. */
7403 if (hi < lo || byts[lo] != c)
7404 break;
7405
7406 /* Continue at the child (if there is one). */
7407 tryidx = idxs[lo];
7408 }
7409
7410 if (l == 0)
7411 {
7412 /*
7413 * Found the matching char. Copy it to "kword" and go a
7414 * level deeper.
7415 */
7416 if (round[depth] == 1)
7417 {
7418 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
7419 flen);
7420 kwordlen[depth + 1] = kwordlen[depth] + flen;
7421 }
7422 else
7423 {
7424 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
7425 ulen);
7426 kwordlen[depth + 1] = kwordlen[depth] + ulen;
7427 }
7428 fwordidx[depth + 1] = fwordidx[depth] + flen;
7429 uwordidx[depth + 1] = uwordidx[depth] + ulen;
7430
7431 ++depth;
7432 arridx[depth] = tryidx;
7433 round[depth] = 0;
7434 }
7435 }
7436 }
7437
7438 /* Didn't find it: "cannot happen". */
7439 *kword = NUL;
7440}
7441
7442/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007443 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
7444 * su->su_sga.
7445 */
7446 static void
7447score_comp_sal(su)
7448 suginfo_T *su;
7449{
7450 langp_T *lp;
7451 char_u badsound[MAXWLEN];
7452 int i;
7453 suggest_T *stp;
7454 suggest_T *sstp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007455 int score;
7456
7457 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
7458 return;
7459
7460 /* Use the sound-folding of the first language that supports it. */
7461 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
7462 lp->lp_slang != NULL; ++lp)
7463 if (lp->lp_slang->sl_sal.ga_len > 0)
7464 {
7465 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007466 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007467
7468 for (i = 0; i < su->su_ga.ga_len; ++i)
7469 {
7470 stp = &SUG(su->su_ga, i);
7471
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007472 /* Case-fold the suggested word, sound-fold it and compute the
7473 * sound-a-like score. */
7474 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007475 if (score < SCORE_MAXMAX)
7476 {
7477 /* Add the suggestion. */
7478 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
7479 sstp->st_word = vim_strsave(stp->st_word);
7480 if (sstp->st_word != NULL)
7481 {
7482 sstp->st_score = score;
7483 sstp->st_altscore = 0;
7484 sstp->st_orglen = stp->st_orglen;
7485 ++su->su_sga.ga_len;
7486 }
7487 }
7488 }
7489 break;
7490 }
7491}
7492
7493/*
7494 * Combine the list of suggestions in su->su_ga and su->su_sga.
7495 * They are intwined.
7496 */
7497 static void
7498score_combine(su)
7499 suginfo_T *su;
7500{
7501 int i;
7502 int j;
7503 garray_T ga;
7504 garray_T *gap;
7505 langp_T *lp;
7506 suggest_T *stp;
7507 char_u *p;
7508 char_u badsound[MAXWLEN];
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007509 int round;
7510
7511 /* Add the alternate score to su_ga. */
7512 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
7513 lp->lp_slang != NULL; ++lp)
7514 {
7515 if (lp->lp_slang->sl_sal.ga_len > 0)
7516 {
7517 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007518 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007519
7520 for (i = 0; i < su->su_ga.ga_len; ++i)
7521 {
7522 stp = &SUG(su->su_ga, i);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007523 stp->st_altscore = stp_sal_score(stp, su, lp->lp_slang,
7524 badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007525 if (stp->st_altscore == SCORE_MAXMAX)
7526 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
7527 else
7528 stp->st_score = (stp->st_score * 3
7529 + stp->st_altscore) / 4;
7530 stp->st_salscore = FALSE;
7531 }
7532 break;
7533 }
7534 }
7535
7536 /* Add the alternate score to su_sga. */
7537 for (i = 0; i < su->su_sga.ga_len; ++i)
7538 {
7539 stp = &SUG(su->su_sga, i);
7540 stp->st_altscore = spell_edit_score(su->su_badword, stp->st_word);
7541 if (stp->st_score == SCORE_MAXMAX)
7542 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
7543 else
7544 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
7545 stp->st_salscore = TRUE;
7546 }
7547
7548 /* Sort the suggestions and truncate at "maxcount" for both lists. */
7549 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
7550 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
7551
7552 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
7553 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
7554 return;
7555
7556 stp = &SUG(ga, 0);
7557 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
7558 {
7559 /* round 1: get a suggestion from su_ga
7560 * round 2: get a suggestion from su_sga */
7561 for (round = 1; round <= 2; ++round)
7562 {
7563 gap = round == 1 ? &su->su_ga : &su->su_sga;
7564 if (i < gap->ga_len)
7565 {
7566 /* Don't add a word if it's already there. */
7567 p = SUG(*gap, i).st_word;
7568 for (j = 0; j < ga.ga_len; ++j)
7569 if (STRCMP(stp[j].st_word, p) == 0)
7570 break;
7571 if (j == ga.ga_len)
7572 stp[ga.ga_len++] = SUG(*gap, i);
7573 else
7574 vim_free(p);
7575 }
7576 }
7577 }
7578
7579 ga_clear(&su->su_ga);
7580 ga_clear(&su->su_sga);
7581
7582 /* Truncate the list to the number of suggestions that will be displayed. */
7583 if (ga.ga_len > su->su_maxcount)
7584 {
7585 for (i = su->su_maxcount; i < ga.ga_len; ++i)
7586 vim_free(stp[i].st_word);
7587 ga.ga_len = su->su_maxcount;
7588 }
7589
7590 su->su_ga = ga;
7591}
7592
7593/*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007594 * For the goodword in "stp" compute the soundalike score compared to the
7595 * badword.
7596 */
7597 static int
7598stp_sal_score(stp, su, slang, badsound)
7599 suggest_T *stp;
7600 suginfo_T *su;
7601 slang_T *slang;
7602 char_u *badsound; /* sound-folded badword */
7603{
7604 char_u *p;
7605 char_u badsound2[MAXWLEN];
7606 char_u fword[MAXWLEN];
7607 char_u goodsound[MAXWLEN];
7608
7609 if (stp->st_orglen <= su->su_badlen)
7610 p = badsound;
7611 else
7612 {
7613 /* soundfold the bad word with more characters following */
7614 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
7615
7616 /* When joining two words the sound often changes a lot. E.g., "t he"
7617 * sounds like "t h" while "the" sounds like "@". Avoid that by
7618 * removing the space. Don't do it when the good word also contains a
7619 * space. */
7620 if (vim_iswhite(su->su_badptr[su->su_badlen])
7621 && *skiptowhite(stp->st_word) == NUL)
7622 for (p = fword; *(p = skiptowhite(p)) != NUL; )
7623 mch_memmove(p, p + 1, STRLEN(p));
7624
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007625 spell_soundfold(slang, fword, TRUE, badsound2);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007626 p = badsound2;
7627 }
7628
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007629 /* Sound-fold the word and compute the score for the difference. */
7630 spell_soundfold(slang, stp->st_word, FALSE, goodsound);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007631
7632 return soundalike_score(goodsound, p);
7633}
7634
7635/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007636 * Find suggestions by comparing the word in a sound-a-like form.
7637 */
7638 static void
Bram Moolenaar0c405862005-06-22 22:26:26 +00007639suggest_try_soundalike(su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007640 suginfo_T *su;
7641{
7642 char_u salword[MAXWLEN];
7643 char_u tword[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007644 char_u tsalword[MAXWLEN];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007645 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007646 int curi[MAXWLEN];
7647 langp_T *lp;
7648 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007649 idx_T *idxs;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007650 int depth;
7651 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007652 idx_T n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007653 int round;
7654 int flags;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007655 int sound_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007656
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007657 /* Do this for all languages that support sound folding. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007658 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
7659 lp->lp_slang != NULL; ++lp)
7660 {
7661 if (lp->lp_slang->sl_sal.ga_len > 0)
7662 {
7663 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007664 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007665
7666 /*
7667 * Go through the whole tree, soundfold each word and compare.
7668 * round 1: use the case-folded tree.
7669 * round 2: use the keep-case tree.
7670 */
7671 for (round = 1; round <= 2; ++round)
7672 {
7673 if (round == 1)
7674 {
7675 byts = lp->lp_slang->sl_fbyts;
7676 idxs = lp->lp_slang->sl_fidxs;
7677 }
7678 else
7679 {
7680 byts = lp->lp_slang->sl_kbyts;
7681 idxs = lp->lp_slang->sl_kidxs;
7682 }
7683
7684 depth = 0;
7685 arridx[0] = 0;
7686 curi[0] = 1;
7687 while (depth >= 0 && !got_int)
7688 {
7689 if (curi[depth] > byts[arridx[depth]])
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007690 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007691 /* Done all bytes at this node, go up one level. */
7692 --depth;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007693 line_breakcheck();
7694 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007695 else
7696 {
7697 /* Do one more byte at this node. */
7698 n = arridx[depth] + curi[depth];
7699 ++curi[depth];
7700 c = byts[n];
7701 if (c == 0)
7702 {
7703 /* End of word, deal with the word. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007704 flags = (int)idxs[n];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007705 if (round == 2 || (flags & WF_KEEPCAP) == 0)
7706 {
7707 tword[depth] = NUL;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007708 /* Sound-fold. Only in keep-case tree need to
7709 * case-fold the word. */
7710 spell_soundfold(lp->lp_slang, tword,
7711 round == 1, tsalword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007712
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007713 /* Compute the edit distance between the
7714 * sound-a-like words. */
7715 sound_score = soundalike_score(salword,
7716 tsalword);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007717 if (sound_score < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007718 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007719 char_u cword[MAXWLEN];
7720 char_u *p;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007721 int score;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007722
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007723 if (round == 1 && (flags & WF_CAPMASK) != 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007724 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007725 /* Need to fix case according to
7726 * "flags". */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007727 make_case_word(tword, cword, flags);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007728 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007729 }
7730 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007731 p = tword;
7732
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007733 if (sps_flags & SPS_DOUBLE)
7734 add_suggestion(su, &su->su_sga, p,
Bram Moolenaar0c405862005-06-22 22:26:26 +00007735 su->su_badlen,
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007736 sound_score, 0, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007737 else
7738 {
7739 /* Compute the score. */
7740 score = spell_edit_score(
7741 su->su_badword, p);
7742 if (sps_flags & SPS_BEST)
7743 /* give a bonus for the good word
7744 * sounding the same as the bad
7745 * word */
7746 add_suggestion(su, &su->su_ga, p,
Bram Moolenaar0c405862005-06-22 22:26:26 +00007747 su->su_badlen,
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007748 RESCORE(score, sound_score),
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007749 sound_score, TRUE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007750 else
7751 add_suggestion(su, &su->su_ga, p,
Bram Moolenaar0c405862005-06-22 22:26:26 +00007752 su->su_badlen,
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007753 score + sound_score, 0, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007754 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007755 }
7756 }
7757
7758 /* Skip over other NUL bytes. */
7759 while (byts[n + 1] == 0)
7760 {
7761 ++n;
7762 ++curi[depth];
7763 }
7764 }
7765 else
7766 {
7767 /* Normal char, go one level deeper. */
7768 tword[depth++] = c;
7769 arridx[depth] = idxs[n];
7770 curi[depth] = 1;
7771 }
7772 }
7773 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007774 }
7775 }
7776 }
7777}
7778
7779/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007780 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007781 */
7782 static void
7783make_case_word(fword, cword, flags)
7784 char_u *fword;
7785 char_u *cword;
7786 int flags;
7787{
7788 if (flags & WF_ALLCAP)
7789 /* Make it all upper-case */
7790 allcap_copy(fword, cword);
7791 else if (flags & WF_ONECAP)
7792 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007793 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007794 else
7795 /* Use goodword as-is. */
7796 STRCPY(cword, fword);
7797}
7798
Bram Moolenaarea424162005-06-16 21:51:00 +00007799/*
7800 * Use map string "map" for languages "lp".
7801 */
7802 static void
7803set_map_str(lp, map)
7804 slang_T *lp;
7805 char_u *map;
7806{
7807 char_u *p;
7808 int headc = 0;
7809 int c;
7810 int i;
7811
7812 if (*map == NUL)
7813 {
7814 lp->sl_has_map = FALSE;
7815 return;
7816 }
7817 lp->sl_has_map = TRUE;
7818
7819 /* Init the array and hash table empty. */
7820 for (i = 0; i < 256; ++i)
7821 lp->sl_map_array[i] = 0;
7822#ifdef FEAT_MBYTE
7823 hash_init(&lp->sl_map_hash);
7824#endif
7825
7826 /*
7827 * The similar characters are stored separated with slashes:
7828 * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
7829 * before the same slash. For characters above 255 sl_map_hash is used.
7830 */
7831 for (p = map; *p != NUL; )
7832 {
7833#ifdef FEAT_MBYTE
7834 c = mb_ptr2char_adv(&p);
7835#else
7836 c = *p++;
7837#endif
7838 if (c == '/')
7839 headc = 0;
7840 else
7841 {
7842 if (headc == 0)
7843 headc = c;
7844
7845#ifdef FEAT_MBYTE
7846 /* Characters above 255 don't fit in sl_map_array[], put them in
7847 * the hash table. Each entry is the char, a NUL the headchar and
7848 * a NUL. */
7849 if (c >= 256)
7850 {
7851 int cl = mb_char2len(c);
7852 int headcl = mb_char2len(headc);
7853 char_u *b;
7854 hash_T hash;
7855 hashitem_T *hi;
7856
7857 b = alloc((unsigned)(cl + headcl + 2));
7858 if (b == NULL)
7859 return;
7860 mb_char2bytes(c, b);
7861 b[cl] = NUL;
7862 mb_char2bytes(headc, b + cl + 1);
7863 b[cl + 1 + headcl] = NUL;
7864 hash = hash_hash(b);
7865 hi = hash_lookup(&lp->sl_map_hash, b, hash);
7866 if (HASHITEM_EMPTY(hi))
7867 hash_add_item(&lp->sl_map_hash, hi, b, hash);
7868 else
7869 {
7870 /* This should have been checked when generating the .spl
7871 * file. */
7872 EMSG(_("E999: duplicate char in MAP entry"));
7873 vim_free(b);
7874 }
7875 }
7876 else
7877#endif
7878 lp->sl_map_array[c] = headc;
7879 }
7880 }
7881}
7882
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007883/*
7884 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
7885 * lines in the .aff file.
7886 */
7887 static int
7888similar_chars(slang, c1, c2)
7889 slang_T *slang;
7890 int c1;
7891 int c2;
7892{
Bram Moolenaarea424162005-06-16 21:51:00 +00007893 int m1, m2;
7894#ifdef FEAT_MBYTE
7895 char_u buf[MB_MAXBYTES];
7896 hashitem_T *hi;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007897
Bram Moolenaarea424162005-06-16 21:51:00 +00007898 if (c1 >= 256)
7899 {
7900 buf[mb_char2bytes(c1, buf)] = 0;
7901 hi = hash_find(&slang->sl_map_hash, buf);
7902 if (HASHITEM_EMPTY(hi))
7903 m1 = 0;
7904 else
7905 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
7906 }
7907 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007908#endif
Bram Moolenaarea424162005-06-16 21:51:00 +00007909 m1 = slang->sl_map_array[c1];
7910 if (m1 == 0)
7911 return FALSE;
7912
7913
7914#ifdef FEAT_MBYTE
7915 if (c2 >= 256)
7916 {
7917 buf[mb_char2bytes(c2, buf)] = 0;
7918 hi = hash_find(&slang->sl_map_hash, buf);
7919 if (HASHITEM_EMPTY(hi))
7920 m2 = 0;
7921 else
7922 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
7923 }
7924 else
7925#endif
7926 m2 = slang->sl_map_array[c2];
7927
7928 return m1 == m2;
7929}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007930
7931/*
7932 * Add a suggestion to the list of suggestions.
7933 * Do not add a duplicate suggestion or suggestions with a bad score.
7934 * When "use_score" is not zero it's used, otherwise the score is computed
7935 * with spell_edit_score().
7936 */
7937 static void
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007938add_suggestion(su, gap, goodword, badlen, score, altscore, had_bonus)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007939 suginfo_T *su;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007940 garray_T *gap;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007941 char_u *goodword;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007942 int badlen; /* length of bad word used */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007943 int score;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007944 int altscore;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007945 int had_bonus; /* value for st_had_bonus */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007946{
7947 suggest_T *stp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007948 int i;
Bram Moolenaar0c405862005-06-22 22:26:26 +00007949 char_u *p = NULL;
7950 int c = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007951
7952 /* Check that the word wasn't banned. */
7953 if (was_banned(su, goodword))
7954 return;
7955
Bram Moolenaar0c405862005-06-22 22:26:26 +00007956 /* If past "su_badlen" and the rest is identical stop at "su_badlen".
7957 * Remove the common part from "goodword". */
7958 i = badlen - su->su_badlen;
7959 if (i > 0)
7960 {
7961 /* This assumes there was no case folding or it didn't change the
7962 * length... */
7963 p = goodword + STRLEN(goodword) - i;
7964 if (p > goodword && STRNICMP(su->su_badptr + su->su_badlen, p, i) == 0)
7965 {
7966 badlen = su->su_badlen;
7967 c = *p;
7968 *p = NUL;
7969 }
7970 else
7971 p = NULL;
7972 }
Bram Moolenaara1ba8112005-06-28 23:23:32 +00007973 else if (i < 0)
7974 {
7975 /* When replacing part of the word check that we actually change
7976 * something. For "the the" a suggestion can be replacing the first
7977 * "the" with itself, since "the" wasn't banned. */
7978 if (badlen == STRLEN(goodword)
7979 && STRNCMP(su->su_badword, goodword, badlen) == 0)
7980 return;
7981 }
7982
Bram Moolenaar0c405862005-06-22 22:26:26 +00007983
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007984 if (score <= su->su_maxscore)
7985 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007986 /* Check if the word is already there. Also check the length that is
7987 * being replaced "thes," -> "these" is a different suggestion from
7988 * "thes" -> "these". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007989 stp = &SUG(*gap, 0);
7990 for (i = gap->ga_len - 1; i >= 0; --i)
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00007991 if (STRCMP(stp[i].st_word, goodword) == 0
7992 && stp[i].st_orglen == badlen)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007993 {
7994 /* Found it. Remember the lowest score. */
7995 if (stp[i].st_score > score)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007996 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007997 stp[i].st_score = score;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007998 stp[i].st_had_bonus = had_bonus;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007999 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008000 break;
8001 }
8002
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008003 if (i < 0 && ga_grow(gap, 1) == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008004 {
8005 /* Add a suggestion. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008006 stp = &SUG(*gap, gap->ga_len);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008007 stp->st_word = vim_strsave(goodword);
8008 if (stp->st_word != NULL)
8009 {
8010 stp->st_score = score;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008011 stp->st_altscore = altscore;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008012 stp->st_had_bonus = had_bonus;
Bram Moolenaar0c405862005-06-22 22:26:26 +00008013 stp->st_orglen = badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008014 ++gap->ga_len;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008015
8016 /* If we have too many suggestions now, sort the list and keep
8017 * the best suggestions. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008018 if (gap->ga_len > SUG_MAX_COUNT(su))
8019 su->su_maxscore = cleanup_suggestions(gap, su->su_maxscore,
8020 SUG_CLEAN_COUNT(su));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008021 }
8022 }
8023 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00008024
8025 if (p != NULL)
8026 *p = c; /* restore "goodword" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008027}
8028
8029/*
8030 * Add a word to be banned.
8031 */
8032 static void
8033add_banned(su, word)
8034 suginfo_T *su;
8035 char_u *word;
8036{
8037 char_u *s = vim_strsave(word);
8038 hash_T hash;
8039 hashitem_T *hi;
8040
8041 if (s != NULL)
8042 {
8043 hash = hash_hash(s);
8044 hi = hash_lookup(&su->su_banned, s, hash);
8045 if (HASHITEM_EMPTY(hi))
8046 hash_add_item(&su->su_banned, hi, s, hash);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00008047 else
8048 vim_free(s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008049 }
8050}
8051
8052/*
8053 * Return TRUE if a word appears in the list of banned words.
8054 */
8055 static int
8056was_banned(su, word)
8057 suginfo_T *su;
8058 char_u *word;
8059{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008060 hashitem_T *hi = hash_find(&su->su_banned, word);
8061
8062 return !HASHITEM_EMPTY(hi);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008063}
8064
8065/*
8066 * Free the banned words in "su".
8067 */
8068 static void
8069free_banned(su)
8070 suginfo_T *su;
8071{
8072 int todo;
8073 hashitem_T *hi;
8074
8075 todo = su->su_banned.ht_used;
8076 for (hi = su->su_banned.ht_array; todo > 0; ++hi)
8077 {
8078 if (!HASHITEM_EMPTY(hi))
8079 {
8080 vim_free(hi->hi_key);
8081 --todo;
8082 }
8083 }
8084 hash_clear(&su->su_banned);
8085}
8086
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008087/*
8088 * Recompute the score if sound-folding is possible. This is slow,
8089 * thus only done for the final results.
8090 */
8091 static void
8092rescore_suggestions(su)
8093 suginfo_T *su;
8094{
8095 langp_T *lp;
8096 suggest_T *stp;
8097 char_u sal_badword[MAXWLEN];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008098 int i;
8099
8100 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
8101 lp->lp_slang != NULL; ++lp)
8102 {
8103 if (lp->lp_slang->sl_sal.ga_len > 0)
8104 {
8105 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008106 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, sal_badword);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008107
8108 for (i = 0; i < su->su_ga.ga_len; ++i)
8109 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008110 stp = &SUG(su->su_ga, i);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008111 if (!stp->st_had_bonus)
8112 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008113 stp->st_altscore = stp_sal_score(stp, su,
8114 lp->lp_slang, sal_badword);
8115 if (stp->st_altscore == SCORE_MAXMAX)
8116 stp->st_altscore = SCORE_BIG;
8117 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008118 }
8119 }
8120 break;
8121 }
8122 }
8123}
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008124
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008125static int
8126#ifdef __BORLANDC__
8127_RTLENTRYF
8128#endif
8129sug_compare __ARGS((const void *s1, const void *s2));
8130
8131/*
8132 * Function given to qsort() to sort the suggestions on st_score.
8133 */
8134 static int
8135#ifdef __BORLANDC__
8136_RTLENTRYF
8137#endif
8138sug_compare(s1, s2)
8139 const void *s1;
8140 const void *s2;
8141{
8142 suggest_T *p1 = (suggest_T *)s1;
8143 suggest_T *p2 = (suggest_T *)s2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008144 int n = p1->st_score - p2->st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008145
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008146 if (n == 0)
8147 return p1->st_altscore - p2->st_altscore;
8148 return n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008149}
8150
8151/*
8152 * Cleanup the suggestions:
8153 * - Sort on score.
8154 * - Remove words that won't be displayed.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008155 * Returns the maximum score in the list or "maxscore" unmodified.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008156 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008157 static int
8158cleanup_suggestions(gap, maxscore, keep)
8159 garray_T *gap;
8160 int maxscore;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008161 int keep; /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008162{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008163 suggest_T *stp = &SUG(*gap, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008164 int i;
8165
8166 /* Sort the list. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008167 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008168
8169 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008170 if (gap->ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008171 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008172 for (i = keep; i < gap->ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008173 vim_free(stp[i].st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008174 gap->ga_len = keep;
8175 return stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008176 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008177 return maxscore;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008178}
8179
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Sound-fold "word" for the soundfold() function.
 * The first language of the current window that has SAL items does the
 * folding.  When spell checking is off, b_p_spl is empty or no language has
 * SAL items, a copy of "word" itself is returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(word)
    char_u      *word;
{
    char_u      folded[MAXWLEN];
    langp_T     *entry;

    /* Nothing to fold with: hand back the word as-is. */
    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
        return vim_strsave(word);

    /* Walk the language list until the entry without a slang. */
    entry = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
    while (entry->lp_slang != NULL)
    {
        if (entry->lp_slang->sl_sal.ga_len > 0)
        {
            /* First language that supports sound folding: use it. */
            spell_soundfold(entry->lp_slang, word, FALSE, folded);
            return vim_strsave(folded);
        }
        ++entry;
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
8207
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008208/*
8209 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
8210 */
8211 static void
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008212spell_soundfold(slang, inword, folded, res)
8213 slang_T *slang;
8214 char_u *inword;
8215 int folded; /* "inword" is already case-folded */
8216 char_u *res;
8217{
8218 char_u fword[MAXWLEN];
8219 char_u *word;
8220
8221 if (slang->sl_sofo)
8222 /* SOFOFROM and SOFOTO used */
8223 spell_soundfold_sofo(slang, inword, res);
8224 else
8225 {
8226 /* SAL items used. Requires the word to be case-folded. */
8227 if (folded)
8228 word = inword;
8229 else
8230 {
8231 (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN);
8232 word = fword;
8233 }
8234
8235#ifdef FEAT_MBYTE
8236 if (has_mbyte)
8237 spell_soundfold_wsal(slang, word, res);
8238 else
8239#endif
8240 spell_soundfold_sal(slang, word, res);
8241 }
8242}
8243
8244/*
8245 * Perform sound folding of "inword" into "res" according to SOFOFROM and
8246 * SOFOTO lines.
8247 */
8248 static void
8249spell_soundfold_sofo(slang, inword, res)
8250 slang_T *slang;
8251 char_u *inword;
8252 char_u *res;
8253{
8254 char_u *s;
8255 int ri = 0;
8256 int c;
8257
8258#ifdef FEAT_MBYTE
8259 if (has_mbyte)
8260 {
8261 int prevc = 0;
8262 int *ip;
8263
8264 /* The sl_sal_first[] table contains the translation for chars up to
8265 * 255, sl_sal the rest. */
8266 for (s = inword; *s != NUL; )
8267 {
8268 c = mb_ptr2char_adv(&s);
8269 if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
8270 c = ' ';
8271 else if (c < 256)
8272 c = slang->sl_sal_first[c];
8273 else
8274 {
8275 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
8276 if (ip == NULL) /* empty list, can't match */
8277 c = NUL;
8278 else
8279 for (;;) /* find "c" in the list */
8280 {
8281 if (*ip == 0) /* not found */
8282 {
8283 c = NUL;
8284 break;
8285 }
8286 if (*ip == c) /* match! */
8287 {
8288 c = ip[1];
8289 break;
8290 }
8291 ip += 2;
8292 }
8293 }
8294
8295 if (c != NUL && c != prevc)
8296 {
8297 ri += mb_char2bytes(c, res + ri);
8298 if (ri + MB_MAXBYTES > MAXWLEN)
8299 break;
8300 prevc = c;
8301 }
8302 }
8303 }
8304 else
8305#endif
8306 {
8307 /* The sl_sal_first[] table contains the translation. */
8308 for (s = inword; (c = *s) != NUL; ++s)
8309 {
8310 if (vim_iswhite(c))
8311 c = ' ';
8312 else
8313 c = slang->sl_sal_first[c];
8314 if (c != NUL && (ri == 0 || res[ri - 1] != c))
8315 res[ri++] = c;
8316 }
8317 }
8318
8319 res[ri] = NUL;
8320}
8321
8322 static void
8323spell_soundfold_sal(slang, inword, res)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008324 slang_T *slang;
8325 char_u *inword;
8326 char_u *res;
8327{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008328 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008329 char_u word[MAXWLEN];
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008330 char_u *s = inword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008331 char_u *t;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008332 char_u *pf;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008333 int i, j, z;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008334 int reslen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008335 int n, k = 0;
8336 int z0;
8337 int k0;
8338 int n0;
8339 int c;
8340 int pri;
8341 int p0 = -333;
8342 int c0;
8343
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008344 /* Remove accents, if wanted. We actually remove all non-word characters.
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008345 * But keep white space. We need a copy, the word may be changed here. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008346 if (slang->sl_rem_accents)
8347 {
8348 t = word;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008349 while (*s != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008350 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008351 if (vim_iswhite(*s))
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008352 {
8353 *t++ = ' ';
8354 s = skipwhite(s);
8355 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008356 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008357 {
Bram Moolenaarea408852005-06-25 22:49:46 +00008358 if (spell_iswordp(s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008359 *t++ = *s;
8360 ++s;
8361 }
8362 }
8363 *t = NUL;
8364 }
8365 else
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008366 STRCPY(word, s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008367
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008368 smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008369
8370 /*
8371 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008372 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008373 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008374 i = reslen = z = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008375 while ((c = word[i]) != NUL)
8376 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008377 /* Start with the first rule that has the character in the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008378 n = slang->sl_sal_first[c];
8379 z0 = 0;
8380
8381 if (n >= 0)
8382 {
8383 /* check all rules for the same letter */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008384 for (; (s = smp[n].sm_lead)[0] == c; ++n)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008385 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008386 /* Quickly skip entries that don't match the word. Most
8387 * entries are less then three chars, optimize for that. */
8388 k = smp[n].sm_leadlen;
8389 if (k > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008390 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008391 if (word[i + 1] != s[1])
8392 continue;
8393 if (k > 2)
8394 {
8395 for (j = 2; j < k; ++j)
8396 if (word[i + j] != s[j])
8397 break;
8398 if (j < k)
8399 continue;
8400 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008401 }
8402
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008403 if ((pf = smp[n].sm_oneof) != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008404 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008405 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008406 while (*pf != NUL && *pf != word[i + k])
8407 ++pf;
8408 if (*pf == NUL)
8409 continue;
8410 ++k;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008411 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008412 s = smp[n].sm_rules;
8413 pri = 5; /* default priority */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008414
8415 p0 = *s;
8416 k0 = k;
8417 while (*s == '-' && k > 1)
8418 {
8419 k--;
8420 s++;
8421 }
8422 if (*s == '<')
8423 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008424 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008425 {
8426 /* determine priority */
8427 pri = *s - '0';
8428 s++;
8429 }
8430 if (*s == '^' && *(s + 1) == '^')
8431 s++;
8432
8433 if (*s == NUL
8434 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008435 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaarea408852005-06-25 22:49:46 +00008436 || spell_iswordp(word + i - 1)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008437 && (*(s + 1) != '$'
Bram Moolenaarea408852005-06-25 22:49:46 +00008438 || (!spell_iswordp(word + i + k0))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008439 || (*s == '$' && i > 0
Bram Moolenaarea408852005-06-25 22:49:46 +00008440 && spell_iswordp(word + i - 1)
8441 && (!spell_iswordp(word + i + k0))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008442 {
8443 /* search for followup rules, if: */
8444 /* followup and k > 1 and NO '-' in searchstring */
8445 c0 = word[i + k - 1];
8446 n0 = slang->sl_sal_first[c0];
8447
8448 if (slang->sl_followup && k > 1 && n0 >= 0
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008449 && p0 != '-' && word[i + k] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008450 {
8451 /* test follow-up rule for "word[i + k]" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008452 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008453 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008454 /* Quickly skip entries that don't match the word.
8455 * */
8456 k0 = smp[n0].sm_leadlen;
8457 if (k0 > 1)
8458 {
8459 if (word[i + k] != s[1])
8460 continue;
8461 if (k0 > 2)
8462 {
8463 pf = word + i + k + 1;
8464 for (j = 2; j < k0; ++j)
8465 if (*pf++ != s[j])
8466 break;
8467 if (j < k0)
8468 continue;
8469 }
8470 }
8471 k0 += k - 1;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008472
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008473 if ((pf = smp[n0].sm_oneof) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008474 {
8475 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +00008476 * "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008477 while (*pf != NUL && *pf != word[i + k0])
8478 ++pf;
8479 if (*pf == NUL)
8480 continue;
8481 ++k0;
8482 }
8483
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008484 p0 = 5;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008485 s = smp[n0].sm_rules;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008486 while (*s == '-')
8487 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008488 /* "k0" gets NOT reduced because
8489 * "if (k0 == k)" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008490 s++;
8491 }
8492 if (*s == '<')
8493 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008494 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008495 {
8496 p0 = *s - '0';
8497 s++;
8498 }
8499
8500 if (*s == NUL
8501 /* *s == '^' cuts */
8502 || (*s == '$'
Bram Moolenaarea408852005-06-25 22:49:46 +00008503 && !spell_iswordp(word + i + k0)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008504 {
8505 if (k0 == k)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008506 /* this is just a piece of the string */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008507 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008508
8509 if (p0 < pri)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008510 /* priority too low */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008511 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008512 /* rule fits; stop search */
8513 break;
8514 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008515 }
8516
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008517 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008518 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008519 }
8520
8521 /* replace string */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008522 s = smp[n].sm_to;
8523 pf = smp[n].sm_rules;
8524 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008525 if (p0 == 1 && z == 0)
8526 {
8527 /* rule with '<' is used */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008528 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
8529 || res[reslen - 1] == *s))
8530 reslen--;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008531 z0 = 1;
8532 z = 1;
8533 k0 = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00008534 while (*s != NUL && word[i + k0] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008535 {
8536 word[i + k0] = *s;
8537 k0++;
8538 s++;
8539 }
8540 if (k > k0)
8541 mch_memmove(word + i + k0, word + i + k,
8542 STRLEN(word + i + k) + 1);
8543
8544 /* new "actual letter" */
8545 c = word[i];
8546 }
8547 else
8548 {
8549 /* no '<' rule used */
8550 i += k - 1;
8551 z = 0;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008552 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008553 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008554 if (reslen == 0 || res[reslen - 1] != *s)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00008555 res[reslen++] = *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008556 s++;
8557 }
8558 /* new "actual letter" */
8559 c = *s;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008560 if (strstr((char *)pf, "^^") != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008561 {
8562 if (c != NUL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00008563 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008564 mch_memmove(word, word + i + 1,
8565 STRLEN(word + i + 1) + 1);
8566 i = 0;
8567 z0 = 1;
8568 }
8569 }
8570 break;
8571 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008572 }
8573 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008574 else if (vim_iswhite(c))
8575 {
8576 c = ' ';
8577 k = 1;
8578 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008579
8580 if (z0 == 0)
8581 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008582 if (k && !p0 && reslen < MAXWLEN && c != NUL
8583 && (!slang->sl_collapse || reslen == 0
8584 || res[reslen - 1] != c))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008585 /* condense only double letters */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00008586 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008587
8588 i++;
8589 z = 0;
8590 k = 0;
8591 }
8592 }
8593
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008594 res[reslen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008595}
8596
#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
 * Multi-byte version of spell_soundfold().
 * "inword" is first converted to an array of character values, the SAL rules
 * are applied on that array (using the sm_lead_w, sm_oneof_w and sm_to_w
 * fields) and the outcome is converted back to a multi-byte string.
 * The rule string (sm_rules) is interpreted like in spell_soundfold_sal().
 * The result is truncated when it does not fit; it is always NUL terminated.
 */
    static void
spell_soundfold_wsal(slang, inword, res)
    slang_T     *slang;
    char_u      *inword;
    char_u      *res;
{
    salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
    int         word[MAXWLEN];
    int         wres[MAXWLEN];
    int         l;
    char_u      *s;
    int         *ws;
    char_u      *t;
    int         *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;
    int         did_white = FALSE;
    int         taillen;        /* nr of chars in "word" after a match */

    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     */
    n = 0;
    for (s = inword; *s != NUL; )
    {
        t = s;
        c = mb_ptr2char_adv(&s);
        if (slang->sl_rem_accents)
        {
            if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
            {
                /* A run of white space becomes one space. */
                if (did_white)
                    continue;
                c = ' ';
                did_white = TRUE;
            }
            else
            {
                did_white = FALSE;
                if (!spell_iswordp(t))
                    continue;
            }
        }
        word[n++] = c;
    }
    word[n] = NUL;

    /*
     * This comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c & 0xff];
        z0 = 0;

        if (n >= 0)
        {
            /* check all rules for the same index byte */
            for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are less then three chars, optimize for that. */
                if (c != ws[0])
                    continue;
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != ws[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != ws[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof_w) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                p0 = *s;
                k0 = k;
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                    || spell_iswordp_w(word + i - 1)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp_w(word + i + k0))))
                        || (*s == '$' && i > 0
                            && spell_iswordp_w(word + i - 1)
                            && (!spell_iswordp_w(word + i + k0))))
                {
                    /* search for followup rules, if: */
                    /* followup and k > 1  and  NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0 & 0xff];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                            && p0 != '-' && word[i + k] != NUL)
                    {
                        /* Test follow-up rule for "word[i + k]"; loop over
                         * all entries with the same index byte. */
                        for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
                                == (c0 & 0xff); ++n0)
                        {
                            /* Quickly skip entries that don't match the
                             * word. */
                            if (c0 != ws[0])
                                continue;
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != ws[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != ws[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof_w) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                        && !spell_iswordp_w(word + i + k0)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* A follow-up rule with sufficient priority was
                         * found: skip the current rule. */
                        if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
                                == (c0 & 0xff))
                            continue;
                    }

                    /* replace string */
                    ws = smp[n].sm_to_w;
                    s = smp[n].sm_rules;
                    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used */
                        if (reslen > 0 && *ws != NUL && (wres[reslen - 1] == c
                                    || wres[reslen - 1] == *ws))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        /* Put the replacement in the word itself, it will be
                         * matched against the rules again. */
                        while (*ws != NUL && word[i + k0] != NUL)
                        {
                            word[i + k0] = *ws;
                            k0++;
                            ws++;
                        }
                        if (k > k0)
                        {
                            /* "word" is an array of int: count the remaining
                             * characters here.  STRLEN() counts bytes and
                             * stops at the first zero byte, which gives the
                             * wrong number of elements to move. */
                            for (taillen = 0; word[i + k + taillen] != NUL;
                                    ++taillen)
                                ;
                            mch_memmove(word + i + k0, word + i + k,
                                    sizeof(int) * (taillen + 1));
                        }

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        /* Append all but the last character of the
                         * replacement. */
                        while (*ws != NUL && ws[1] != NUL && reslen < MAXWLEN)
                        {
                            if (reslen == 0 || wres[reslen - 1] != *ws)
                                wres[reslen++] = *ws;
                            ws++;
                        }
                        /* new "actual letter" */
                        c = *ws;
                        if (strstr((char *)s, "^^") != NULL)
                        {
                            /* Don't write past the end of "wres". */
                            if (c != NUL && reslen < MAXWLEN)
                                wres[reslen++] = c;
                            /* Drop everything up to and including the match;
                             * count ints, not bytes (see above). */
                            for (taillen = 0; word[i + 1 + taillen] != NUL;
                                    ++taillen)
                                ;
                            mch_memmove(word, word + i + 1,
                                    sizeof(int) * (taillen + 1));
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (vim_iswhite(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                        || wres[reslen - 1] != c))
                /* condense only double letters */
                wres[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res". */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
        l += mb_char2bytes(wres[n], res + l);
        /* Stop when another character might not fit. */
        if (l + MB_MAXBYTES > MAXWLEN)
            break;
    }
    res[l] = NUL;
}
#endif
8901
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008902/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008903 * Compute a score for two sound-a-like words.
8904 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
8905 * Instead of a generic loop we write out the code. That keeps it fast by
8906 * avoiding checks that will not be possible.
8907 */
8908 static int
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008909soundalike_score(goodstart, badstart)
8910 char_u *goodstart; /* sound-folded good word */
8911 char_u *badstart; /* sound-folded bad word */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008912{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008913 char_u *goodsound = goodstart;
8914 char_u *badsound = badstart;
8915 int goodlen;
8916 int badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008917 int n;
8918 char_u *pl, *ps;
8919 char_u *pl2, *ps2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008920 int score = 0;
8921
8922 /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
8923 * counted so much, vowels halfway the word aren't counted at all. */
8924 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
8925 {
8926 score = SCORE_DEL / 2;
8927 if (*badsound == '*')
8928 ++badsound;
8929 else
8930 ++goodsound;
8931 }
8932
8933 goodlen = STRLEN(goodsound);
8934 badlen = STRLEN(badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008935
8936 /* Return quickly if the lenghts are too different to be fixed by two
8937 * changes. */
8938 n = goodlen - badlen;
8939 if (n < -2 || n > 2)
8940 return SCORE_MAXMAX;
8941
8942 if (n > 0)
8943 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008944 pl = goodsound; /* goodsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008945 ps = badsound;
8946 }
8947 else
8948 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008949 pl = badsound; /* badsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008950 ps = goodsound;
8951 }
8952
8953 /* Skip over the identical part. */
8954 while (*pl == *ps && *pl != NUL)
8955 {
8956 ++pl;
8957 ++ps;
8958 }
8959
8960 switch (n)
8961 {
8962 case -2:
8963 case 2:
8964 /*
8965 * Must delete two characters from "pl".
8966 */
8967 ++pl; /* first delete */
8968 while (*pl == *ps)
8969 {
8970 ++pl;
8971 ++ps;
8972 }
8973 /* strings must be equal after second delete */
8974 if (STRCMP(pl + 1, ps) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008975 return score + SCORE_DEL * 2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008976
8977 /* Failed to compare. */
8978 break;
8979
8980 case -1:
8981 case 1:
8982 /*
8983 * Minimal one delete from "pl" required.
8984 */
8985
8986 /* 1: delete */
8987 pl2 = pl + 1;
8988 ps2 = ps;
8989 while (*pl2 == *ps2)
8990 {
8991 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008992 return score + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008993 ++pl2;
8994 ++ps2;
8995 }
8996
8997 /* 2: delete then swap, then rest must be equal */
8998 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
8999 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009000 return score + SCORE_DEL + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009001
9002 /* 3: delete then substitute, then the rest must be equal */
9003 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009004 return score + SCORE_DEL + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009005
9006 /* 4: first swap then delete */
9007 if (pl[0] == ps[1] && pl[1] == ps[0])
9008 {
9009 pl2 = pl + 2; /* swap, skip two chars */
9010 ps2 = ps + 2;
9011 while (*pl2 == *ps2)
9012 {
9013 ++pl2;
9014 ++ps2;
9015 }
9016 /* delete a char and then strings must be equal */
9017 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009018 return score + SCORE_SWAP + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009019 }
9020
9021 /* 5: first substitute then delete */
9022 pl2 = pl + 1; /* substitute, skip one char */
9023 ps2 = ps + 1;
9024 while (*pl2 == *ps2)
9025 {
9026 ++pl2;
9027 ++ps2;
9028 }
9029 /* delete a char and then strings must be equal */
9030 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009031 return score + SCORE_SUBST + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009032
9033 /* Failed to compare. */
9034 break;
9035
9036 case 0:
9037 /*
9038 * Lenghts are equal, thus changes must result in same length: An
9039 * insert is only possible in combination with a delete.
9040 * 1: check if for identical strings
9041 */
9042 if (*pl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009043 return score;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009044
9045 /* 2: swap */
9046 if (pl[0] == ps[1] && pl[1] == ps[0])
9047 {
9048 pl2 = pl + 2; /* swap, skip two chars */
9049 ps2 = ps + 2;
9050 while (*pl2 == *ps2)
9051 {
9052 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009053 return score + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009054 ++pl2;
9055 ++ps2;
9056 }
9057 /* 3: swap and swap again */
9058 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
9059 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009060 return score + SCORE_SWAP + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009061
9062 /* 4: swap and substitute */
9063 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009064 return score + SCORE_SWAP + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009065 }
9066
9067 /* 5: substitute */
9068 pl2 = pl + 1;
9069 ps2 = ps + 1;
9070 while (*pl2 == *ps2)
9071 {
9072 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009073 return score + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009074 ++pl2;
9075 ++ps2;
9076 }
9077
9078 /* 6: substitute and swap */
9079 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
9080 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009081 return score + SCORE_SUBST + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009082
9083 /* 7: substitute and substitute */
9084 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009085 return score + SCORE_SUBST + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009086
9087 /* 8: insert then delete */
9088 pl2 = pl;
9089 ps2 = ps + 1;
9090 while (*pl2 == *ps2)
9091 {
9092 ++pl2;
9093 ++ps2;
9094 }
9095 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009096 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009097
9098 /* 9: delete then insert */
9099 pl2 = pl + 1;
9100 ps2 = ps;
9101 while (*pl2 == *ps2)
9102 {
9103 ++pl2;
9104 ++ps2;
9105 }
9106 if (STRCMP(pl2, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009107 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009108
9109 /* Failed to compare. */
9110 break;
9111 }
9112
9113 return SCORE_MAXMAX;
9114}
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009115
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009116/*
9117 * Compute the "edit distance" to turn "badword" into "goodword". The less
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009118 * deletes/inserts/substitutes/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009119 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009120 * The algorithm comes from Aspell editdist.cpp, edit_distance().
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009121 * It has been converted from C++ to C and modified to support multi-byte
9122 * characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009123 */
9124 static int
9125spell_edit_score(badword, goodword)
9126 char_u *badword;
9127 char_u *goodword;
9128{
9129 int *cnt;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009130 int badlen, goodlen; /* lenghts including NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009131 int j, i;
9132 int t;
9133 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009134 int pbc, pgc;
9135#ifdef FEAT_MBYTE
9136 char_u *p;
9137 int wbadword[MAXWLEN];
9138 int wgoodword[MAXWLEN];
9139
9140 if (has_mbyte)
9141 {
9142 /* Get the characters from the multi-byte strings and put them in an
9143 * int array for easy access. */
9144 for (p = badword, badlen = 0; *p != NUL; )
9145 wbadword[badlen++] = mb_ptr2char_adv(&p);
9146 ++badlen;
9147 for (p = goodword, goodlen = 0; *p != NUL; )
9148 wgoodword[goodlen++] = mb_ptr2char_adv(&p);
9149 ++goodlen;
9150 }
9151 else
9152#endif
9153 {
9154 badlen = STRLEN(badword) + 1;
9155 goodlen = STRLEN(goodword) + 1;
9156 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009157
9158 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
9159#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009160 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
9161 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009162 if (cnt == NULL)
9163 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009164
9165 CNT(0, 0) = 0;
9166 for (j = 1; j <= goodlen; ++j)
9167 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL;
9168
9169 for (i = 1; i <= badlen; ++i)
9170 {
9171 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
9172 for (j = 1; j <= goodlen; ++j)
9173 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009174#ifdef FEAT_MBYTE
9175 if (has_mbyte)
9176 {
9177 bc = wbadword[i - 1];
9178 gc = wgoodword[j - 1];
9179 }
9180 else
9181#endif
9182 {
9183 bc = badword[i - 1];
9184 gc = goodword[j - 1];
9185 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009186 if (bc == gc)
9187 CNT(i, j) = CNT(i - 1, j - 1);
9188 else
9189 {
9190 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009191 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009192 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
9193 else
9194 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
9195
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009196 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009197 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00009198#ifdef FEAT_MBYTE
9199 if (has_mbyte)
9200 {
9201 pbc = wbadword[i - 2];
9202 pgc = wgoodword[j - 2];
9203 }
9204 else
9205#endif
9206 {
9207 pbc = badword[i - 2];
9208 pgc = goodword[j - 2];
9209 }
9210 if (bc == pgc && pbc == gc)
9211 {
9212 t = SCORE_SWAP + CNT(i - 2, j - 2);
9213 if (t < CNT(i, j))
9214 CNT(i, j) = t;
9215 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009216 }
9217 t = SCORE_DEL + CNT(i - 1, j);
9218 if (t < CNT(i, j))
9219 CNT(i, j) = t;
9220 t = SCORE_INS + CNT(i, j - 1);
9221 if (t < CNT(i, j))
9222 CNT(i, j) = t;
9223 }
9224 }
9225 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00009226
9227 i = CNT(badlen - 1, goodlen - 1);
9228 vim_free(cnt);
9229 return i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00009230}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00009231
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009232/*
9233 * ":spelldump"
9234 */
9235/*ARGSUSED*/
9236 void
9237ex_spelldump(eap)
9238 exarg_T *eap;
9239{
9240 buf_T *buf = curbuf;
9241 langp_T *lp;
9242 slang_T *slang;
9243 idx_T arridx[MAXWLEN];
9244 int curi[MAXWLEN];
9245 char_u word[MAXWLEN];
9246 int c;
9247 char_u *byts;
9248 idx_T *idxs;
9249 linenr_T lnum = 0;
9250 int round;
9251 int depth;
9252 int n;
9253 int flags;
9254
9255 if (no_spell_checking())
9256 return;
9257
9258 /* Create a new empty buffer by splitting the window. */
9259 do_cmdline_cmd((char_u *)"new");
9260 if (!bufempty() || !buf_valid(buf))
9261 return;
9262
9263 for (lp = LANGP_ENTRY(buf->b_langp, 0); lp->lp_slang != NULL; ++lp)
9264 {
9265 slang = lp->lp_slang;
9266
9267 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
9268 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
9269
9270 /* round 1: case-folded tree
9271 * round 2: keep-case tree */
9272 for (round = 1; round <= 2; ++round)
9273 {
9274 if (round == 1)
9275 {
9276 byts = slang->sl_fbyts;
9277 idxs = slang->sl_fidxs;
9278 }
9279 else
9280 {
9281 byts = slang->sl_kbyts;
9282 idxs = slang->sl_kidxs;
9283 }
9284 if (byts == NULL)
9285 continue; /* array is empty */
9286
9287 depth = 0;
9288 arridx[0] = 0;
9289 curi[0] = 1;
9290 while (depth >= 0 && !got_int)
9291 {
9292 if (curi[depth] > byts[arridx[depth]])
9293 {
9294 /* Done all bytes at this node, go up one level. */
9295 --depth;
9296 line_breakcheck();
9297 }
9298 else
9299 {
9300 /* Do one more byte at this node. */
9301 n = arridx[depth] + curi[depth];
9302 ++curi[depth];
9303 c = byts[n];
9304 if (c == 0)
9305 {
9306 /* End of word, deal with the word.
9307 * Don't use keep-case words in the fold-case tree,
9308 * they will appear in the keep-case tree.
9309 * Only use the word when the region matches. */
9310 flags = (int)idxs[n];
9311 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
9312 && ((flags & WF_REGION) == 0
9313 || (((unsigned)flags >> 8)
9314 & lp->lp_region) != 0))
9315 {
9316 word[depth] = NUL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00009317
9318 /* Dump the basic word if there is no prefix or
9319 * when it's the first one. */
9320 c = (unsigned)flags >> 16;
9321 if (c == 0 || curi[depth] == 2)
9322 dump_word(word, round, flags, lnum++);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009323
9324 /* Apply the prefix, if there is one. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00009325 if (c != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009326 lnum = apply_prefixes(slang, word, round,
9327 flags, lnum);
9328 }
9329 }
9330 else
9331 {
9332 /* Normal char, go one level deeper. */
9333 word[depth++] = c;
9334 arridx[depth] = idxs[n];
9335 curi[depth] = 1;
9336 }
9337 }
9338 }
9339 }
9340 }
9341
9342 /* Delete the empty line that we started with. */
9343 if (curbuf->b_ml.ml_line_count > 1)
9344 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
9345
9346 redraw_later(NOT_VALID);
9347}
9348
9349/*
9350 * Dump one word: apply case modifications and append a line to the buffer.
9351 */
9352 static void
9353dump_word(word, round, flags, lnum)
9354 char_u *word;
9355 int round;
9356 int flags;
9357 linenr_T lnum;
9358{
9359 int keepcap = FALSE;
9360 char_u *p;
9361 char_u cword[MAXWLEN];
9362 char_u badword[MAXWLEN + 3];
9363
9364 if (round == 1 && (flags & WF_CAPMASK) != 0)
9365 {
9366 /* Need to fix case according to "flags". */
9367 make_case_word(word, cword, flags);
9368 p = cword;
9369 }
9370 else
9371 {
9372 p = word;
9373 if (round == 2 && (captype(word, NULL) & WF_KEEPCAP) == 0)
9374 keepcap = TRUE;
9375 }
9376
9377 /* Bad word is preceded by "/!" and some other
9378 * flags. */
9379 if ((flags & (WF_BANNED | WF_RARE)) || keepcap)
9380 {
9381 STRCPY(badword, "/");
9382 if (keepcap)
9383 STRCAT(badword, "=");
9384 if (flags & WF_BANNED)
9385 STRCAT(badword, "!");
9386 else if (flags & WF_RARE)
9387 STRCAT(badword, "?");
9388 STRCAT(badword, p);
9389 p = badword;
9390 }
9391
9392 ml_append(lnum, p, (colnr_T)0, FALSE);
9393}
9394
9395/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00009396 * For ":spelldump": Find matching prefixes for "word". Prepend each to
9397 * "word" and append a line to the buffer.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009398 * Return the updated line number.
9399 */
9400 static linenr_T
9401apply_prefixes(slang, word, round, flags, startlnum)
9402 slang_T *slang;
9403 char_u *word; /* case-folded word */
9404 int round;
9405 int flags; /* flags with prefix ID */
9406 linenr_T startlnum;
9407{
9408 idx_T arridx[MAXWLEN];
9409 int curi[MAXWLEN];
9410 char_u prefix[MAXWLEN];
9411 int c;
9412 char_u *byts;
9413 idx_T *idxs;
9414 linenr_T lnum = startlnum;
9415 int depth;
9416 int n;
9417 int len;
9418 int prefid = (unsigned)flags >> 16;
9419 int i;
9420
9421 byts = slang->sl_pbyts;
9422 idxs = slang->sl_pidxs;
9423 if (byts != NULL) /* array not is empty */
9424 {
9425 /*
9426 * Loop over all prefixes, building them byte-by-byte in prefix[].
9427 * When at the end of a prefix check that it supports "prefid".
9428 */
9429 depth = 0;
9430 arridx[0] = 0;
9431 curi[0] = 1;
9432 while (depth >= 0 && !got_int)
9433 {
9434 len = arridx[depth];
9435 if (curi[depth] > byts[len])
9436 {
9437 /* Done all bytes at this node, go up one level. */
9438 --depth;
9439 line_breakcheck();
9440 }
9441 else
9442 {
9443 /* Do one more byte at this node. */
9444 n = len + curi[depth];
9445 ++curi[depth];
9446 c = byts[n];
9447 if (c == 0)
9448 {
9449 /* End of prefix, find out how many IDs there are. */
9450 for (i = 1; i < len; ++i)
9451 if (byts[n + i] != 0)
9452 break;
9453 curi[depth] += i - 1;
9454
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009455 i = valid_word_prefix(i, n, prefid, word, slang);
9456 if (i != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009457 {
9458 vim_strncpy(prefix + depth, word, MAXWLEN - depth);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00009459 dump_word(prefix, round,
9460 (i & WF_RAREPFX) ? (flags | WF_RARE)
9461 : flags, lnum++);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00009462 }
9463 }
9464 else
9465 {
9466 /* Normal char, go one level deeper. */
9467 prefix[depth++] = c;
9468 arridx[depth] = idxs[n];
9469 curi[depth] = 1;
9470 }
9471 }
9472 }
9473 }
9474
9475 return lnum;
9476}
9477
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00009478#endif /* FEAT_SYN_HL */