blob: 54ca1036bd018d4c99e6948290237afdb85e0da4 [file] [log] [blame]
Bram Moolenaare19defe2005-03-21 08:23:33 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spell.c: code for spell checking
Bram Moolenaarfc735152005-03-22 22:54:12 +000012 *
Bram Moolenaar51485f02005-06-04 21:55:20 +000013 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
14 * has a list of bytes that can appear (siblings). For each byte there is a
15 * pointer to the node with the byte that follows in the word (child).
Bram Moolenaar9f30f502005-06-14 22:01:04 +000016 *
17 * A NUL byte is used where the word may end. The bytes are sorted, so that
18 * binary searching can be used and the NUL bytes are at the start. The
19 * number of possible bytes is stored before the list of bytes.
20 *
21 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
22 * either the next index or flags. The tree starts at index 0. For example,
23 * to lookup "vi" this sequence is followed:
24 * i = 0
25 * len = byts[i]
26 * n = where "v" appears in byts[i + 1] to byts[i + len]
27 * i = idxs[n]
28 * len = byts[i]
29 * n = where "i" appears in byts[i + 1] to byts[i + len]
30 * i = idxs[n]
31 * len = byts[i]
32 * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
Bram Moolenaar51485f02005-06-04 21:55:20 +000033 *
34 * There are two trees: one with case-folded words and one with words in
35 * original case. The second one is only used for keep-case words and is
36 * usually small.
37 *
38 * Thanks to Olaf Seibert for providing an example implementation of this tree
39 * and the compression mechanism.
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000040 *
41 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +000042 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +000043 * Why doesn't Vim use aspell/ispell/myspell/etc.?
44 * See ":help develop-spell".
45 */
46
Bram Moolenaar51485f02005-06-04 21:55:20 +000047/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000048 * Use this to let the score depend on how much a suggestion sounds like the
Bram Moolenaar9f30f502005-06-14 22:01:04 +000049 * bad word. It's quite slow and only occasionally makes the sorting better.
50#define SOUNDFOLD_SCORE
51 */
52
53/*
54 * Use this to adjust the score after finding suggestions, based on the
55 * suggested word sounding like the bad word. This is much faster than doing
56 * it for every possible suggestion.
57 * Disadvantage: When "the" is typed as "hte" it sounds different and goes
58 * down in the list.
59#define RESCORE(word_score, sound_score) ((2 * word_score + sound_score) / 3)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000060 */
61
62/*
Bram Moolenaar51485f02005-06-04 21:55:20 +000063 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
64 *
65 * <HEADER>: <fileID> <regioncnt> <regionname> ...
66 * <charflagslen> <charflags> <fcharslen> <fchars>
67 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000068 * <fileID> 10 bytes "VIMspell06"
Bram Moolenaar51485f02005-06-04 21:55:20 +000069 * <regioncnt> 1 byte number of regions following (8 supported)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +000070 * <regionname> 2 bytes Region name: ca, au, etc. Lower case.
Bram Moolenaar51485f02005-06-04 21:55:20 +000071 * First <regionname> is region 1.
72 *
73 * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
74 * <charflags> N bytes List of flags (first one is for character 128):
Bram Moolenaar9f30f502005-06-14 22:01:04 +000075 * 0x01 word character CF_WORD
76 * 0x02 upper-case character CF_UPPER
Bram Moolenaar51485f02005-06-04 21:55:20 +000077 * <fcharslen> 2 bytes Number of bytes in <fchars>.
78 * <fchars> N bytes Folded characters, first one is for character 128.
79 *
80 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000081 * <SUGGEST> : <repcount> <rep> ...
82 * <salflags> <salcount> <sal> ...
83 * <maplen> <mapstr>
Bram Moolenaar51485f02005-06-04 21:55:20 +000084 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +000085 * <repcount> 2 bytes number of <rep> items, MSB first.
86 *
87 * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
88 *
89 * <repfromlen> 1 byte length of <repfrom>
90 *
91 * <repfrom> N bytes "from" part of replacement
92 *
93 * <reptolen> 1 byte length of <repto>
94 *
95 * <repto> N bytes "to" part of replacement
96 *
97 * <salflags> 1 byte flags for soundsalike conversion:
98 * SAL_F0LLOWUP
99 * SAL_COLLAPSE
100 * SAL_REM_ACCENTS
101 *
102 * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
103 *
104 * <salfromlen> 1 byte length of <salfrom>
105 *
106 * <salfrom> N bytes "from" part of soundsalike
107 *
108 * <saltolen> 1 byte length of <salto>
109 *
110 * <salto> N bytes "to" part of soundsalike
111 *
112 * <maplen> 2 bytes length of <mapstr>, MSB first
113 *
114 * <mapstr> N bytes String with sequences of similar characters,
115 * separated by slashes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000116 *
117 *
118 * <LWORDTREE>: <wordtree>
119 *
120 * <wordtree>: <nodecount> <nodedata> ...
121 *
122 * <nodecount> 4 bytes Number of nodes following. MSB first.
123 *
124 * <nodedata>: <siblingcount> <sibling> ...
125 *
126 * <siblingcount> 1 byte Number of siblings in this node. The siblings
127 * follow in sorted order.
128 *
129 * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
130 *
131 * <byte> 1 byte Byte value of the sibling. Special cases:
132 * BY_NOFLAGS: End of word without flags and for all
133 * regions.
134 * BY_FLAGS: End of word, <flags> follow.
135 * BY_INDEX: Child of sibling is shared, <nodeidx>
136 * and <xbyte> follow.
137 *
138 * <nodeidx> 3 bytes Index of child for this sibling, MSB first.
139 *
140 * <xbyte> 1 byte byte value of the sibling.
141 *
142 * <flags> 1 byte bitmask of:
143 * WF_ALLCAP word must have only capitals
144 * WF_ONECAP first char of word must be capital
145 * WF_RARE rare word
146 * WF_REGION <region> follows
147 *
148 * <region> 1 byte Bitmask for regions in which word is valid. When
149 * omitted it's valid in all regions.
150 * Lowest bit is for region 1.
151 *
152 * <KWORDTREE>: <wordtree>
153 *
Bram Moolenaar51485f02005-06-04 21:55:20 +0000154 * All text characters are in 'encoding', but stored as single bytes.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000155 */
156
Bram Moolenaare19defe2005-03-21 08:23:33 +0000157#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
158# include <io.h> /* for lseek(), must be before vim.h */
159#endif
160
161#include "vim.h"
162
163#if defined(FEAT_SYN_HL) || defined(PROTO)
164
165#ifdef HAVE_FCNTL_H
166# include <fcntl.h>
167#endif
168
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000169#define MAXWLEN 250 /* Assume max. word len is this many bytes.
170 Some places assume a word length fits in a
171 byte, thus it can't be above 255. */
Bram Moolenaarfc735152005-03-22 22:54:12 +0000172
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000173/* The type used for indexes in the word tree needs to be at least 3 bytes
 * wide (a <nodeidx> in the spell file takes 3 bytes). "int" is used when it
 * is larger than 2 bytes, otherwise "long" is used. If int is 8 bytes we
 * could use something smaller, but what? */
175#if SIZEOF_INT > 2
176typedef int idx_T;
177#else
178typedef long idx_T;
179#endif
180
181/* Flags used for a word. Only the lowest byte can be used, the region byte
182 * comes above it. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000183#define WF_REGION 0x01 /* region byte follows */
184#define WF_ONECAP 0x02 /* word with one capital (or all capitals) */
185#define WF_ALLCAP 0x04 /* word must be all capitals */
186#define WF_RARE 0x08 /* rare word */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000187#define WF_BANNED 0x10 /* bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000188#define WF_KEEPCAP 0x80 /* keep-case word */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000189
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000190#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000191
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000192#define WF_USED 0x10000 /* Word was found in text. Must be in separate
193 byte before region and flags. */
194
/* Special values for <byte> in a <sibling> of the spell file; they tell what
 * follows the byte. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000195#define BY_NOFLAGS 0 /* end of word without flags or region */
196#define BY_FLAGS 1 /* end of word, flag byte follows */
197#define BY_INDEX 2 /* child is shared, index follows */
198#define BY_SPECIAL BY_INDEX /* highest special byte value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000199
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000200/* Info from "REP" and "SAL" entries in ".aff" file used in si_rep, sl_rep,
201 * si_sal and sl_sal.
202 * One replacement: from "ft_from" to "ft_to". */
203typedef struct fromto_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000204{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000205 char_u *ft_from;
206 char_u *ft_to;
207} fromto_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000208
209/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000210 * Structure used to store words and other info for one language, loaded from
211 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000212 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
213 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
214 *
215 * The "byts" array stores the possible bytes in each tree node, preceded by
216 * the number of possible bytes, sorted on byte value:
217 * <len> <byte1> <byte2> ...
218 * The "idxs" array stores the index of the child node corresponding to the
219 * byte in "byts".
220 * Exception: when the byte is zero, the word may end here and "idxs" holds
221 * the flags and region for the word. There may be several zeros in sequence
222 * for alternative flag/region combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000223 */
224typedef struct slang_S slang_T;
225struct slang_S
226{
227 slang_T *sl_next; /* next language */
228 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000229 char_u *sl_fname; /* name of .spl file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000230 int sl_add; /* TRUE if it's a .add file. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000231 char_u *sl_fbyts; /* case-folded word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000232 idx_T *sl_fidxs; /* case-folded word indexes */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000233 char_u *sl_kbyts; /* keep-case word bytes */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000234 idx_T *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000235 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000236
237 garray_T sl_rep; /* list of fromto_T entries from REP lines */
238 short sl_rep_first[256]; /* indexes where byte first appears, -1 if
239 there is none */
240 garray_T sl_sal; /* list of fromto_T entries from SAL lines */
241 short sl_sal_first[256]; /* indexes where byte first appears, -1 if
242 there is none */
243 int sl_followup; /* SAL followup */
244 int sl_collapse; /* SAL collapse_result */
245 int sl_rem_accents; /* SAL remove_accents */
246 char_u *sl_map; /* string with similar chars from MAP lines */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000247};
248
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000249/* First language that is loaded, start of the linked list of loaded
250 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000251static slang_T *first_lang = NULL;
252
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000253/* Flags used in .spl file for soundsalike flags. */
254#define SAL_F0LLOWUP 1
255#define SAL_COLLAPSE 2
256#define SAL_REM_ACCENTS 4
257
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000258/*
259 * Structure used in "b_langp", filled from 'spelllang'.
260 */
261typedef struct langp_S
262{
263 slang_T *lp_slang; /* info for this language (NULL for last one) */
264 int lp_region; /* bitmask for region or REGION_ALL */
265} langp_T;
266
267#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
268
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000269#define REGION_ALL 0xff /* word valid in all regions */
270
271/* Result values. Lower number is accepted over higher one. */
272#define SP_BANNED -1
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000273#define SP_OK 0
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000274#define SP_RARE 1
275#define SP_LOCAL 2
276#define SP_BAD 3
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000277
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000278#define VIMSPELLMAGIC "VIMspell06" /* string at start of Vim spell file */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000279#define VIMSPELLMAGICL 10
280
281/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000282 * Information used when looking for suggestions.
283 */
284typedef struct suginfo_S
285{
286 garray_T su_ga; /* suggestions, contains "suggest_T" */
287 int su_maxscore; /* maximum score for adding to su_ga */
288 int su_icase; /* accept words with wrong case */
289 int su_icase_add; /* add matches while ignoring case */
290 char_u *su_badptr; /* start of bad word in line */
291 int su_badlen; /* length of detected bad word in line */
292 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
293 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
294 hashtab_T su_banned; /* table with banned words */
295#ifdef SOUNDFOLD_SCORE
296 slang_T *su_slang; /* currently used slang_T */
297 char_u su_salword[MAXWLEN]; /* soundfolded badword */
298#endif
299} suginfo_T;
300
301/* One word suggestion. Used in "su_ga" of suginfo_T. */
302typedef struct suggest_S
303{
304 char_u *st_word; /* suggested word, allocated string */
305 int st_orglen; /* length of replaced text */
306 int st_score; /* lower is better */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000307#ifdef RESCORE
308 int st_had_bonus; /* bonus already included in score */
309#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000310} suggest_T;
311
312#define SUG(sup, i) (((suggest_T *)(sup)->su_ga.ga_data)[i])
313
314/* Number of suggestions displayed. */
315#define SUG_PROMPT_COUNT ((int)Rows - 2)
316
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000317/* Number of suggestions kept when cleaning up. When rescore_suggestions() is
318 * called the score may change, thus we need to keep more than what is
319 * displayed. */
320#define SUG_CLEAN_COUNT (SUG_PROMPT_COUNT < 25 ? 25 : SUG_PROMPT_COUNT)
321
322/* Threshold for sorting and cleaning up suggestions. Don't want to keep lots
323 * of suggestions that are not going to be displayed. */
324#define SUG_MAX_COUNT (SUG_PROMPT_COUNT + 50)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000325
326/* score for various changes */
327#define SCORE_SPLIT 99 /* split bad word */
328#define SCORE_ICASE 52 /* slightly different case */
329#define SCORE_ALLCAP 120 /* need all-cap case */
330#define SCORE_REGION 70 /* word is for different region */
331#define SCORE_RARE 180 /* rare word */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000332#define SCORE_NOTUSED 11 /* word not found in text yet */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000333
334/* score for edit distance */
335#define SCORE_SWAP 90 /* swap two characters */
336#define SCORE_SWAP3 110 /* swap two characters in three */
337#define SCORE_REP 87 /* REP replacement */
338#define SCORE_SUBST 93 /* substitute a character */
339#define SCORE_SIMILAR 33 /* substitute a similar character */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000340#define SCORE_DEL 94 /* delete a character */
341#define SCORE_INS 96 /* insert a character */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000342
343#define SCORE_MAXINIT 350 /* Initial maximum score: higher == slower.
344 * 350 allows for about three changes. */
345#define SCORE_MAXMAX 999999 /* accept any score */
346
347/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000348 * Structure to store info for word matching.
349 */
350typedef struct matchinf_S
351{
352 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000353
354 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000355 char_u *mi_word; /* start of word being checked */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000356 char_u *mi_end; /* end of matching word */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000357 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000358 char_u *mi_cend; /* char after what was used for
359 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000360
361 /* case-folded text */
362 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000363 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000364
365 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000366 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000367 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000368} matchinf_T;
369
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000370/*
371 * The tables used for recognizing word characters according to spelling.
372 * These are only used for the first 256 characters of 'encoding'.
373 */
374typedef struct spelltab_S
375{
376 char_u st_isw[256]; /* flags: is word char */
377 char_u st_isu[256]; /* flags: is uppercase char */
378 char_u st_fold[256]; /* chars: folded case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000379 char_u st_upper[256]; /* chars: upper case */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000380} spelltab_T;
381
382static spelltab_T spelltab;
383static int did_set_spelltab;
384
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000385#define CF_WORD 0x01
386#define CF_UPPER 0x02
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000387
388static void clear_spell_chartab __ARGS((spelltab_T *sp));
389static int set_spell_finish __ARGS((spelltab_T *new_st));
390
391/*
 * Return TRUE if "p" points to a word character for spelling.
 * With FEAT_MBYTE, when "has_mbyte" is set and MB_BYTE2LEN() of the byte at
 * "p" is above one, the result of mb_get_class() decides (class 2 or higher
 * is a word character). In all other cases the "st_isw" table in "spelltab"
 * is indexed with the byte at "p".
 * This is a macro because checking for a word character is done very often;
 * it avoids the function call overhead. Note that "p" is evaluated more than
 * once.
 */
397#ifdef FEAT_MBYTE
398# define SPELL_ISWORDP(p) ((has_mbyte && MB_BYTE2LEN(*(p)) > 1) \
399 ? (mb_get_class(p) >= 2) : spelltab.st_isw[*(p)])
400#else
401# define SPELL_ISWORDP(p) (spelltab.st_isw[*(p)])
402#endif
403
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000404/*
 * Struct to keep the state at each level in spell_try_change().
 * try_deeper() receives a pointer to these in its "stack" argument.
 */
407typedef struct trystate_S
408{
409 int ts_state; /* state at this level, STATE_ */
410 int ts_score; /* score */
411 int ts_curi; /* index in list of child nodes */
412 int ts_fidx; /* index in fword[], case-folded bad word */
413 int ts_fidxtry; /* ts_fidx at which bytes may be changed */
414 int ts_twordlen; /* valid length of tword[] */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000415 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000416 char_u ts_save_prewordlen; /* saved "prewordlen" */
417 int ts_save_splitoff; /* su_splitoff saved here
			   * NOTE(review): suginfo_T as declared in this file
			   * has no "su_splitoff" member - confirm what is
			   * actually stored here. */
418 int ts_save_badflags; /* badflags saved here */
419} trystate_T;
420
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000421static slang_T *slang_alloc __ARGS((char_u *lang));
422static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000423static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000424static void find_word __ARGS((matchinf_T *mip, int keepcap));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000425static int spell_valid_case __ARGS((int origflags, int treeflags));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000426static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000427static char_u *spell_enc __ARGS((void));
428static void spell_load_cb __ARGS((char_u *fname, void *cookie));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000429static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000430static idx_T read_tree __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000431static int find_region __ARGS((char_u *rp, char_u *region));
432static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000433static void spell_reload_one __ARGS((char_u *fname, int added_word));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000434static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000435static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
436static void write_spell_chartab __ARGS((FILE *fd));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000437static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000438static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000439static void spell_try_change __ARGS((suginfo_T *su));
440static int try_deeper __ARGS((suginfo_T *su, trystate_T *stack, int depth, int score_add));
441static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
442static void spell_try_soundalike __ARGS((suginfo_T *su));
443static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000444#if 0
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000445static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000446#endif
447#ifdef RESCORE
448static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score, int had_bonus));
449#else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000450static void add_suggestion __ARGS((suginfo_T *su, char_u *goodword, int use_score));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000451#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000452static void add_banned __ARGS((suginfo_T *su, char_u *word));
453static int was_banned __ARGS((suginfo_T *su, char_u *word));
454static void free_banned __ARGS((suginfo_T *su));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000455#ifdef RESCORE
456static void rescore_suggestions __ARGS((suginfo_T *su));
457#endif
458static void cleanup_suggestions __ARGS((suginfo_T *su, int keep));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000459static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, char_u *res));
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000460#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
461static int spell_sound_score __ARGS((slang_T *slang, char_u *goodword, char_u *badsound));
462#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000463static int spell_edit_score __ARGS((char_u *badword, char_u *goodword));
464
/*
 * Use our own character-case definitions, because the current locale may
 * differ from what the .spl file uses.
 * These must not be called with a negative number!
 *
 * SPELL_TOFOLD(c)	case-folded version of character "c"
 * SPELL_TOUPPER(c)	upper-case version of character "c"
 * SPELL_ISUPPER(c)	non-zero when "c" is an upper-case character
 *
 * Every macro evaluates "c" more than once, thus don't pass an expression
 * with side effects.
 */
#ifndef FEAT_MBYTE
/* Non-multi-byte implementation. */
# define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
# define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
# define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
#else
/* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 * the "w" library function for characters above 255 if available. */
# ifdef HAVE_TOWLOWER
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
	    : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
# else
#  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
	    : (c) < 256 ? spelltab.st_fold[c] : (c))
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
	    : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
# else
#  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
	    : (c) < 256 ? spelltab.st_upper[c] : (c))
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
	    : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
# else
/* Without iswupper() a character above 255 cannot be classified.  It must
 * not be reported as upper-case: unlike the conversion macros above, falling
 * back to "(c)" would be non-zero and thus mean TRUE for every such
 * character.  Use FALSE, like the non-multi-byte implementation does. */
#  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
	    : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
# endif
#endif
503
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000504
505static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000506
507/*
508 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000509 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000510 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
511 * or when it's OK it remains unchanged.
512 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000513 *
514 * "sug" is normally NULL. When looking for suggestions it points to
515 * suginfo_T. It's passed as a void pointer to keep the struct local.
516 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000517 * Returns the length of the word in bytes, also when it's OK, so that the
518 * caller can skip over the word.
519 */
520 int
Bram Moolenaar51485f02005-06-04 21:55:20 +0000521spell_check(wp, ptr, attrp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000522 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000523 char_u *ptr;
524 int *attrp;
525{
526 matchinf_T mi; /* Most things are put in "mi" so that it can
527 be passed to functions quickly. */
528
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000529 /* A word never starts at a space or a control character. Return quickly
530 * then, skipping over the character. */
531 if (*ptr <= ' ')
532 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000533
Bram Moolenaar51485f02005-06-04 21:55:20 +0000534 /* A word starting with a number is always OK. Also skip hexadecimal
535 * numbers 0xFF99 and 0X99FF. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000536 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000537 {
Bram Moolenaar3982c542005-06-08 21:56:31 +0000538 if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
539 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000540 else
541 mi.mi_end = skipdigits(ptr);
542 }
543 else
544 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000545 /* Find the end of the word. */
546 mi.mi_word = ptr;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000547 mi.mi_fend = ptr;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000548
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000549 if (SPELL_ISWORDP(mi.mi_fend))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000550 {
551 /* Make case-folded copy of the characters until the next non-word
552 * character. */
553 do
554 {
555 mb_ptr_adv(mi.mi_fend);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000556 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000557 }
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000558
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000559 /* We always use the characters up to the next non-word character,
560 * also for bad words. */
561 mi.mi_end = mi.mi_fend;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000562
563 /* Check caps type later. */
564 mi.mi_capflags = 0;
565 mi.mi_cend = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000566
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000567 /* Include one non-word character so that we can check for the
568 * word end. */
569 if (*mi.mi_fend != NUL)
570 mb_ptr_adv(mi.mi_fend);
571
572 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
573 MAXWLEN + 1);
574 mi.mi_fwordlen = STRLEN(mi.mi_fword);
575
Bram Moolenaar51485f02005-06-04 21:55:20 +0000576 /* The word is bad unless we recognize it. */
577 mi.mi_result = SP_BAD;
578
579 /*
580 * Loop over the languages specified in 'spelllang'.
581 * We check them all, because a matching word may be longer than an
582 * already found matching word.
583 */
584 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000585 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000586 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000587 /* Check for a matching word in case-folded words. */
588 find_word(&mi, FALSE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000589
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000590 /* Check for a matching word in keep-case words. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000591 find_word(&mi, TRUE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000592 }
593
Bram Moolenaar51485f02005-06-04 21:55:20 +0000594 if (mi.mi_result != SP_OK)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000595 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000596 /* When we are at a non-word character there is no error, just
597 * skip over the character (try looking for a word after it). */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000598 if (!SPELL_ISWORDP(ptr))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000599 {
600#ifdef FEAT_MBYTE
601 if (has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000602 return mb_ptr2len_check(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000603#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000604 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000605 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000606
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000607 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000608 *attrp = highlight_attr[HLF_SPB];
609 else if (mi.mi_result == SP_RARE)
610 *attrp = highlight_attr[HLF_SPR];
611 else
612 *attrp = highlight_attr[HLF_SPL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000613 }
614 }
615
Bram Moolenaar51485f02005-06-04 21:55:20 +0000616 return (int)(mi.mi_end - ptr);
617}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000618
Bram Moolenaar51485f02005-06-04 21:55:20 +0000619/*
620 * Check if the word at "mip->mi_word" is in the tree.
621 * When "keepcap" is TRUE check in keep-case word tree.
622 *
623 * For a match mip->mi_result is updated.
624 */
625 static void
626find_word(mip, keepcap)
627 matchinf_T *mip;
628 int keepcap;
629{
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000630 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000631 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000632 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000633 int endidxcnt = 0;
634 int len;
635 int wlen = 0;
636 int flen;
637 int c;
638 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000639 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000640#ifdef FEAT_MBYTE
641 char_u *s;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000642#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000643 char_u *p;
644 int res = SP_BAD;
645 int valid;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000646 slang_T *slang = mip->mi_lp->lp_slang;
647 unsigned flags;
648 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000649 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000650
651 if (keepcap)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000652 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000653 /* Check for word with matching case in keep-case tree. */
654 ptr = mip->mi_word;
655 flen = 9999; /* no case folding, always enough bytes */
656 byts = slang->sl_kbyts;
657 idxs = slang->sl_kidxs;
658 }
659 else
660 {
661 /* Check for case-folded in case-folded tree. */
662 ptr = mip->mi_fword;
663 flen = mip->mi_fwordlen; /* available case-folded bytes */
664 byts = slang->sl_fbyts;
665 idxs = slang->sl_fidxs;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000666 }
667
Bram Moolenaar51485f02005-06-04 21:55:20 +0000668 if (byts == NULL)
669 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000670
Bram Moolenaar51485f02005-06-04 21:55:20 +0000671 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000672 * Repeat advancing in the tree until:
673 * - there is a byte that doesn't match,
674 * - we reach the end of the tree,
675 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000676 */
677 for (;;)
678 {
679 if (flen == 0 && *mip->mi_fend != NUL)
680 {
681 /* Need to fold at least one more character. Do until next
682 * non-word character for efficiency. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000683 p = mip->mi_fend;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000684 do
685 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000686 mb_ptr_adv(mip->mi_fend);
687 } while (*mip->mi_fend != NUL && SPELL_ISWORDP(mip->mi_fend));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000688
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000689 /* Include the non-word character so that we can check for the
690 * word end. */
691 if (*mip->mi_fend != NUL)
692 mb_ptr_adv(mip->mi_fend);
693
694 (void)spell_casefold(p, (int)(mip->mi_fend - p),
Bram Moolenaar51485f02005-06-04 21:55:20 +0000695 mip->mi_fword + mip->mi_fwordlen,
696 MAXWLEN - mip->mi_fwordlen);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000697 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
698 mip->mi_fwordlen += flen;
699 }
700
701 len = byts[arridx++];
702
703 /* If the first possible byte is a zero the word could end here.
704 * Remember this index, we first check for the longest word. */
705 if (byts[arridx] == 0)
706 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000707 if (endidxcnt == MAXWLEN)
708 {
709 /* Must be a corrupted spell file. */
710 EMSG(_(e_format));
711 return;
712 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000713 endlen[endidxcnt] = wlen;
714 endidx[endidxcnt++] = arridx++;
715 --len;
716
717 /* Skip over the zeros, there can be several flag/region
718 * combinations. */
719 while (len > 0 && byts[arridx] == 0)
720 {
721 ++arridx;
722 --len;
723 }
724 if (len == 0)
725 break; /* no children, word must end here */
726 }
727
728 /* Stop looking at end of the line. */
729 if (ptr[wlen] == NUL)
730 break;
731
732 /* Perform a binary search in the list of accepted bytes. */
733 c = ptr[wlen];
734 lo = arridx;
735 hi = arridx + len - 1;
736 while (lo < hi)
737 {
738 m = (lo + hi) / 2;
739 if (byts[m] > c)
740 hi = m - 1;
741 else if (byts[m] < c)
742 lo = m + 1;
743 else
744 {
745 lo = hi = m;
746 break;
747 }
748 }
749
750 /* Stop if there is no matching byte. */
751 if (hi < lo || byts[lo] != c)
752 break;
753
754 /* Continue at the child (if there is one). */
755 arridx = idxs[lo];
756 ++wlen;
757 --flen;
758 }
759
760 /*
761 * Verify that one of the possible endings is valid. Try the longest
762 * first.
763 */
764 while (endidxcnt > 0)
765 {
766 --endidxcnt;
767 arridx = endidx[endidxcnt];
768 wlen = endlen[endidxcnt];
769
770#ifdef FEAT_MBYTE
771 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
772 continue; /* not at first byte of character */
773#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000774 if (SPELL_ISWORDP(ptr + wlen))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000775 continue; /* next char is a word character */
776
777#ifdef FEAT_MBYTE
778 if (!keepcap && has_mbyte)
779 {
780 /* Compute byte length in original word, length may change
781 * when folding case. */
782 p = mip->mi_word;
783 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
784 mb_ptr_adv(p);
785 wlen = p - mip->mi_word;
786 }
787#endif
788
789 /* Check flags and region. Repeat this if there are more
790 * flags/region alternatives until there is a match. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000791 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len)
792 {
793 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000794
795 /* Set a flag for words that were used. The region and case
796 * doesn't matter here, it's only used to rate the suggestions. */
797 idxs[arridx] = flags | WF_USED;
798
Bram Moolenaar51485f02005-06-04 21:55:20 +0000799 if (keepcap)
800 {
801 /* For "keepcap" tree the case is always right. */
802 valid = TRUE;
803 }
804 else
805 {
806 /* Check that the word is in the required case. */
807 if (mip->mi_cend != mip->mi_word + wlen)
808 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000809 /* mi_capflags was set for a different word length, need
810 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000811 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000812 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000813 }
814
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000815 valid = spell_valid_case(mip->mi_capflags, flags);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000816 }
817
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000818 if (valid)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000819 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000820 if (flags & WF_BANNED)
821 res = SP_BANNED;
822 else if (flags & WF_REGION)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000823 {
824 /* Check region. */
825 if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
826 res = SP_OK;
827 else
828 res = SP_LOCAL;
829 }
830 else if (flags & WF_RARE)
831 res = SP_RARE;
832 else
833 res = SP_OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000834
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000835 /* Always use the longest match and the best result. */
836 if (mip->mi_result > res)
837 {
838 mip->mi_result = res;
839 mip->mi_end = mip->mi_word + wlen;
840 }
841 else if (mip->mi_result == res
842 && mip->mi_end < mip->mi_word + wlen)
843 mip->mi_end = mip->mi_word + wlen;
844
845 if (res == SP_OK)
846 break;
847 }
848 else
849 res = SP_BAD;
850
Bram Moolenaar51485f02005-06-04 21:55:20 +0000851 ++arridx;
852 }
853
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000854 if (res == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000855 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000856 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000857}
858
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000859/*
860 * Check case flags for a word. Return TRUE if the word has the requested
861 * case.
862 */
863 static int
864spell_valid_case(origflags, treeflags)
865 int origflags; /* flags for the checked word. */
866 int treeflags; /* flags for the word in the spell tree */
867{
868 return (origflags == WF_ALLCAP
869 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
870 && ((treeflags & WF_ONECAP) == 0 || origflags == WF_ONECAP)));
871}
872
Bram Moolenaar51485f02005-06-04 21:55:20 +0000873
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000874/*
875 * Move to next spell error.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000876 * "curline" is TRUE for "z?": find word under/after cursor in the same line.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000877 * Return OK if found, FAIL otherwise.
878 */
879 int
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000880spell_move_to(dir, allwords, curline)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000881 int dir; /* FORWARD or BACKWARD */
882 int allwords; /* TRUE for "[s" and "]s" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000883 int curline;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000884{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000885 linenr_T lnum;
886 pos_T found_pos;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000887 char_u *line;
888 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000889 int attr = 0;
890 int len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000891 int has_syntax = syntax_present(curbuf);
892 int col;
893 int can_spell;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000894
Bram Moolenaarb765d632005-06-07 21:00:02 +0000895 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000896 {
897 EMSG(_("E756: Spell checking not enabled"));
898 return FAIL;
899 }
900
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000901 /*
902 * Start looking for bad word at the start of the line, because we can't
903 * start halfway a word, we don't know where it starts or ends.
904 *
905 * When searching backwards, we continue in the line to find the last
906 * bad word (in the cursor line: before the cursor).
907 */
908 lnum = curwin->w_cursor.lnum;
909 found_pos.lnum = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000910
911 while (!got_int)
912 {
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000913 line = ml_get(lnum);
914 p = line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000915
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000916 while (*p != NUL)
917 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000918 /* When searching backward don't search after the cursor. */
919 if (dir == BACKWARD
920 && lnum == curwin->w_cursor.lnum
921 && (colnr_T)(p - line) >= curwin->w_cursor.col)
922 break;
923
924 /* start of word */
925 len = spell_check(curwin, p, &attr);
926
927 if (attr != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000928 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000929 /* We found a bad word. Check the attribute. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000930 if (allwords || attr == highlight_attr[HLF_SPB])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000931 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000932 /* When searching forward only accept a bad word after
933 * the cursor. */
934 if (dir == BACKWARD
935 || lnum > curwin->w_cursor.lnum
936 || (lnum == curwin->w_cursor.lnum
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000937 && (colnr_T)(curline ? p - line + len
938 : p - line)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000939 > curwin->w_cursor.col))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000940 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000941 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000942 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000943 col = p - line;
944 (void)syn_get_id(lnum, (colnr_T)col,
945 FALSE, &can_spell);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000946
Bram Moolenaar51485f02005-06-04 21:55:20 +0000947 /* have to get the line again, a multi-line
948 * regexp may make it invalid */
949 line = ml_get(lnum);
950 p = line + col;
951 }
952 else
953 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000954
Bram Moolenaar51485f02005-06-04 21:55:20 +0000955 if (can_spell)
956 {
957 found_pos.lnum = lnum;
958 found_pos.col = p - line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000959#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +0000960 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000961#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000962 if (dir == FORWARD)
963 {
964 /* No need to search further. */
965 curwin->w_cursor = found_pos;
966 return OK;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000967 }
968 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000969 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000970 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000971 attr = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000972 }
973
Bram Moolenaar51485f02005-06-04 21:55:20 +0000974 /* advance to character after the word */
975 p += len;
976 if (*p == NUL)
977 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000978 }
979
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000980 if (curline)
981 return FAIL; /* only check cursor line */
982
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000983 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000984 if (dir == BACKWARD)
985 {
986 if (found_pos.lnum != 0)
987 {
988 /* Use the last match in the line. */
989 curwin->w_cursor = found_pos;
990 return OK;
991 }
992 if (lnum == 1)
993 return FAIL;
994 --lnum;
995 }
996 else
997 {
998 if (lnum == curbuf->b_ml.ml_line_count)
999 return FAIL;
1000 ++lnum;
1001 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001002
1003 line_breakcheck();
1004 }
1005
1006 return FAIL; /* interrupted */
1007}
1008
1009/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001010 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00001011 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001012 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001013 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001014spell_load_lang(lang)
1015 char_u *lang;
1016{
Bram Moolenaarb765d632005-06-07 21:00:02 +00001017 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001018 int r;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001019 char_u langcp[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001020
Bram Moolenaarb765d632005-06-07 21:00:02 +00001021 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001022 * It's truncated when an error is detected. */
1023 STRCPY(langcp, lang);
1024
Bram Moolenaarb765d632005-06-07 21:00:02 +00001025 /*
1026 * Find the first spell file for "lang" in 'runtimepath' and load it.
1027 */
1028 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
1029 "spell/%s.%s.spl", lang, spell_enc());
1030 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001031
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001032 if (r == FAIL && *langcp != NUL)
1033 {
1034 /* Try loading the ASCII version. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001035 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaar9c13b352005-05-19 20:53:52 +00001036 "spell/%s.ascii.spl", lang);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001037 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001038 }
1039
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001040 if (r == FAIL)
1041 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
1042 fname_enc + 6);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001043 else if (*langcp != NUL)
1044 {
1045 /* Load all the additions. */
1046 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
1047 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &langcp);
1048 }
1049}
1050
1051/*
1052 * Return the encoding used for spell checking: Use 'encoding', except that we
1053 * use "latin1" for "latin9". And limit to 60 characters (just in case).
1054 */
1055 static char_u *
1056spell_enc()
1057{
1058
1059#ifdef FEAT_MBYTE
1060 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1061 return p_enc;
1062#endif
1063 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001064}
1065
1066/*
1067 * Allocate a new slang_T.
1068 * Caller must fill "sl_next".
1069 */
1070 static slang_T *
1071slang_alloc(lang)
1072 char_u *lang;
1073{
1074 slang_T *lp;
1075
Bram Moolenaar51485f02005-06-04 21:55:20 +00001076 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001077 if (lp != NULL)
1078 {
1079 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001080 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
1081 ga_init2(&lp->sl_sal, sizeof(fromto_T), 10);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001082 }
1083 return lp;
1084}
1085
1086/*
1087 * Free the contents of an slang_T and the structure itself.
1088 */
1089 static void
1090slang_free(lp)
1091 slang_T *lp;
1092{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001093 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001094 vim_free(lp->sl_fname);
1095 slang_clear(lp);
1096 vim_free(lp);
1097}
1098
1099/*
1100 * Clear an slang_T so that the file can be reloaded.
1101 */
1102 static void
1103slang_clear(lp)
1104 slang_T *lp;
1105{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001106 garray_T *gap;
1107 fromto_T *ftp;
1108 int round;
1109
Bram Moolenaar51485f02005-06-04 21:55:20 +00001110 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001111 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001112 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001113 lp->sl_kbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001114 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001115 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001116 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001117 lp->sl_kidxs = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001118
1119 for (round = 1; round <= 2; ++round)
1120 {
1121 gap = round == 1 ? &lp->sl_rep : &lp->sl_sal;
1122 while (gap->ga_len > 0)
1123 {
1124 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
1125 vim_free(ftp->ft_from);
1126 vim_free(ftp->ft_to);
1127 }
1128 ga_clear(gap);
1129 }
1130
1131 vim_free(lp->sl_map);
1132 lp->sl_map = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001133}
1134
1135/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001136 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001137 * Invoked through do_in_runtimepath().
1138 */
1139 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00001140spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001141 char_u *fname;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001142 void *cookie; /* points to the language name */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001143{
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001144 (void)spell_load_file(fname, (char_u *)cookie, NULL, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001145}
1146
1147/*
1148 * Load one spell file and store the info into a slang_T.
1149 *
1150 * This is invoked in two ways:
1151 * - From spell_load_cb() to load a spell file for the first time. "lang" is
1152 * the language name, "old_lp" is NULL. Will allocate an slang_T.
1153 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
1154 * points to the existing slang_T.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001155 * Returns the slang_T the spell file was loaded into. NULL for error.
Bram Moolenaarb765d632005-06-07 21:00:02 +00001156 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001157 static slang_T *
1158spell_load_file(fname, lang, old_lp, silent)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001159 char_u *fname;
1160 char_u *lang;
1161 slang_T *old_lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001162 int silent; /* no error if file doesn't exist */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001163{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001164 FILE *fd;
1165 char_u buf[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001166 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001167 int i;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001168 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001169 int round;
1170 char_u *save_sourcing_name = sourcing_name;
1171 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001172 int cnt, ccnt;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001173 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001174 slang_T *lp = NULL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001175 garray_T *gap;
1176 fromto_T *ftp;
1177 int rr;
1178 short *first;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001179 idx_T idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001180
Bram Moolenaarb765d632005-06-07 21:00:02 +00001181 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001182 if (fd == NULL)
1183 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001184 if (!silent)
1185 EMSG2(_(e_notopen), fname);
1186 else if (p_verbose > 2)
1187 {
1188 verbose_enter();
1189 smsg((char_u *)e_notopen, fname);
1190 verbose_leave();
1191 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001192 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001193 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00001194 if (p_verbose > 2)
1195 {
1196 verbose_enter();
1197 smsg((char_u *)_("Reading spell file \"%s\""), fname);
1198 verbose_leave();
1199 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001200
Bram Moolenaarb765d632005-06-07 21:00:02 +00001201 if (old_lp == NULL)
1202 {
1203 lp = slang_alloc(lang);
1204 if (lp == NULL)
1205 goto endFAIL;
1206
1207 /* Remember the file name, used to reload the file when it's updated. */
1208 lp->sl_fname = vim_strsave(fname);
1209 if (lp->sl_fname == NULL)
1210 goto endFAIL;
1211
1212 /* Check for .add.spl. */
1213 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
1214 }
1215 else
1216 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001217
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001218 /* Set sourcing_name, so that error messages mention the file name. */
1219 sourcing_name = fname;
1220 sourcing_lnum = 0;
1221
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001222 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
1223 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001224 for (i = 0; i < VIMSPELLMAGICL; ++i)
1225 buf[i] = getc(fd); /* <fileID> */
1226 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
1227 {
1228 EMSG(_("E757: Wrong file ID in spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001229 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001230 }
1231
1232 cnt = getc(fd); /* <regioncnt> */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001233 if (cnt < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001234 {
1235truncerr:
1236 EMSG(_("E758: Truncated spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001237 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001238 }
1239 if (cnt > 8)
1240 {
1241formerr:
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001242 EMSG(_(e_format));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001243 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001244 }
1245 for (i = 0; i < cnt; ++i)
1246 {
1247 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
1248 lp->sl_regions[i * 2 + 1] = getc(fd);
1249 }
1250 lp->sl_regions[cnt * 2] = NUL;
1251
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001252 cnt = getc(fd); /* <charflagslen> */
1253 if (cnt > 0)
1254 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001255 p = alloc((unsigned)cnt);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001256 if (p == NULL)
1257 goto endFAIL;
1258 for (i = 0; i < cnt; ++i)
1259 p[i] = getc(fd); /* <charflags> */
1260
1261 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
1262 if (ccnt <= 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001263 {
1264 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001265 goto formerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001266 }
1267 fol = alloc((unsigned)ccnt + 1);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001268 if (fol == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001269 {
1270 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001271 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001272 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001273 for (i = 0; i < ccnt; ++i)
1274 fol[i] = getc(fd); /* <fchars> */
1275 fol[i] = NUL;
1276
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001277 /* Set the word-char flags and fill SPELL_ISUPPER() table. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001278 i = set_spell_charflags(p, cnt, fol);
1279 vim_free(p);
1280 vim_free(fol);
1281 if (i == FAIL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001282 goto formerr;
1283 }
1284 else
1285 {
1286 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
1287 cnt = (getc(fd) << 8) + getc(fd);
1288 if (cnt != 0)
1289 goto formerr;
1290 }
1291
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001292 /* <SUGGEST> : <repcount> <rep> ...
1293 * <salflags> <salcount> <sal> ...
1294 * <maplen> <mapstr> */
1295 for (round = 1; round <= 2; ++round)
1296 {
1297 if (round == 1)
1298 {
1299 gap = &lp->sl_rep;
1300 first = lp->sl_rep_first;
1301 }
1302 else
1303 {
1304 gap = &lp->sl_sal;
1305 first = lp->sl_sal_first;
1306
1307 i = getc(fd); /* <salflags> */
1308 if (i & SAL_F0LLOWUP)
1309 lp->sl_followup = TRUE;
1310 if (i & SAL_COLLAPSE)
1311 lp->sl_collapse = TRUE;
1312 if (i & SAL_REM_ACCENTS)
1313 lp->sl_rem_accents = TRUE;
1314 }
1315
1316 cnt = (getc(fd) << 8) + getc(fd); /* <repcount> or <salcount> */
1317 if (cnt < 0)
1318 goto formerr;
1319
1320 if (ga_grow(gap, cnt) == FAIL)
1321 goto endFAIL;
1322 for (; gap->ga_len < cnt; ++gap->ga_len)
1323 {
1324 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
1325 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
1326 ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
1327 for (rr = 1; rr <= 2; ++rr)
1328 {
1329 ccnt = getc(fd);
1330 if (ccnt < 0)
1331 {
1332 if (rr == 2)
1333 vim_free(ftp->ft_from);
1334 goto formerr;
1335 }
1336 if ((p = alloc(ccnt + 1)) == NULL)
1337 {
1338 if (rr == 2)
1339 vim_free(ftp->ft_from);
1340 goto endFAIL;
1341 }
1342 for (i = 0; i < ccnt; ++i)
1343 p[i] = getc(fd); /* <repfrom> or <salfrom> */
1344 p[i] = NUL;
1345 if (rr == 1)
1346 ftp->ft_from = p;
1347 else
1348 ftp->ft_to = p;
1349 }
1350 }
1351
1352 /* Fill the first-index table. */
1353 for (i = 0; i < 256; ++i)
1354 first[i] = -1;
1355 for (i = 0; i < gap->ga_len; ++i)
1356 {
1357 ftp = &((fromto_T *)gap->ga_data)[i];
1358 if (first[*ftp->ft_from] == -1)
1359 first[*ftp->ft_from] = i;
1360 }
1361 }
1362
1363 cnt = (getc(fd) << 8) + getc(fd); /* <maplen> */
1364 if (cnt < 0)
1365 goto formerr;
1366 p = alloc(cnt + 1);
1367 if (p == NULL)
1368 goto endFAIL;
1369 for (i = 0; i < cnt; ++i)
1370 p[i] = getc(fd); /* <mapstr> */
1371 p[i] = NUL;
1372 lp->sl_map = p;
1373
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001374
Bram Moolenaar51485f02005-06-04 21:55:20 +00001375 /* round 1: <LWORDTREE>
1376 * round 2: <KWORDTREE> */
1377 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001378 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001379 /* The tree size was computed when writing the file, so that we can
1380 * allocate it as one long block. <nodecount> */
1381 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
1382 if (len < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001383 goto truncerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001384 if (len > 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001385 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001386 /* Allocate the byte array. */
1387 p = lalloc((long_u)len, TRUE);
1388 if (p == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001389 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001390 if (round == 1)
1391 lp->sl_fbyts = p;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001392 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00001393 lp->sl_kbyts = p;
1394
1395 /* Allocate the index array. */
1396 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE);
1397 if (p == NULL)
1398 goto endFAIL;
1399 if (round == 1)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001400 lp->sl_fidxs = (idx_T *)p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001401 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001402 lp->sl_kidxs = (idx_T *)p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001403
1404
1405 /* Read the tree and store it in the array. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001406 idx = read_tree(fd,
Bram Moolenaar51485f02005-06-04 21:55:20 +00001407 round == 1 ? lp->sl_fbyts : lp->sl_kbyts,
1408 round == 1 ? lp->sl_fidxs : lp->sl_kidxs,
1409 len, 0);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001410 if (idx == -1)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001411 goto truncerr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001412 if (idx < 0)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001413 goto formerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001414 }
1415 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001416
Bram Moolenaarb765d632005-06-07 21:00:02 +00001417 /* For a new file link it in the list of spell files. */
1418 if (old_lp == NULL)
1419 {
1420 lp->sl_next = first_lang;
1421 first_lang = lp;
1422 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001423
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001424 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001425
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001426endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00001427 if (lang != NULL)
1428 /* truncating the name signals the error to spell_load_lang() */
1429 *lang = NUL;
1430 if (lp != NULL && old_lp == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001431 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001432 slang_free(lp);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001433 lp = NULL;
1434 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001435
1436endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001437 if (fd != NULL)
1438 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001439 sourcing_name = save_sourcing_name;
1440 sourcing_lnum = save_sourcing_lnum;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001441
1442 return lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001443}
1444
1445/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001446 * Read one row of siblings from the spell file and store it in the byte array
1447 * "byts" and index array "idxs". Recursively read the children.
1448 *
1449 * NOTE: The code here must match put_tree().
1450 *
1451 * Returns the index follosing the siblings.
1452 * Returns -1 if the file is shorter than expected.
1453 * Returns -2 if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001454 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001455 static idx_T
Bram Moolenaar51485f02005-06-04 21:55:20 +00001456read_tree(fd, byts, idxs, maxidx, startidx)
1457 FILE *fd;
1458 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001459 idx_T *idxs;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001460 int maxidx; /* size of arrays */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001461 idx_T startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001462{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001463 int len;
1464 int i;
1465 int n;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001466 idx_T idx = startidx;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001467 int c;
1468#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001469
Bram Moolenaar51485f02005-06-04 21:55:20 +00001470 len = getc(fd); /* <siblingcount> */
1471 if (len <= 0)
1472 return -1;
1473
1474 if (startidx + len >= maxidx)
1475 return -2;
1476 byts[idx++] = len;
1477
1478 /* Read the byte values, flag/region bytes and shared indexes. */
1479 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001480 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001481 c = getc(fd); /* <byte> */
1482 if (c < 0)
1483 return -1;
1484 if (c <= BY_SPECIAL)
1485 {
1486 if (c == BY_NOFLAGS)
1487 {
1488 /* No flags, all regions. */
1489 idxs[idx] = 0;
1490 c = 0;
1491 }
1492 else if (c == BY_FLAGS)
1493 {
1494 /* Read flags and option region. */
1495 c = getc(fd); /* <flags> */
1496 if (c & WF_REGION)
1497 c = (getc(fd) << 8) + c; /* <region> */
1498 idxs[idx] = c;
1499 c = 0;
1500 }
1501 else /* c == BY_INDEX */
1502 {
1503 /* <nodeidx> */
1504 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
1505 if (n < 0 || n >= maxidx)
1506 return -2;
1507 idxs[idx] = n + SHARED_MASK;
1508 c = getc(fd); /* <xbyte> */
1509 }
1510 }
1511 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001512 }
1513
Bram Moolenaar51485f02005-06-04 21:55:20 +00001514 /* Recursively read the children for non-shared siblings.
1515 * Skip the end-of-word ones (zero byte value) and the shared ones (and
1516 * remove SHARED_MASK) */
1517 for (i = 1; i <= len; ++i)
1518 if (byts[startidx + i] != 0)
1519 {
1520 if (idxs[startidx + i] & SHARED_MASK)
1521 idxs[startidx + i] &= ~SHARED_MASK;
1522 else
1523 {
1524 idxs[startidx + i] = idx;
1525 idx = read_tree(fd, byts, idxs, maxidx, idx);
1526 if (idx < 0)
1527 break;
1528 }
1529 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001530
Bram Moolenaar51485f02005-06-04 21:55:20 +00001531 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001532}
1533
1534/*
1535 * Parse 'spelllang' and set buf->b_langp accordingly.
1536 * Returns an error message or NULL.
1537 */
1538 char_u *
1539did_set_spelllang(buf)
1540 buf_T *buf;
1541{
1542 garray_T ga;
1543 char_u *lang;
1544 char_u *e;
1545 char_u *region;
1546 int region_mask;
1547 slang_T *lp;
1548 int c;
1549 char_u lbuf[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001550 char_u spf_name[MAXPATHL];
1551 int did_spf = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001552
1553 ga_init2(&ga, sizeof(langp_T), 2);
1554
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001555 /* Get the name of the .spl file associated with 'spellfile'. */
1556 if (*buf->b_p_spf == NUL)
1557 did_spf = TRUE;
1558 else
1559 vim_snprintf((char *)spf_name, sizeof(spf_name), "%s.spl",
1560 buf->b_p_spf);
1561
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001562 /* loop over comma separated languages. */
1563 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
1564 {
1565 e = vim_strchr(lang, ',');
1566 if (e == NULL)
1567 e = lang + STRLEN(lang);
Bram Moolenaar5482f332005-04-17 20:18:43 +00001568 region = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001569 if (e > lang + 2)
1570 {
1571 if (e - lang >= MAXWLEN)
1572 {
1573 ga_clear(&ga);
1574 return e_invarg;
1575 }
1576 if (lang[2] == '_')
1577 region = lang + 3;
1578 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001579
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001580 /* Check if we loaded this language before. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001581 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1582 if (STRNICMP(lp->sl_name, lang, 2) == 0)
1583 break;
1584
1585 if (lp == NULL)
1586 {
1587 /* Not found, load the language. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001588 vim_strncpy(lbuf, lang, e - lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001589 if (region != NULL)
1590 mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001591 spell_load_lang(lbuf);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001592 }
1593
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001594 /*
1595 * Loop over the languages, there can be several files for each.
1596 */
1597 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1598 if (STRNICMP(lp->sl_name, lang, 2) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001599 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00001600 region_mask = REGION_ALL;
1601 if (region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001602 {
1603 /* find region in sl_regions */
1604 c = find_region(lp->sl_regions, region);
1605 if (c == REGION_ALL)
1606 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00001607 if (!lp->sl_add)
1608 {
1609 c = *e;
1610 *e = NUL;
1611 smsg((char_u *)_("Warning: region %s not supported"),
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001612 lang);
Bram Moolenaar3982c542005-06-08 21:56:31 +00001613 *e = c;
1614 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001615 }
1616 else
1617 region_mask = 1 << c;
1618 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001619
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001620 if (ga_grow(&ga, 1) == FAIL)
1621 {
1622 ga_clear(&ga);
1623 return e_outofmem;
1624 }
1625 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
1626 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
1627 ++ga.ga_len;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001628
1629 /* Check if this is the 'spellfile' spell file. */
1630 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
1631 did_spf = TRUE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001632 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001633
1634 if (*e == ',')
1635 ++e;
1636 }
1637
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001638 /*
1639 * Make sure the 'spellfile' file is loaded. It may be in 'runtimepath',
1640 * then it's probably loaded above already. Otherwise load it here.
1641 */
1642 if (!did_spf)
1643 {
1644 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1645 if (fullpathcmp(spf_name, lp->sl_fname, FALSE) == FPC_SAME)
1646 break;
1647 if (lp == NULL)
1648 {
1649 vim_strncpy(lbuf, gettail(spf_name), 2);
1650 lp = spell_load_file(spf_name, lbuf, NULL, TRUE);
1651 }
1652 if (lp != NULL && ga_grow(&ga, 1) == OK)
1653 {
1654 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
1655 LANGP_ENTRY(ga, ga.ga_len)->lp_region = REGION_ALL;
1656 ++ga.ga_len;
1657 }
1658 }
1659
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001660 /* Add a NULL entry to mark the end of the list. */
1661 if (ga_grow(&ga, 1) == FAIL)
1662 {
1663 ga_clear(&ga);
1664 return e_outofmem;
1665 }
1666 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
1667 ++ga.ga_len;
1668
1669 /* Everything is fine, store the new b_langp value. */
1670 ga_clear(&buf->b_langp);
1671 buf->b_langp = ga;
1672
1673 return NULL;
1674}
1675
1676/*
1677 * Find the region "region[2]" in "rp" (points to "sl_regions").
1678 * Each region is simply stored as the two characters of it's name.
1679 * Returns the index if found, REGION_ALL if not found.
1680 */
1681 static int
1682find_region(rp, region)
1683 char_u *rp;
1684 char_u *region;
1685{
1686 int i;
1687
1688 for (i = 0; ; i += 2)
1689 {
1690 if (rp[i] == NUL)
1691 return REGION_ALL;
1692 if (rp[i] == region[0] && rp[i + 1] == region[1])
1693 break;
1694 }
1695 return i / 2;
1696}
1697
1698/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001699 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001700 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00001701 * Word WF_ONECAP
1702 * W WORD WF_ALLCAP
1703 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001704 */
1705 static int
1706captype(word, end)
1707 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001708 char_u *end; /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001709{
1710 char_u *p;
1711 int c;
1712 int firstcap;
1713 int allcap;
1714 int past_second = FALSE; /* past second word char */
1715
1716 /* find first letter */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001717 for (p = word; !SPELL_ISWORDP(p); mb_ptr_adv(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001718 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001719 return 0; /* only non-word characters, illegal word */
1720#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00001721 if (has_mbyte)
1722 c = mb_ptr2char_adv(&p);
1723 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001724#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00001725 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001726 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001727
1728 /*
1729 * Need to check all letters to find a word with mixed upper/lower.
1730 * But a word with an upper char only at start is a ONECAP.
1731 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001732 for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001733 if (SPELL_ISWORDP(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001734 {
1735#ifdef FEAT_MBYTE
1736 c = mb_ptr2char(p);
1737#else
1738 c = *p;
1739#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001740 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001741 {
1742 /* UUl -> KEEPCAP */
1743 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001744 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001745 allcap = FALSE;
1746 }
1747 else if (!allcap)
1748 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001749 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001750 past_second = TRUE;
1751 }
1752
1753 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001754 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001755 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001756 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001757 return 0;
1758}
1759
1760# if defined(FEAT_MBYTE) || defined(PROTO)
1761/*
1762 * Clear all spelling tables and reload them.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001763 * Used after 'encoding' is set and when ":mkspell" was used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001764 */
1765 void
1766spell_reload()
1767{
1768 buf_T *buf;
1769 slang_T *lp;
Bram Moolenaar3982c542005-06-08 21:56:31 +00001770 win_T *wp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001771
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001772 /* Initialize the table for SPELL_ISWORDP(). */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001773 init_spell_chartab();
1774
1775 /* Unload all allocated memory. */
1776 while (first_lang != NULL)
1777 {
1778 lp = first_lang;
1779 first_lang = lp->sl_next;
1780 slang_free(lp);
1781 }
1782
1783 /* Go through all buffers and handle 'spelllang'. */
1784 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
1785 {
1786 ga_clear(&buf->b_langp);
Bram Moolenaar3982c542005-06-08 21:56:31 +00001787
1788 /* Only load the wordlists when 'spelllang' is set and there is a
1789 * window for this buffer in which 'spell' is set. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001790 if (*buf->b_p_spl != NUL)
Bram Moolenaar3982c542005-06-08 21:56:31 +00001791 {
1792 FOR_ALL_WINDOWS(wp)
1793 if (wp->w_buffer == buf && wp->w_p_spell)
1794 {
1795 (void)did_set_spelllang(buf);
1796# ifdef FEAT_WINDOWS
1797 break;
1798# endif
1799 }
1800 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001801 }
1802}
1803# endif
1804
Bram Moolenaarb765d632005-06-07 21:00:02 +00001805/*
1806 * Reload the spell file "fname" if it's loaded.
1807 */
1808 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001809spell_reload_one(fname, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001810 char_u *fname;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001811 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001812{
1813 slang_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001814 int didit = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001815
Bram Moolenaarb765d632005-06-07 21:00:02 +00001816 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1817 if (fullpathcmp(fname, lp->sl_fname, FALSE) == FPC_SAME)
1818 {
1819 slang_clear(lp);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001820 (void)spell_load_file(fname, NULL, lp, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001821 redraw_all_later(NOT_VALID);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001822 didit = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00001823 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001824
1825 /* When "zg" was used and the file wasn't loaded yet, should redo
1826 * 'spelllang' to get it loaded. */
1827 if (added_word && !didit)
1828 did_set_spelllang(curbuf);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001829}
1830
1831
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001832/*
1833 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001834 */
1835
Bram Moolenaar51485f02005-06-04 21:55:20 +00001836#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001837 and .dic file. */
1838/*
1839 * Main structure to store the contents of a ".aff" file.
1840 */
1841typedef struct afffile_S
1842{
1843 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001844 int af_rar; /* RAR ID for rare word */
1845 int af_kep; /* KEP ID for keep-case word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001846 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
1847 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001848} afffile_T;
1849
1850typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001851/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
1852struct affentry_S
1853{
1854 affentry_T *ae_next; /* next affix with same name/number */
1855 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
1856 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001857 char_u *ae_cond; /* condition (NULL for ".") */
1858 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001859};
1860
1861/* Affix header from ".aff" file. Used for af_pref and af_suff. */
1862typedef struct affheader_S
1863{
1864 char_u ah_key[2]; /* key for hashtable == name of affix entry */
1865 int ah_combine; /* suffix may combine with prefix */
1866 affentry_T *ah_first; /* first affix entry */
1867} affheader_T;
1868
1869#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1870
1871/*
1872 * Structure that is used to store the items in the word tree. This avoids
1873 * the need to keep track of each allocated thing, it's freed all at once
1874 * after ":mkspell" is done.
1875 */
1876#define SBLOCKSIZE 16000 /* size of sb_data */
1877typedef struct sblock_S sblock_T;
1878struct sblock_S
1879{
1880 sblock_T *sb_next; /* next block in list */
1881 int sb_used; /* nr of bytes already in use */
1882 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001883};
1884
1885/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001886 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001887 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001888typedef struct wordnode_S wordnode_T;
1889struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001890{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001891 char_u wn_hashkey[6]; /* room for the hash key */
1892 wordnode_T *wn_next; /* next node with same hash key */
1893 wordnode_T *wn_child; /* child (next byte in word) */
1894 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
1895 always sorted) */
1896 wordnode_T *wn_wnode; /* parent node that will write this node */
1897 int wn_index; /* index in written nodes (valid after first
1898 round) */
1899 char_u wn_byte; /* Byte for this node. NUL for word end */
1900 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
1901 char_u wn_region; /* when wn_byte is NUL: region mask */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001902};
1903
/* Get the wordnode_T from a hashtable item; the key is its first member.
 * The expansion is put in parentheses, so that "HI2WN(hi)->wn_next" applies
 * the cast before the member access. */
#define HI2WN(hi)   ((wordnode_T *)(hi)->hi_key)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001905
Bram Moolenaar51485f02005-06-04 21:55:20 +00001906/*
1907 * Info used while reading the spell files.
1908 */
1909typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001910{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001911 wordnode_T *si_foldroot; /* tree with case-folded words */
1912 wordnode_T *si_keeproot; /* tree with keep-case words */
1913 sblock_T *si_blocks; /* memory blocks used */
1914 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001915 int si_add; /* addition file */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001916 int si_region; /* region mask */
1917 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00001918 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001919 int si_verbose; /* verbose messages */
Bram Moolenaar3982c542005-06-08 21:56:31 +00001920 int si_region_count; /* number of regions supported (1 when there
1921 are no regions) */
1922 char_u si_region_name[16]; /* region names (if count > 1) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001923
1924 garray_T si_rep; /* list of fromto_T entries from REP lines */
1925 garray_T si_sal; /* list of fromto_T entries from SAL lines */
1926 int si_followup; /* soundsalike: ? */
1927 int si_collapse; /* soundsalike: ? */
1928 int si_rem_accents; /* soundsalike: remove accents */
1929 garray_T si_map; /* MAP info concatenated */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001930} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001931
Bram Moolenaar51485f02005-06-04 21:55:20 +00001932static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001933static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
1934static int sal_to_bool __ARGS((char_u *s));
Bram Moolenaar5482f332005-04-17 20:18:43 +00001935static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001936static void spell_free_aff __ARGS((afffile_T *aff));
1937static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001938static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, hashtab_T *ht, hashtab_T *xht, int comb, int flags));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001939static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
1940static void *getroom __ARGS((sblock_T **blp, size_t len));
1941static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
1942static void free_blocks __ARGS((sblock_T *bl));
1943static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
Bram Moolenaar3982c542005-06-08 21:56:31 +00001944static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags, int region));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001945static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, sblock_T **blp));
Bram Moolenaarb765d632005-06-07 21:00:02 +00001946static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001947static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
1948static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
Bram Moolenaar3982c542005-06-08 21:56:31 +00001949static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001950static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001951static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
Bram Moolenaarb765d632005-06-07 21:00:02 +00001952static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001953
1954/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001955 * Read the affix file "fname".
Bram Moolenaar3982c542005-06-08 21:56:31 +00001956 * Returns an afffile_T, NULL for complete failure.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001957 */
1958 static afffile_T *
Bram Moolenaar51485f02005-06-04 21:55:20 +00001959spell_read_aff(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001960 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001961 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001962{
1963 FILE *fd;
1964 afffile_T *aff;
1965 char_u rline[MAXLINELEN];
1966 char_u *line;
1967 char_u *pc = NULL;
1968 char_u *(items[6]);
1969 int itemcnt;
1970 char_u *p;
1971 int lnum = 0;
1972 affheader_T *cur_aff = NULL;
1973 int aff_todo = 0;
1974 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001975 char_u *low = NULL;
1976 char_u *fol = NULL;
1977 char_u *upp = NULL;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001978 static char *e_affname = N_("Affix name too long in %s line %d: %s");
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001979 int do_rep;
1980 int do_sal;
1981 int do_map;
1982 int found_map = FALSE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00001983 hashitem_T *hi;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001984
Bram Moolenaar51485f02005-06-04 21:55:20 +00001985 /*
1986 * Open the file.
1987 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001988 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001989 if (fd == NULL)
1990 {
1991 EMSG2(_(e_notopen), fname);
1992 return NULL;
1993 }
1994
Bram Moolenaarb765d632005-06-07 21:00:02 +00001995 if (spin->si_verbose || p_verbose > 2)
1996 {
1997 if (!spin->si_verbose)
1998 verbose_enter();
1999 smsg((char_u *)_("Reading affix file %s..."), fname);
2000 out_flush();
2001 if (!spin->si_verbose)
2002 verbose_leave();
2003 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002004
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002005 /* Only do REP lines when not done in another .aff file already. */
2006 do_rep = spin->si_rep.ga_len == 0;
2007
2008 /* Only do SAL lines when not done in another .aff file already. */
2009 do_sal = spin->si_sal.ga_len == 0;
2010
2011 /* Only do MAP lines when not done in another .aff file already. */
2012 do_map = spin->si_map.ga_len == 0;
2013
Bram Moolenaar51485f02005-06-04 21:55:20 +00002014 /*
2015 * Allocate and init the afffile_T structure.
2016 */
2017 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002018 if (aff == NULL)
2019 return NULL;
2020 hash_init(&aff->af_pref);
2021 hash_init(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002022
2023 /*
2024 * Read all the lines in the file one by one.
2025 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002026 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002027 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002028 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002029 ++lnum;
2030
2031 /* Skip comment lines. */
2032 if (*rline == '#')
2033 continue;
2034
2035 /* Convert from "SET" to 'encoding' when needed. */
2036 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002037#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00002038 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002039 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002040 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002041 if (pc == NULL)
2042 {
2043 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2044 fname, lnum, rline);
2045 continue;
2046 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002047 line = pc;
2048 }
2049 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00002050#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002051 {
2052 pc = NULL;
2053 line = rline;
2054 }
2055
2056 /* Split the line up in white separated items. Put a NUL after each
2057 * item. */
2058 itemcnt = 0;
2059 for (p = line; ; )
2060 {
2061 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
2062 ++p;
2063 if (*p == NUL)
2064 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002065 if (itemcnt == 6) /* too many items */
2066 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002067 items[itemcnt++] = p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002068 while (*p > ' ') /* skip until white space or CR/NL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002069 ++p;
2070 if (*p == NUL)
2071 break;
2072 *p++ = NUL;
2073 }
2074
2075 /* Handle non-empty lines. */
2076 if (itemcnt > 0)
2077 {
2078 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
2079 && aff->af_enc == NULL)
2080 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002081#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00002082 /* Setup for conversion from "ENC" to 'encoding'. */
2083 aff->af_enc = enc_canonize(items[1]);
2084 if (aff->af_enc != NULL && !spin->si_ascii
2085 && convert_setup(&spin->si_conv, aff->af_enc,
2086 p_enc) == FAIL)
2087 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
2088 fname, aff->af_enc, p_enc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002089#else
2090 smsg((char_u *)_("Conversion in %s not supported"), fname);
2091#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002092 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00002093 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
2094 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002095 /* ignored, we always split */
Bram Moolenaar50cde822005-06-05 21:54:54 +00002096 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002097 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002098 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002099 /* ignored, we look in the tree for what chars may appear */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002100 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002101 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
2102 && aff->af_rar == 0)
2103 {
2104 aff->af_rar = items[1][0];
2105 if (items[1][1] != NUL)
2106 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
2107 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002108 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
2109 && aff->af_kep == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002110 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002111 aff->af_kep = items[1][0];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002112 if (items[1][1] != NUL)
2113 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
2114 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002115 else if ((STRCMP(items[0], "PFX") == 0
2116 || STRCMP(items[0], "SFX") == 0)
2117 && aff_todo == 0
2118 && itemcnt == 4)
2119 {
2120 /* New affix letter. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002121 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
2122 sizeof(affheader_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002123 if (cur_aff == NULL)
2124 break;
2125 cur_aff->ah_key[0] = *items[1];
2126 cur_aff->ah_key[1] = NUL;
2127 if (items[1][1] != NUL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002128 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002129 if (*items[2] == 'Y')
2130 cur_aff->ah_combine = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002131 else if (*items[2] != 'N')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002132 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
2133 fname, lnum, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002134 if (*items[0] == 'P')
2135 tp = &aff->af_pref;
2136 else
2137 tp = &aff->af_suff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002138 aff_todo = atoi((char *)items[3]);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002139 hi = hash_find(tp, cur_aff->ah_key);
2140 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar51485f02005-06-04 21:55:20 +00002141 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002142 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
2143 fname, lnum, items[1]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002144 aff_todo = 0;
2145 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002146 else
2147 hash_add(tp, cur_aff->ah_key);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002148 }
2149 else if ((STRCMP(items[0], "PFX") == 0
2150 || STRCMP(items[0], "SFX") == 0)
2151 && aff_todo > 0
2152 && STRCMP(cur_aff->ah_key, items[1]) == 0
2153 && itemcnt == 5)
2154 {
2155 affentry_T *aff_entry;
2156
2157 /* New item for an affix letter. */
2158 --aff_todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002159 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
2160 sizeof(affentry_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002161 if (aff_entry == NULL)
2162 break;
Bram Moolenaar5482f332005-04-17 20:18:43 +00002163
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002164 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002165 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
2166 items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002167 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002168 aff_entry->ae_add = getroom_save(&spin->si_blocks,
2169 items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002170
Bram Moolenaar51485f02005-06-04 21:55:20 +00002171 /* Don't use an affix entry with non-ASCII characters when
2172 * "spin->si_ascii" is TRUE. */
2173 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00002174 || has_non_ascii(aff_entry->ae_add)))
2175 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00002176 aff_entry->ae_next = cur_aff->ah_first;
2177 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002178
2179 if (STRCMP(items[4], ".") != 0)
2180 {
2181 char_u buf[MAXLINELEN];
2182
2183 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
2184 items[4]);
2185 if (*items[0] == 'P')
2186 sprintf((char *)buf, "^%s", items[4]);
2187 else
2188 sprintf((char *)buf, "%s$", items[4]);
2189 aff_entry->ae_prog = vim_regcomp(buf,
2190 RE_MAGIC + RE_STRING);
2191 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00002192 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002193 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002194 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
2195 {
2196 if (fol != NULL)
2197 smsg((char_u *)_("Duplicate FOL in %s line %d"),
2198 fname, lnum);
2199 else
2200 fol = vim_strsave(items[1]);
2201 }
2202 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
2203 {
2204 if (low != NULL)
2205 smsg((char_u *)_("Duplicate LOW in %s line %d"),
2206 fname, lnum);
2207 else
2208 low = vim_strsave(items[1]);
2209 }
2210 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
2211 {
2212 if (upp != NULL)
2213 smsg((char_u *)_("Duplicate UPP in %s line %d"),
2214 fname, lnum);
2215 else
2216 upp = vim_strsave(items[1]);
2217 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002218 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002219 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002220 /* Ignore REP count */;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002221 if (!isdigit(*items[1]))
2222 smsg((char_u *)_("Expected REP count in %s line %d"),
2223 fname, lnum);
2224 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002225 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
2226 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002227 /* REP item */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002228 if (do_rep)
2229 add_fromto(spin, &spin->si_rep, items[1], items[2]);
2230 }
2231 else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
2232 {
2233 /* MAP item or count */
2234 if (!found_map)
2235 {
2236 /* First line contains the count. */
2237 found_map = TRUE;
2238 if (!isdigit(*items[1]))
2239 smsg((char_u *)_("Expected MAP count in %s line %d"),
2240 fname, lnum);
2241 }
2242 else if (do_map)
2243 {
2244 /* We simply concatenate all the MAP strings, separated by
2245 * slashes. */
2246 ga_concat(&spin->si_map, items[1]);
2247 ga_append(&spin->si_map, '/');
2248 }
2249 }
2250 else if (STRCMP(items[0], "SAL") == 0 && itemcnt == 3)
2251 {
2252 if (do_sal)
2253 {
2254 /* SAL item (sounds-a-like)
2255 * Either one of the known keys or a from-to pair. */
2256 if (STRCMP(items[1], "followup") == 0)
2257 spin->si_followup = sal_to_bool(items[2]);
2258 else if (STRCMP(items[1], "collapse_result") == 0)
2259 spin->si_collapse = sal_to_bool(items[2]);
2260 else if (STRCMP(items[1], "remove_accents") == 0)
2261 spin->si_rem_accents = sal_to_bool(items[2]);
2262 else
2263 /* when "to" is "_" it means empty */
2264 add_fromto(spin, &spin->si_sal, items[1],
2265 STRCMP(items[2], "_") == 0 ? (char_u *)""
2266 : items[2]);
2267 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002268 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002269 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002270 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
2271 fname, lnum, items[0]);
2272 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002273 }
2274
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002275 if (fol != NULL || low != NULL || upp != NULL)
2276 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00002277 /*
2278 * Don't write a word table for an ASCII file, so that we don't check
2279 * for conflicts with a word table that matches 'encoding'.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002280 * Don't write one for utf-8 either, we use utf_*() and
Bram Moolenaar3982c542005-06-08 21:56:31 +00002281 * mb_get_class(), the list of chars in the file will be incomplete.
2282 */
2283 if (!spin->si_ascii
2284#ifdef FEAT_MBYTE
2285 && !enc_utf8
2286#endif
2287 )
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002288 {
2289 if (fol == NULL || low == NULL || upp == NULL)
2290 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
2291 else
Bram Moolenaar3982c542005-06-08 21:56:31 +00002292 (void)set_spell_chartab(fol, low, upp);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002293 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002294
2295 vim_free(fol);
2296 vim_free(low);
2297 vim_free(upp);
2298 }
2299
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002300 vim_free(pc);
2301 fclose(fd);
2302 return aff;
2303}
2304
2305/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002306 * Add a from-to item to "gap". Used for REP and SAL items.
2307 * They are stored case-folded.
2308 */
2309 static void
2310add_fromto(spin, gap, from, to)
2311 spellinfo_T *spin;
2312 garray_T *gap;
2313 char_u *from;
2314 char_u *to;
2315{
2316 fromto_T *ftp;
2317 char_u word[MAXWLEN];
2318
2319 if (ga_grow(gap, 1) == OK)
2320 {
2321 ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
2322 (void)spell_casefold(from, STRLEN(from), word, MAXWLEN);
2323 ftp->ft_from = getroom_save(&spin->si_blocks, word);
2324 (void)spell_casefold(to, STRLEN(to), word, MAXWLEN);
2325 ftp->ft_to = getroom_save(&spin->si_blocks, word);
2326 ++gap->ga_len;
2327 }
2328}
2329
2330/*
2331 * Convert a boolean argument in a SAL line to TRUE or FALSE;
2332 */
2333 static int
2334sal_to_bool(s)
2335 char_u *s;
2336{
2337 return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
2338}
2339
2340/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00002341 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
2342 * When "s" is NULL FALSE is returned.
2343 */
2344 static int
2345has_non_ascii(s)
2346 char_u *s;
2347{
2348 char_u *p;
2349
2350 if (s != NULL)
2351 for (p = s; *p != NUL; ++p)
2352 if (*p >= 128)
2353 return TRUE;
2354 return FALSE;
2355}
2356
2357/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002358 * Free the structure filled by spell_read_aff().
2359 */
2360 static void
2361spell_free_aff(aff)
2362 afffile_T *aff;
2363{
2364 hashtab_T *ht;
2365 hashitem_T *hi;
2366 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002367 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002368 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002369
2370 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002371
Bram Moolenaar51485f02005-06-04 21:55:20 +00002372 /* All this trouble to foree the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002373 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
2374 {
2375 todo = ht->ht_used;
2376 for (hi = ht->ht_array; todo > 0; ++hi)
2377 {
2378 if (!HASHITEM_EMPTY(hi))
2379 {
2380 --todo;
2381 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002382 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
2383 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002384 }
2385 }
2386 if (ht == &aff->af_suff)
2387 break;
2388 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002389
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002390 hash_clear(&aff->af_pref);
2391 hash_clear(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002392}
2393
2394/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002395 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002396 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002397 */
2398 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00002399spell_read_dic(fname, spin, affile)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002400 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002401 spellinfo_T *spin;
2402 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002403{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002404 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002405 char_u line[MAXLINELEN];
Bram Moolenaar51485f02005-06-04 21:55:20 +00002406 char_u *afflist;
2407 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002408 char_u *pc;
2409 char_u *w;
2410 int l;
2411 hash_T hash;
2412 hashitem_T *hi;
2413 FILE *fd;
2414 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002415 int non_ascii = 0;
2416 int retval = OK;
2417 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002418 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002419
Bram Moolenaar51485f02005-06-04 21:55:20 +00002420 /*
2421 * Open the file.
2422 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002423 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002424 if (fd == NULL)
2425 {
2426 EMSG2(_(e_notopen), fname);
2427 return FAIL;
2428 }
2429
Bram Moolenaar51485f02005-06-04 21:55:20 +00002430 /* The hashtable is only used to detect duplicated words. */
2431 hash_init(&ht);
2432
Bram Moolenaarb765d632005-06-07 21:00:02 +00002433 if (spin->si_verbose || p_verbose > 2)
2434 {
2435 if (!spin->si_verbose)
2436 verbose_enter();
2437 smsg((char_u *)_("Reading dictionary file %s..."), fname);
2438 out_flush();
2439 if (!spin->si_verbose)
2440 verbose_leave();
2441 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002442
2443 /* Read and ignore the first line: word count. */
2444 (void)vim_fgets(line, MAXLINELEN, fd);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002445 if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002446 EMSG2(_("E760: No word count in %s"), fname);
2447
2448 /*
2449 * Read all the lines in the file one by one.
2450 * The words are converted to 'encoding' here, before being added to
2451 * the hashtable.
2452 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002453 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002454 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002455 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002456 ++lnum;
2457
Bram Moolenaar51485f02005-06-04 21:55:20 +00002458 /* Remove CR, LF and white space from the end. White space halfway
2459 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002460 l = STRLEN(line);
2461 while (l > 0 && line[l - 1] <= ' ')
2462 --l;
2463 if (l == 0)
2464 continue; /* empty line */
2465 line[l] = NUL;
2466
Bram Moolenaar51485f02005-06-04 21:55:20 +00002467 /* This takes time, print a message now and then. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002468 if (spin->si_verbose && (lnum & 0x3ff) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002469 {
2470 vim_snprintf((char *)message, sizeof(message),
2471 _("line %6d - %s"), lnum, line);
2472 msg_start();
2473 msg_outtrans_attr(message, 0);
2474 msg_clr_eos();
2475 msg_didout = FALSE;
2476 msg_col = 0;
2477 out_flush();
2478 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002479
Bram Moolenaar51485f02005-06-04 21:55:20 +00002480 /* Find the optional affix names. */
2481 afflist = vim_strchr(line, '/');
2482 if (afflist != NULL)
2483 *afflist++ = NUL;
2484
2485 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
2486 if (spin->si_ascii && has_non_ascii(line))
2487 {
2488 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00002489 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002490 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00002491
Bram Moolenaarb765d632005-06-07 21:00:02 +00002492#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002493 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002494 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002495 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002496 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002497 if (pc == NULL)
2498 {
2499 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2500 fname, lnum, line);
2501 continue;
2502 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002503 w = pc;
2504 }
2505 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00002506#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002507 {
2508 pc = NULL;
2509 w = line;
2510 }
2511
Bram Moolenaar51485f02005-06-04 21:55:20 +00002512 /* Store the word in the hashtable to be able to find duplicates. */
2513 dw = (char_u *)getroom_save(&spin->si_blocks, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002514 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002515 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002516 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002517 if (retval == FAIL)
2518 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002519
Bram Moolenaar51485f02005-06-04 21:55:20 +00002520 hash = hash_hash(dw);
2521 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002522 if (!HASHITEM_EMPTY(hi))
2523 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00002524 fname, lnum, line);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002525 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00002526 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002527
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002528 flags = 0;
2529 if (afflist != NULL)
2530 {
2531 /* Check for affix name that stands for keep-case word and stands
2532 * for rare word (if defined). */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002533 if (affile->af_kep != NUL
2534 && vim_strchr(afflist, affile->af_kep) != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002535 flags |= WF_KEEPCAP;
2536 if (affile->af_rar != NUL
2537 && vim_strchr(afflist, affile->af_rar) != NULL)
2538 flags |= WF_RARE;
2539 }
2540
Bram Moolenaar51485f02005-06-04 21:55:20 +00002541 /* Add the word to the word tree(s). */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002542 if (store_word(dw, spin, flags, spin->si_region) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002543 retval = FAIL;
2544
2545 if (afflist != NULL)
2546 {
2547 /* Find all matching suffixes and add the resulting words.
2548 * Additionally do matching prefixes that combine. */
2549 if (store_aff_word(dw, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002550 &affile->af_suff, &affile->af_pref,
2551 FALSE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002552 retval = FAIL;
2553
2554 /* Find all matching prefixes and add the resulting words. */
2555 if (store_aff_word(dw, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002556 &affile->af_pref, NULL, FALSE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002557 retval = FAIL;
2558 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002559 }
2560
Bram Moolenaar51485f02005-06-04 21:55:20 +00002561 if (spin->si_ascii && non_ascii > 0)
2562 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
2563 non_ascii);
2564 hash_clear(&ht);
2565
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002566 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002567 return retval;
2568}
2569
2570/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002571 * Apply affixes to a word and store the resulting words.
2572 * "ht" is the hashtable with affentry_T that need to be applied, either
2573 * prefixes or suffixes.
2574 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
2575 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002576 *
2577 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002578 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002579 static int
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002580store_aff_word(word, spin, afflist, ht, xht, comb, flags)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002581 char_u *word; /* basic word start */
2582 spellinfo_T *spin; /* spell info */
2583 char_u *afflist; /* list of names of supported affixes */
2584 hashtab_T *ht;
2585 hashtab_T *xht;
2586 int comb; /* only use affixes that combine */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002587 int flags; /* flags for the word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002588{
2589 int todo;
2590 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002591 affheader_T *ah;
2592 affentry_T *ae;
2593 regmatch_T regmatch;
2594 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002595 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002596 int i;
2597 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002598
Bram Moolenaar51485f02005-06-04 21:55:20 +00002599 todo = ht->ht_used;
2600 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002601 {
2602 if (!HASHITEM_EMPTY(hi))
2603 {
2604 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002605 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00002606
Bram Moolenaar51485f02005-06-04 21:55:20 +00002607 /* Check that the affix combines, if required, and that the word
2608 * supports this affix. */
2609 if ((!comb || ah->ah_combine)
2610 && vim_strchr(afflist, *ah->ah_key) != NULL)
Bram Moolenaar5482f332005-04-17 20:18:43 +00002611 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002612 /* Loop over all affix entries with this name. */
2613 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002614 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002615 /* Check the condition. It's not logical to match case
2616 * here, but it is required for compatibility with
2617 * Myspell. */
2618 regmatch.regprog = ae->ae_prog;
2619 regmatch.rm_ic = FALSE;
2620 if (ae->ae_prog == NULL
2621 || vim_regexec(&regmatch, word, (colnr_T)0))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002622 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002623 /* Match. Remove the chop and add the affix. */
2624 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002625 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002626 /* prefix: chop/add at the start of the word */
2627 if (ae->ae_add == NULL)
2628 *newword = NUL;
2629 else
2630 STRCPY(newword, ae->ae_add);
2631 p = word;
2632 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002633 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002634 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002635#ifdef FEAT_MBYTE
2636 if (has_mbyte)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002637 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002638 i = mb_charlen(ae->ae_chop);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002639 for ( ; i > 0; --i)
2640 mb_ptr_adv(p);
2641 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002642 else
2643#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002644 p += STRLEN(ae->ae_chop);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002645 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002646 STRCAT(newword, p);
2647 }
2648 else
2649 {
2650 /* suffix: chop/add at the end of the word */
2651 STRCPY(newword, word);
2652 if (ae->ae_chop != NULL)
2653 {
2654 /* Remove chop string. */
2655 p = newword + STRLEN(newword);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002656#ifdef FEAT_MBYTE
2657 if (has_mbyte)
2658 i = mb_charlen(ae->ae_chop);
2659 else
2660#endif
2661 i = STRLEN(ae->ae_chop);
2662 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002663 mb_ptr_back(newword, p);
2664 *p = NUL;
2665 }
2666 if (ae->ae_add != NULL)
2667 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002668 }
2669
Bram Moolenaar51485f02005-06-04 21:55:20 +00002670 /* Store the modified word. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002671 if (store_word(newword, spin,
2672 flags, spin->si_region) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002673 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002674
Bram Moolenaar51485f02005-06-04 21:55:20 +00002675 /* When added a suffix and combining is allowed also
2676 * try adding prefixes additionally. */
2677 if (xht != NULL && ah->ah_combine)
2678 if (store_aff_word(newword, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002679 xht, NULL, TRUE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002680 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002681 }
2682 }
2683 }
2684 }
2685 }
2686
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002687 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002688}
2689
2690/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002691 * Read a file with a list of words.
2692 */
2693 static int
2694spell_read_wordfile(fname, spin)
2695 char_u *fname;
2696 spellinfo_T *spin;
2697{
2698 FILE *fd;
2699 long lnum = 0;
2700 char_u rline[MAXLINELEN];
2701 char_u *line;
2702 char_u *pc = NULL;
2703 int l;
2704 int retval = OK;
2705 int did_word = FALSE;
2706 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002707 int flags;
Bram Moolenaar3982c542005-06-08 21:56:31 +00002708 int regionmask;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002709
2710 /*
2711 * Open the file.
2712 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002713 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00002714 if (fd == NULL)
2715 {
2716 EMSG2(_(e_notopen), fname);
2717 return FAIL;
2718 }
2719
Bram Moolenaarb765d632005-06-07 21:00:02 +00002720 if (spin->si_verbose || p_verbose > 2)
2721 {
2722 if (!spin->si_verbose)
2723 verbose_enter();
2724 smsg((char_u *)_("Reading word file %s..."), fname);
2725 out_flush();
2726 if (!spin->si_verbose)
2727 verbose_leave();
2728 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002729
2730 /*
2731 * Read all the lines in the file one by one.
2732 */
2733 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2734 {
2735 line_breakcheck();
2736 ++lnum;
2737
2738 /* Skip comment lines. */
2739 if (*rline == '#')
2740 continue;
2741
2742 /* Remove CR, LF and white space from the end. */
2743 l = STRLEN(rline);
2744 while (l > 0 && rline[l - 1] <= ' ')
2745 --l;
2746 if (l == 0)
2747 continue; /* empty or blank line */
2748 rline[l] = NUL;
2749
2750 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
2751 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002752#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00002753 if (spin->si_conv.vc_type != CONV_NONE)
2754 {
2755 pc = string_convert(&spin->si_conv, rline, NULL);
2756 if (pc == NULL)
2757 {
2758 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2759 fname, lnum, rline);
2760 continue;
2761 }
2762 line = pc;
2763 }
2764 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00002765#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002766 {
2767 pc = NULL;
2768 line = rline;
2769 }
2770
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002771 flags = 0;
Bram Moolenaar3982c542005-06-08 21:56:31 +00002772 regionmask = spin->si_region;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002773
2774 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00002775 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002776 ++line;
Bram Moolenaar3982c542005-06-08 21:56:31 +00002777
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002778 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002779 {
2780 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar3982c542005-06-08 21:56:31 +00002781 smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
2782 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002783 else if (did_word)
Bram Moolenaar3982c542005-06-08 21:56:31 +00002784 smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
2785 fname, lnum, line - 1);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002786 else
2787 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002788#ifdef FEAT_MBYTE
2789 char_u *enc;
2790
Bram Moolenaar51485f02005-06-04 21:55:20 +00002791 /* Setup for conversion to 'encoding'. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002792 line += 10;
2793 enc = enc_canonize(line);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002794 if (enc != NULL && !spin->si_ascii
2795 && convert_setup(&spin->si_conv, enc,
2796 p_enc) == FAIL)
2797 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar3982c542005-06-08 21:56:31 +00002798 fname, line, p_enc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002799 vim_free(enc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002800#else
2801 smsg((char_u *)_("Conversion in %s not supported"), fname);
2802#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002803 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002804 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002805 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002806
Bram Moolenaar3982c542005-06-08 21:56:31 +00002807 if (STRNCMP(line, "regions=", 8) == 0)
2808 {
2809 if (spin->si_region_count > 1)
2810 smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
2811 fname, lnum, line);
2812 else
2813 {
2814 line += 8;
2815 if (STRLEN(line) > 16)
2816 smsg((char_u *)_("Too many regions in %s line %d: %s"),
2817 fname, lnum, line);
2818 else
2819 {
2820 spin->si_region_count = STRLEN(line) / 2;
2821 STRCPY(spin->si_region_name, line);
2822 }
2823 }
2824 continue;
2825 }
2826
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002827 if (*line == '=')
2828 {
2829 /* keep-case word */
2830 flags |= WF_KEEPCAP;
2831 ++line;
2832 }
2833
2834 if (*line == '!')
2835 {
2836 /* Bad, bad, wicked word. */
2837 flags |= WF_BANNED;
2838 ++line;
2839 }
2840 else if (*line == '?')
2841 {
2842 /* Rare word. */
2843 flags |= WF_RARE;
2844 ++line;
2845 }
2846
Bram Moolenaar3982c542005-06-08 21:56:31 +00002847 if (VIM_ISDIGIT(*line))
2848 {
2849 /* region number(s) */
2850 regionmask = 0;
2851 while (VIM_ISDIGIT(*line))
2852 {
2853 l = *line - '0';
2854 if (l > spin->si_region_count)
2855 {
2856 smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
2857 fname, lnum, line);
2858 break;
2859 }
2860 regionmask |= 1 << (l - 1);
2861 ++line;
2862 }
2863 flags |= WF_REGION;
2864 }
2865
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002866 if (flags == 0)
2867 {
2868 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00002869 fname, lnum, line);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002870 continue;
2871 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002872 }
2873
2874 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
2875 if (spin->si_ascii && has_non_ascii(line))
2876 {
2877 ++non_ascii;
2878 continue;
2879 }
2880
2881 /* Normal word: store it. */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002882 if (store_word(line, spin, flags, regionmask) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002883 {
2884 retval = FAIL;
2885 break;
2886 }
2887 did_word = TRUE;
2888 }
2889
2890 vim_free(pc);
2891 fclose(fd);
2892
Bram Moolenaarb765d632005-06-07 21:00:02 +00002893 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
2894 {
2895 if (p_verbose > 2)
2896 verbose_enter();
Bram Moolenaar51485f02005-06-04 21:55:20 +00002897 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
2898 non_ascii);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002899 if (p_verbose > 2)
2900 verbose_leave();
2901 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002902 return retval;
2903}
2904
2905/*
2906 * Get part of an sblock_T, "len" bytes long.
2907 * This avoids calling free() for every little struct we use.
2908 * The memory is cleared to all zeros.
2909 * Returns NULL when out of memory.
2910 */
2911 static void *
2912getroom(blp, len)
2913 sblock_T **blp;
2914 size_t len; /* length needed */
2915{
2916 char_u *p;
2917 sblock_T *bl = *blp;
2918
2919 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
2920 {
2921 /* Allocate a block of memory. This is not freed until much later. */
2922 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
2923 if (bl == NULL)
2924 return NULL;
2925 bl->sb_next = *blp;
2926 *blp = bl;
2927 bl->sb_used = 0;
2928 }
2929
2930 p = bl->sb_data + bl->sb_used;
2931 bl->sb_used += len;
2932
2933 return p;
2934}
2935
2936/*
2937 * Make a copy of a string into memory allocated with getroom().
2938 */
2939 static char_u *
2940getroom_save(blp, s)
2941 sblock_T **blp;
2942 char_u *s;
2943{
2944 char_u *sc;
2945
2946 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
2947 if (sc != NULL)
2948 STRCPY(sc, s);
2949 return sc;
2950}
2951
2952
2953/*
2954 * Free the list of allocated sblock_T.
2955 */
2956 static void
2957free_blocks(bl)
2958 sblock_T *bl;
2959{
2960 sblock_T *next;
2961
2962 while (bl != NULL)
2963 {
2964 next = bl->sb_next;
2965 vim_free(bl);
2966 bl = next;
2967 }
2968}
2969
2970/*
2971 * Allocate the root of a word tree.
2972 */
2973 static wordnode_T *
2974wordtree_alloc(blp)
2975 sblock_T **blp;
2976{
2977 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
2978}
2979
2980/*
2981 * Store a word in the tree(s).
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002982 * Always store it in the case-folded tree. A keep-case word can also be used
2983 * with all caps.
Bram Moolenaar51485f02005-06-04 21:55:20 +00002984 * For a keep-case word also store it in the keep-case tree.
2985 */
2986 static int
Bram Moolenaar3982c542005-06-08 21:56:31 +00002987store_word(word, spin, flags, region)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002988 char_u *word;
2989 spellinfo_T *spin;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002990 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar3982c542005-06-08 21:56:31 +00002991 int region; /* supported region(s) */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002992{
2993 int len = STRLEN(word);
2994 int ct = captype(word, word + len);
2995 char_u foldword[MAXWLEN];
2996 int res;
2997
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002998 (void)spell_casefold(word, len, foldword, MAXWLEN);
2999 res = tree_add_word(foldword, spin->si_foldroot, ct | flags,
3000 region, &spin->si_blocks);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003001
3002 if (res == OK && (ct == WF_KEEPCAP || flags & WF_KEEPCAP))
3003 res = tree_add_word(word, spin->si_keeproot, flags,
Bram Moolenaar3982c542005-06-08 21:56:31 +00003004 region, &spin->si_blocks);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003005 return res;
3006}
3007
3008/*
3009 * Add word "word" to a word tree at "root".
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003010 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003011 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003012 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00003013tree_add_word(word, root, flags, region, blp)
3014 char_u *word;
3015 wordnode_T *root;
3016 int flags;
3017 int region;
3018 sblock_T **blp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003019{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003020 wordnode_T *node = root;
3021 wordnode_T *np;
3022 wordnode_T **prev = NULL;
3023 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003024
Bram Moolenaar51485f02005-06-04 21:55:20 +00003025 /* Add each byte of the word to the tree, including the NUL at the end. */
3026 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003027 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003028 /* Look for the sibling that has the same character. They are sorted
3029 * on byte value, thus stop searching when a sibling is found with a
3030 * higher byte value. For zero bytes (end of word) check that the
3031 * flags are equal, there is a separate zero byte for each flag value.
3032 */
3033 while (node != NULL && (node->wn_byte < word[i]
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003034 || (node->wn_byte == 0 && node->wn_flags != (flags & 0xff))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003035 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003036 prev = &node->wn_sibling;
3037 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003038 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003039 if (node == NULL || node->wn_byte != word[i])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003040 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003041 /* Allocate a new node. */
3042 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
3043 if (np == NULL)
3044 return FAIL;
3045 np->wn_byte = word[i];
3046 *prev = np;
3047 np->wn_sibling = node;
3048 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003049 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003050
Bram Moolenaar51485f02005-06-04 21:55:20 +00003051 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003052 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003053 node->wn_flags = flags;
3054 node->wn_region |= region;
3055 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00003056 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003057 prev = &node->wn_child;
3058 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003059 }
3060
3061 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003062}
3063
3064/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003065 * Compress a tree: find tails that are identical and can be shared.
3066 */
3067 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00003068wordtree_compress(root, spin)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003069 wordnode_T *root;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003070 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003071{
3072 hashtab_T ht;
3073 int n;
3074 int tot = 0;
3075
3076 if (root != NULL)
3077 {
3078 hash_init(&ht);
3079 n = node_compress(root, &ht, &tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003080 if (spin->si_verbose || p_verbose > 2)
3081 {
3082 if (!spin->si_verbose)
3083 verbose_enter();
3084 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00003085 n, tot, (tot - n) * 100 / tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003086 if (p_verbose > 2)
3087 verbose_leave();
3088 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003089 hash_clear(&ht);
3090 }
3091}
3092
3093/*
3094 * Compress a node, its siblings and its children, depth first.
3095 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003096 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003097 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00003098node_compress(node, ht, tot)
3099 wordnode_T *node;
3100 hashtab_T *ht;
3101 int *tot; /* total count of nodes before compressing,
3102 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003103{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003104 wordnode_T *np;
3105 wordnode_T *tp;
3106 wordnode_T *child;
3107 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003108 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003109 int len = 0;
3110 unsigned nr, n;
3111 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003112
Bram Moolenaar51485f02005-06-04 21:55:20 +00003113 /*
3114 * Go through the list of siblings. Compress each child and then try
3115 * finding an identical child to replace it.
3116 * Note that with "child" we mean not just the node that is pointed to,
3117 * but the whole list of siblings, of which the node is the first.
3118 */
3119 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003120 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003121 ++len;
3122 if ((child = np->wn_child) != NULL)
3123 {
3124 /* Compress the child. This fills wn_hashkey. */
3125 compressed += node_compress(child, ht, tot);
3126
3127 /* Try to find an identical child. */
3128 hash = hash_hash(child->wn_hashkey);
3129 hi = hash_lookup(ht, child->wn_hashkey, hash);
3130 tp = NULL;
3131 if (!HASHITEM_EMPTY(hi))
3132 {
3133 /* There are children with an identical hash value. Now check
3134 * if there is one that is really identical. */
3135 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next)
3136 if (node_equal(child, tp))
3137 {
3138 /* Found one! Now use that child in place of the
3139 * current one. This means the current child is
3140 * dropped from the tree. */
3141 np->wn_child = tp;
3142 ++compressed;
3143 break;
3144 }
3145 if (tp == NULL)
3146 {
3147 /* No other child with this hash value equals the child of
3148 * the node, add it to the linked list after the first
3149 * item. */
3150 tp = HI2WN(hi);
3151 child->wn_next = tp->wn_next;
3152 tp->wn_next = child;
3153 }
3154 }
3155 else
3156 /* No other child has this hash value, add it to the
3157 * hashtable. */
3158 hash_add_item(ht, hi, child->wn_hashkey, hash);
3159 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003160 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003161 *tot += len;
3162
3163 /*
3164 * Make a hash key for the node and its siblings, so that we can quickly
3165 * find a lookalike node. This must be done after compressing the sibling
3166 * list, otherwise the hash key would become invalid by the compression.
3167 */
3168 node->wn_hashkey[0] = len;
3169 nr = 0;
3170 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003171 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003172 if (np->wn_byte == NUL)
3173 /* end node: only use wn_flags and wn_region */
3174 n = np->wn_flags + (np->wn_region << 8);
3175 else
3176 /* byte node: use the byte value and the child pointer */
3177 n = np->wn_byte + ((long_u)np->wn_child << 8);
3178 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003179 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003180
3181 /* Avoid NUL bytes, it terminates the hash key. */
3182 n = nr & 0xff;
3183 node->wn_hashkey[1] = n == 0 ? 1 : n;
3184 n = (nr >> 8) & 0xff;
3185 node->wn_hashkey[2] = n == 0 ? 1 : n;
3186 n = (nr >> 16) & 0xff;
3187 node->wn_hashkey[3] = n == 0 ? 1 : n;
3188 n = (nr >> 24) & 0xff;
3189 node->wn_hashkey[4] = n == 0 ? 1 : n;
3190 node->wn_hashkey[5] = NUL;
3191
3192 return compressed;
3193}
3194
3195/*
3196 * Return TRUE when two nodes have identical siblings and children.
3197 */
3198 static int
3199node_equal(n1, n2)
3200 wordnode_T *n1;
3201 wordnode_T *n2;
3202{
3203 wordnode_T *p1;
3204 wordnode_T *p2;
3205
3206 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
3207 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
3208 if (p1->wn_byte != p2->wn_byte
3209 || (p1->wn_byte == NUL
3210 ? (p1->wn_flags != p2->wn_flags
3211 || p1->wn_region != p2->wn_region)
3212 : (p1->wn_child != p2->wn_child)))
3213 break;
3214
3215 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003216}
3217
3218/*
3219 * Write a number to file "fd", MSB first, in "len" bytes.
3220 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003221 void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003222put_bytes(fd, nr, len)
3223 FILE *fd;
3224 long_u nr;
3225 int len;
3226{
3227 int i;
3228
3229 for (i = len - 1; i >= 0; --i)
3230 putc((int)(nr >> (i * 8)), fd);
3231}
3232
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003233static int
3234#ifdef __BORLANDC__
3235_RTLENTRYF
3236#endif
3237rep_compare __ARGS((const void *s1, const void *s2));
3238
3239/*
3240 * Function given to qsort() to sort the REP items on "from" string.
3241 */
3242 static int
3243#ifdef __BORLANDC__
3244_RTLENTRYF
3245#endif
3246rep_compare(s1, s2)
3247 const void *s1;
3248 const void *s2;
3249{
3250 fromto_T *p1 = (fromto_T *)s1;
3251 fromto_T *p2 = (fromto_T *)s2;
3252
3253 return STRCMP(p1->ft_from, p2->ft_from);
3254}
3255
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003256/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003257 * Write the Vim spell file "fname".
3258 */
3259 static void
Bram Moolenaar3982c542005-06-08 21:56:31 +00003260write_vim_spell(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003261 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003262 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003263{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003264 FILE *fd;
3265 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003266 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003267 wordnode_T *tree;
3268 int nodecount;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003269 int i;
3270 int l;
3271 garray_T *gap;
3272 fromto_T *ftp;
3273 char_u *p;
3274 int rr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003275
Bram Moolenaarb765d632005-06-07 21:00:02 +00003276 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00003277 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003278 {
3279 EMSG2(_(e_notopen), fname);
3280 return;
3281 }
3282
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003283 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
3284 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003285
3286 /* <fileID> */
3287 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
3288 EMSG(_(e_write));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003289
3290 /* write the region names if there is more than one */
Bram Moolenaar3982c542005-06-08 21:56:31 +00003291 if (spin->si_region_count > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003292 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00003293 putc(spin->si_region_count, fd); /* <regioncnt> <regionname> ... */
3294 fwrite(spin->si_region_name, (size_t)(spin->si_region_count * 2),
3295 (size_t)1, fd);
3296 regionmask = (1 << spin->si_region_count) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003297 }
3298 else
3299 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003300 putc(0, fd);
3301 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003302 }
3303
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003304 /*
3305 * Write the table with character flags and table for case folding.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00003306 * <charflagslen> <charflags> <fcharlen> <fchars>
3307 * Skip this for ASCII, the table may conflict with the one used for
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003308 * 'encoding'.
3309 * Also skip this for an .add.spl file, the main spell file must contain
3310 * the table (avoids that it conflicts). File is shorter too.
3311 */
3312 if (spin->si_ascii || spin->si_add)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00003313 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003314 putc(0, fd);
3315 putc(0, fd);
3316 putc(0, fd);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00003317 }
3318 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00003319 write_spell_chartab(fd);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003320
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003321 /* Sort the REP items. */
3322 qsort(spin->si_rep.ga_data, (size_t)spin->si_rep.ga_len,
3323 sizeof(fromto_T), rep_compare);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003324
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003325 /* <SUGGEST> : <repcount> <rep> ...
3326 * <salflags> <salcount> <sal> ...
3327 * <maplen> <mapstr> */
3328 for (round = 1; round <= 2; ++round)
3329 {
3330 if (round == 1)
3331 gap = &spin->si_rep;
3332 else
3333 {
3334 gap = &spin->si_sal;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003335
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003336 i = 0;
3337 if (spin->si_followup)
3338 i |= SAL_F0LLOWUP;
3339 if (spin->si_collapse)
3340 i |= SAL_COLLAPSE;
3341 if (spin->si_rem_accents)
3342 i |= SAL_REM_ACCENTS;
3343 putc(i, fd); /* <salflags> */
3344 }
3345
3346 put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
3347 for (i = 0; i < gap->ga_len; ++i)
3348 {
3349 /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3350 /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3351 ftp = &((fromto_T *)gap->ga_data)[i];
3352 for (rr = 1; rr <= 2; ++rr)
3353 {
3354 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
3355 l = STRLEN(p);
3356 putc(l, fd);
3357 fwrite(p, l, (size_t)1, fd);
3358 }
3359 }
3360 }
3361
3362 put_bytes(fd, (long_u)spin->si_map.ga_len, 2); /* <maplen> */
3363 if (spin->si_map.ga_len > 0) /* <mapstr> */
3364 fwrite(spin->si_map.ga_data, (size_t)spin->si_map.ga_len,
3365 (size_t)1, fd);
Bram Moolenaar50cde822005-06-05 21:54:54 +00003366
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003367 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003368 * <LWORDTREE> <KWORDTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003369 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003370 spin->si_memtot = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003371 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003372 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003373 tree = (round == 1) ? spin->si_foldroot : spin->si_keeproot;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003374
Bram Moolenaar51485f02005-06-04 21:55:20 +00003375 /* Count the number of nodes. Needed to be able to allocate the
3376 * memory when reading the nodes. Also fills in the index for shared
3377 * nodes. */
3378 nodecount = put_tree(NULL, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003379
Bram Moolenaar51485f02005-06-04 21:55:20 +00003380 /* number of nodes in 4 bytes */
3381 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00003382 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003383
Bram Moolenaar51485f02005-06-04 21:55:20 +00003384 /* Write the nodes. */
3385 (void)put_tree(fd, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003386 }
3387
Bram Moolenaar51485f02005-06-04 21:55:20 +00003388 fclose(fd);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00003389}
3390
3391/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003392 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003393 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00003394 * This first writes the list of possible bytes (siblings). Then for each
3395 * byte recursively write the children.
3396 *
3397 * NOTE: The code here must match the code in read_tree(), since assumptions
3398 * are made about the indexes (so that we don't have to write them in the
3399 * file).
3400 *
3401 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003402 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003403 static int
3404put_tree(fd, node, index, regionmask)
3405 FILE *fd; /* NULL when only counting */
3406 wordnode_T *node;
3407 int index;
3408 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003409{
Bram Moolenaar51485f02005-06-04 21:55:20 +00003410 int newindex = index;
3411 int siblingcount = 0;
3412 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003413 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003414
Bram Moolenaar51485f02005-06-04 21:55:20 +00003415 /* If "node" is zero the tree is empty. */
3416 if (node == NULL)
3417 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003418
Bram Moolenaar51485f02005-06-04 21:55:20 +00003419 /* Store the index where this node is written. */
3420 node->wn_index = index;
3421
3422 /* Count the number of siblings. */
3423 for (np = node; np != NULL; np = np->wn_sibling)
3424 ++siblingcount;
3425
3426 /* Write the sibling count. */
3427 if (fd != NULL)
3428 putc(siblingcount, fd); /* <siblingcount> */
3429
3430 /* Write each sibling byte and optionally extra info. */
3431 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003432 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003433 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00003434 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003435 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003436 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003437 /* For a NUL byte (end of word) instead of the byte itself
3438 * we write the flag/region items. */
3439 flags = np->wn_flags;
3440 if (regionmask != 0 && np->wn_region != regionmask)
3441 flags |= WF_REGION;
3442 if (flags == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00003443 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003444 /* word without flags or region */
3445 putc(BY_NOFLAGS, fd); /* <byte> */
3446 }
3447 else
3448 {
3449 putc(BY_FLAGS, fd); /* <byte> */
3450 putc(flags, fd); /* <flags> */
3451 if (flags & WF_REGION)
3452 putc(np->wn_region, fd); /* <regionmask> */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00003453 }
3454 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00003455 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003456 else
3457 {
3458 if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node)
3459 {
3460 /* The child is written elsewhere, write the reference. */
3461 if (fd != NULL)
3462 {
3463 putc(BY_INDEX, fd); /* <byte> */
3464 /* <nodeidx> */
3465 put_bytes(fd, (long_u)np->wn_child->wn_index, 3);
3466 }
3467 }
3468 else if (np->wn_child->wn_wnode == NULL)
3469 /* We will write the child below and give it an index. */
3470 np->wn_child->wn_wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003471
Bram Moolenaar51485f02005-06-04 21:55:20 +00003472 if (fd != NULL)
3473 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
3474 {
3475 EMSG(_(e_write));
3476 return 0;
3477 }
3478 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003479 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00003480
3481 /* Space used in the array when reading: one for each sibling and one for
3482 * the count. */
3483 newindex += siblingcount + 1;
3484
3485 /* Recursively dump the children of each sibling. */
3486 for (np = node; np != NULL; np = np->wn_sibling)
3487 if (np->wn_byte != 0 && np->wn_child->wn_wnode == node)
3488 newindex = put_tree(fd, np->wn_child, newindex, regionmask);
3489
3490 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003491}
3492
3493
3494/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00003495 * ":mkspell [-ascii] outfile infile ..."
3496 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003497 */
3498 void
3499ex_mkspell(eap)
3500 exarg_T *eap;
3501{
3502 int fcount;
3503 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003504 char_u *arg = eap->arg;
3505 int ascii = FALSE;
3506
3507 if (STRNCMP(arg, "-ascii", 6) == 0)
3508 {
3509 ascii = TRUE;
3510 arg = skipwhite(arg + 6);
3511 }
3512
3513 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
3514 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
3515 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003516 mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003517 FreeWild(fcount, fnames);
3518 }
3519}
3520
3521/*
3522 * Create a Vim spell file from one or more word lists.
3523 * "fnames[0]" is the output file name.
3524 * "fnames[fcount - 1]" is the last input file name.
3525 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
3526 * and ".spl" is appended to make the output file name.
3527 */
3528 static void
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003529mkspell(fcount, fnames, ascii, overwrite, added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003530 int fcount;
3531 char_u **fnames;
3532 int ascii; /* -ascii argument given */
3533 int overwrite; /* overwrite existing output file */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003534 int added_word; /* invoked through "zg" */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003535{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003536 char_u fname[MAXPATHL];
3537 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00003538 char_u **innames;
3539 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003540 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003541 int i;
3542 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003543 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003544 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003545 spellinfo_T spin;
3546
3547 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003548 spin.si_verbose = !added_word;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003549 spin.si_ascii = ascii;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003550 spin.si_followup = TRUE;
3551 spin.si_rem_accents = TRUE;
3552 ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
3553 ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
3554 ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003555
Bram Moolenaarb765d632005-06-07 21:00:02 +00003556 /* default: fnames[0] is output file, following are input files */
3557 innames = &fnames[1];
3558 incount = fcount - 1;
3559
3560 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00003561 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003562 len = STRLEN(fnames[0]);
3563 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
3564 {
3565 /* For ":mkspell path/en.latin1.add" output file is
3566 * "path/en.latin1.add.spl". */
3567 innames = &fnames[0];
3568 incount = 1;
3569 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
3570 }
3571 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
3572 {
3573 /* Name ends in ".spl", use as the file name. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003574 vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003575 }
3576 else
3577 /* Name should be language, make the file name from it. */
3578 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
3579 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
3580
3581 /* Check for .ascii.spl. */
3582 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
3583 spin.si_ascii = TRUE;
3584
3585 /* Check for .add.spl. */
3586 if (strstr((char *)gettail(wfname), ".add.") != NULL)
3587 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00003588 }
3589
Bram Moolenaarb765d632005-06-07 21:00:02 +00003590 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003591 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003592 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003593 EMSG(_("E754: Only up to 8 regions supported"));
3594 else
3595 {
3596 /* Check for overwriting before doing things that may take a lot of
3597 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003598 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003599 {
3600 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00003601 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003602 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00003603 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003604 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003605 EMSG2(_(e_isadir2), wfname);
3606 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003607 }
3608
3609 /*
3610 * Init the aff and dic pointers.
3611 * Get the region names if there are more than 2 arguments.
3612 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003613 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003614 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003615 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003616
Bram Moolenaar3982c542005-06-08 21:56:31 +00003617 if (incount > 1)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003618 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003619 len = STRLEN(innames[i]);
3620 if (STRLEN(gettail(innames[i])) < 5
3621 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003622 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00003623 EMSG2(_("E755: Invalid region in %s"), innames[i]);
3624 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003625 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00003626 spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
3627 spin.si_region_name[i * 2 + 1] =
3628 TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003629 }
3630 }
Bram Moolenaar3982c542005-06-08 21:56:31 +00003631 spin.si_region_count = incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003632
Bram Moolenaarb765d632005-06-07 21:00:02 +00003633 if (!spin.si_add)
3634 /* Clear the char type tables, don't want to use any of the
3635 * currently used spell properties. */
3636 init_spell_chartab();
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00003637
Bram Moolenaar51485f02005-06-04 21:55:20 +00003638 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
3639 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
3640 if (spin.si_foldroot == NULL || spin.si_keeproot == NULL)
3641 {
3642 error = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003643 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00003644 }
3645
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003646 /*
3647 * Read all the .aff and .dic files.
3648 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00003649 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003650 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003651 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003652 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003653 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003654 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003655
Bram Moolenaarb765d632005-06-07 21:00:02 +00003656 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003657 if (mch_stat((char *)fname, &st) >= 0)
3658 {
3659 /* Read the .aff file. Will init "spin->si_conv" based on the
3660 * "SET" line. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003661 afile[i] = spell_read_aff(fname, &spin);
3662 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003663 error = TRUE;
3664 else
3665 {
3666 /* Read the .dic file and store the words in the trees. */
3667 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00003668 innames[i]);
3669 if (spell_read_dic(fname, &spin, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003670 error = TRUE;
3671 }
3672 }
3673 else
3674 {
3675 /* No .aff file, try reading the file as a word list. Store
3676 * the words in the trees. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003677 if (spell_read_wordfile(innames[i], &spin) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003678 error = TRUE;
3679 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003680
Bram Moolenaarb765d632005-06-07 21:00:02 +00003681#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003682 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003683 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003684#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003685 }
3686
Bram Moolenaar51485f02005-06-04 21:55:20 +00003687 if (!error)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003688 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003689 /*
3690 * Remove the dummy NUL from the start of the tree root.
3691 */
3692 spin.si_foldroot = spin.si_foldroot->wn_sibling;
3693 spin.si_keeproot = spin.si_keeproot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003694
3695 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003696 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003697 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003698 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003699 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003700 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003701 verbose_enter();
3702 MSG(_("Compressing word tree..."));
3703 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003704 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003705 verbose_leave();
3706 }
3707 wordtree_compress(spin.si_foldroot, &spin);
3708 wordtree_compress(spin.si_keeproot, &spin);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003709 }
3710
Bram Moolenaar51485f02005-06-04 21:55:20 +00003711 if (!error)
3712 {
3713 /*
3714 * Write the info in the spell file.
3715 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003716 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003717 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003718 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003719 verbose_enter();
3720 smsg((char_u *)_("Writing spell file %s..."), wfname);
3721 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003722 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003723 verbose_leave();
3724 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00003725
Bram Moolenaar3982c542005-06-08 21:56:31 +00003726 write_vim_spell(wfname, &spin);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003727
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003728 if (!added_word || p_verbose > 2)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003729 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003730 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003731 verbose_enter();
3732 MSG(_("Done!"));
3733 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
Bram Moolenaar50cde822005-06-05 21:54:54 +00003734 spin.si_memtot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003735 out_flush();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003736 if (added_word)
Bram Moolenaarb765d632005-06-07 21:00:02 +00003737 verbose_leave();
3738 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003739
Bram Moolenaarb765d632005-06-07 21:00:02 +00003740 /* If the file is loaded need to reload it. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003741 spell_reload_one(wfname, added_word);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003742 }
3743
3744 /* Free the allocated memory. */
3745 free_blocks(spin.si_blocks);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003746 ga_clear(&spin.si_rep);
3747 ga_clear(&spin.si_sal);
3748 ga_clear(&spin.si_map);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003749
3750 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003751 for (i = 0; i < incount; ++i)
3752 if (afile[i] != NULL)
3753 spell_free_aff(afile[i]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003754 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003755}
3756
Bram Moolenaarb765d632005-06-07 21:00:02 +00003757
3758/*
3759 * ":spellgood {word}"
3760 * ":spellwrong {word}"
3761 */
3762 void
3763ex_spell(eap)
3764 exarg_T *eap;
3765{
3766 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong);
3767}
3768
3769/*
3770 * Add "word[len]" to 'spellfile' as a good or bad word.
3771 */
3772 void
3773spell_add_word(word, len, bad)
3774 char_u *word;
3775 int len;
3776 int bad;
3777{
3778 FILE *fd;
3779 buf_T *buf;
3780
3781 if (*curbuf->b_p_spf == NUL)
3782 init_spellfile();
3783 if (*curbuf->b_p_spf == NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003784 EMSG(_("E764: 'spellfile' is not set"));
Bram Moolenaarb765d632005-06-07 21:00:02 +00003785 else
3786 {
3787 /* Check that the user isn't editing the .add file somewhere. */
3788 buf = buflist_findname_exp(curbuf->b_p_spf);
3789 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
3790 buf = NULL;
3791 if (buf != NULL && bufIsChanged(buf))
3792 EMSG(_(e_bufloaded));
3793 else
3794 {
3795 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
3796 if (fd == NULL)
3797 EMSG2(_(e_notopen), curbuf->b_p_spf);
3798 else
3799 {
3800 if (bad)
3801 fprintf(fd, "/!%.*s\n", len, word);
3802 else
3803 fprintf(fd, "%.*s\n", len, word);
3804 fclose(fd);
3805
3806 /* Update the .add.spl file. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003807 mkspell(1, &curbuf->b_p_spf, FALSE, TRUE, TRUE);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003808
3809 /* If the .add file is edited somewhere, reload it. */
3810 if (buf != NULL)
3811 buf_reload(buf);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003812
3813 redraw_all_later(NOT_VALID);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003814 }
3815 }
3816 }
3817}
3818
3819/*
3820 * Initialize 'spellfile' for the current buffer.
3821 */
3822 static void
3823init_spellfile()
3824{
3825 char_u buf[MAXPATHL];
3826 int l;
3827 slang_T *sl;
3828 char_u *rtp;
3829
3830 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
3831 {
3832 /* Loop over all entries in 'runtimepath'. */
3833 rtp = p_rtp;
3834 while (*rtp != NUL)
3835 {
3836 /* Copy the path from 'runtimepath' to buf[]. */
3837 copy_option_part(&rtp, buf, MAXPATHL, ",");
3838 if (filewritable(buf) == 2)
3839 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00003840 /* Use the first language name from 'spelllang' and the
3841 * encoding used in the first loaded .spl file. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003842 sl = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang;
3843 l = STRLEN(buf);
3844 vim_snprintf((char *)buf + l, MAXPATHL - l,
Bram Moolenaar3982c542005-06-08 21:56:31 +00003845 "/spell/%.*s.%s.add",
3846 2, curbuf->b_p_spl,
Bram Moolenaarb765d632005-06-07 21:00:02 +00003847 strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
3848 ? (char_u *)"ascii" : spell_enc());
3849 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
3850 break;
3851 }
3852 }
3853 }
3854}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003855
Bram Moolenaar51485f02005-06-04 21:55:20 +00003856
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003857/*
3858 * Init the chartab used for spelling for ASCII.
3859 * EBCDIC is not supported!
3860 */
3861 static void
3862clear_spell_chartab(sp)
3863 spelltab_T *sp;
3864{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003865 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003866
3867 /* Init everything to FALSE. */
3868 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
3869 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
3870 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003871 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003872 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003873 sp->st_upper[i] = i;
3874 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003875
3876 /* We include digits. A word shouldn't start with a digit, but handling
3877 * that is done separately. */
3878 for (i = '0'; i <= '9'; ++i)
3879 sp->st_isw[i] = TRUE;
3880 for (i = 'A'; i <= 'Z'; ++i)
3881 {
3882 sp->st_isw[i] = TRUE;
3883 sp->st_isu[i] = TRUE;
3884 sp->st_fold[i] = i + 0x20;
3885 }
3886 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003887 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003888 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003889 sp->st_upper[i] = i - 0x20;
3890 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003891}
3892
3893/*
3894 * Init the chartab used for spelling. Only depends on 'encoding'.
3895 * Called once while starting up and when 'encoding' changes.
3896 * The default is to use isalpha(), but the spell file should define the word
3897 * characters to make it possible that 'encoding' differs from the current
3898 * locale.
3899 */
3900 void
3901init_spell_chartab()
3902{
3903 int i;
3904
3905 did_set_spelltab = FALSE;
3906 clear_spell_chartab(&spelltab);
3907
3908#ifdef FEAT_MBYTE
3909 if (enc_dbcs)
3910 {
3911 /* DBCS: assume double-wide characters are word characters. */
3912 for (i = 128; i <= 255; ++i)
3913 if (MB_BYTE2LEN(i) == 2)
3914 spelltab.st_isw[i] = TRUE;
3915 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003916 else if (enc_utf8)
3917 {
3918 for (i = 128; i < 256; ++i)
3919 {
3920 spelltab.st_isu[i] = utf_isupper(i);
3921 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
3922 spelltab.st_fold[i] = utf_fold(i);
3923 spelltab.st_upper[i] = utf_toupper(i);
3924 }
3925 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003926 else
3927#endif
3928 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003929 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003930 for (i = 128; i < 256; ++i)
3931 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003932 if (MB_ISUPPER(i))
3933 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003934 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003935 spelltab.st_isu[i] = TRUE;
3936 spelltab.st_fold[i] = MB_TOLOWER(i);
3937 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003938 else if (MB_ISLOWER(i))
3939 {
3940 spelltab.st_isw[i] = TRUE;
3941 spelltab.st_upper[i] = MB_TOUPPER(i);
3942 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003943 }
3944 }
3945}
3946
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003947static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
3948static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
3949
3950/*
3951 * Set the spell character tables from strings in the affix file.
3952 */
3953 static int
3954set_spell_chartab(fol, low, upp)
3955 char_u *fol;
3956 char_u *low;
3957 char_u *upp;
3958{
3959 /* We build the new tables here first, so that we can compare with the
3960 * previous one. */
3961 spelltab_T new_st;
3962 char_u *pf = fol, *pl = low, *pu = upp;
3963 int f, l, u;
3964
3965 clear_spell_chartab(&new_st);
3966
3967 while (*pf != NUL)
3968 {
3969 if (*pl == NUL || *pu == NUL)
3970 {
3971 EMSG(_(e_affform));
3972 return FAIL;
3973 }
3974#ifdef FEAT_MBYTE
3975 f = mb_ptr2char_adv(&pf);
3976 l = mb_ptr2char_adv(&pl);
3977 u = mb_ptr2char_adv(&pu);
3978#else
3979 f = *pf++;
3980 l = *pl++;
3981 u = *pu++;
3982#endif
3983 /* Every character that appears is a word character. */
3984 if (f < 256)
3985 new_st.st_isw[f] = TRUE;
3986 if (l < 256)
3987 new_st.st_isw[l] = TRUE;
3988 if (u < 256)
3989 new_st.st_isw[u] = TRUE;
3990
3991 /* if "LOW" and "FOL" are not the same the "LOW" char needs
3992 * case-folding */
3993 if (l < 256 && l != f)
3994 {
3995 if (f >= 256)
3996 {
3997 EMSG(_(e_affrange));
3998 return FAIL;
3999 }
4000 new_st.st_fold[l] = f;
4001 }
4002
4003 /* if "UPP" and "FOL" are not the same the "UPP" char needs
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004004 * case-folding, it's upper case and the "UPP" is the upper case of
4005 * "FOL" . */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004006 if (u < 256 && u != f)
4007 {
4008 if (f >= 256)
4009 {
4010 EMSG(_(e_affrange));
4011 return FAIL;
4012 }
4013 new_st.st_fold[u] = f;
4014 new_st.st_isu[u] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004015 new_st.st_upper[f] = u;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004016 }
4017 }
4018
4019 if (*pl != NUL || *pu != NUL)
4020 {
4021 EMSG(_(e_affform));
4022 return FAIL;
4023 }
4024
4025 return set_spell_finish(&new_st);
4026}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004027
4028/*
4029 * Set the spell character tables from strings in the .spl file.
4030 */
4031 static int
4032set_spell_charflags(flags, cnt, upp)
4033 char_u *flags;
4034 int cnt;
4035 char_u *upp;
4036{
4037 /* We build the new tables here first, so that we can compare with the
4038 * previous one. */
4039 spelltab_T new_st;
4040 int i;
4041 char_u *p = upp;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004042 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004043
4044 clear_spell_chartab(&new_st);
4045
4046 for (i = 0; i < cnt; ++i)
4047 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004048 new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
4049 new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004050
4051 if (*p == NUL)
4052 return FAIL;
4053#ifdef FEAT_MBYTE
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004054 c = mb_ptr2char_adv(&p);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004055#else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004056 c = *p++;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004057#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004058 new_st.st_fold[i + 128] = c;
4059 if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
4060 new_st.st_upper[c] = i + 128;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004061 }
4062
4063 return set_spell_finish(&new_st);
4064}
4065
4066 static int
4067set_spell_finish(new_st)
4068 spelltab_T *new_st;
4069{
4070 int i;
4071
4072 if (did_set_spelltab)
4073 {
4074 /* check that it's the same table */
4075 for (i = 0; i < 256; ++i)
4076 {
4077 if (spelltab.st_isw[i] != new_st->st_isw[i]
4078 || spelltab.st_isu[i] != new_st->st_isu[i]
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004079 || spelltab.st_fold[i] != new_st->st_fold[i]
4080 || spelltab.st_upper[i] != new_st->st_upper[i])
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004081 {
4082 EMSG(_("E763: Word characters differ between spell files"));
4083 return FAIL;
4084 }
4085 }
4086 }
4087 else
4088 {
4089 /* copy the new spelltab into the one being used */
4090 spelltab = *new_st;
4091 did_set_spelltab = TRUE;
4092 }
4093
4094 return OK;
4095}
4096
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004097/*
4098 * Write the current tables into the .spl file.
4099 * This makes sure the same characters are recognized as word characters when
4100 * generating an when using a spell file.
4101 */
4102 static void
4103write_spell_chartab(fd)
4104 FILE *fd;
4105{
4106 char_u charbuf[256 * 4];
4107 int len = 0;
4108 int flags;
4109 int i;
4110
4111 fputc(128, fd); /* <charflagslen> */
4112 for (i = 128; i < 256; ++i)
4113 {
4114 flags = 0;
4115 if (spelltab.st_isw[i])
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004116 flags |= CF_WORD;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004117 if (spelltab.st_isu[i])
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004118 flags |= CF_UPPER;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004119 fputc(flags, fd); /* <charflags> */
4120
Bram Moolenaarb765d632005-06-07 21:00:02 +00004121#ifdef FEAT_MBYTE
4122 if (has_mbyte)
4123 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
4124 else
4125#endif
4126 charbuf[len++] = spelltab.st_fold[i];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004127 }
4128
4129 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
4130 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
4131}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004132
4133/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004134 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
4135 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004136 * When using a multi-byte 'encoding' the length may change!
4137 * Returns FAIL when something wrong.
4138 */
4139 static int
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004140spell_casefold(str, len, buf, buflen)
4141 char_u *str;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004142 int len;
4143 char_u *buf;
4144 int buflen;
4145{
4146 int i;
4147
4148 if (len >= buflen)
4149 {
4150 buf[0] = NUL;
4151 return FAIL; /* result will not fit */
4152 }
4153
4154#ifdef FEAT_MBYTE
4155 if (has_mbyte)
4156 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004157 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004158 char_u *p;
4159 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004160
4161 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004162 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004163 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004164 if (outi + MB_MAXBYTES > buflen)
4165 {
4166 buf[outi] = NUL;
4167 return FAIL;
4168 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004169 c = mb_ptr2char_adv(&p);
4170 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004171 }
4172 buf[outi] = NUL;
4173 }
4174 else
4175#endif
4176 {
4177 /* Be quick for non-multibyte encodings. */
4178 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004179 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00004180 buf[i] = NUL;
4181 }
4182
4183 return OK;
4184}
4185
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004186/*
4187 * "z?": Find badly spelled word under or after the cursor.
4188 * Give suggestions for the properly spelled word.
4189 * This is based on the mechanisms of Aspell, but completely reimplemented.
4190 */
4191 void
4192spell_suggest()
4193{
4194 char_u *line;
4195 pos_T prev_cursor = curwin->w_cursor;
4196 int attr;
4197 char_u wcopy[MAXWLEN + 2];
4198 char_u *p;
4199 int i;
4200 int c;
4201 suginfo_T sug;
4202 suggest_T *stp;
4203
4204 /*
4205 * Find the start of the badly spelled word.
4206 */
4207 if (spell_move_to(FORWARD, TRUE, TRUE) == FAIL)
4208 {
4209 beep_flush();
4210 return;
4211 }
4212
4213 /*
4214 * Set the info in "sug".
4215 */
4216 vim_memset(&sug, 0, sizeof(sug));
4217 ga_init2(&sug.su_ga, (int)sizeof(suggest_T), 10);
4218 hash_init(&sug.su_banned);
4219 line = ml_get_curline();
4220 sug.su_badptr = line + curwin->w_cursor.col;
4221 sug.su_badlen = spell_check(curwin, sug.su_badptr, &attr);
4222 if (sug.su_badlen >= MAXWLEN)
4223 sug.su_badlen = MAXWLEN - 1; /* just in case */
4224 vim_strncpy(sug.su_badword, sug.su_badptr, sug.su_badlen);
4225 (void)spell_casefold(sug.su_badptr, sug.su_badlen,
4226 sug.su_fbadword, MAXWLEN);
4227
4228 /* Ban the bad word itself. It may appear in another region. */
4229 add_banned(&sug, sug.su_badword);
4230
4231 /*
4232 * 1. Try inserting/deleting/swapping/changing a letter, use REP entries
4233 * from the .aff file and inserting a space (split the word).
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004234 *
4235 * Set a maximum score to limit the combination of operations that is
4236 * tried.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004237 */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004238 sug.su_maxscore = SCORE_MAXINIT;
4239 spell_try_change(&sug);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004240
4241 /*
4242 * 2. Try finding sound-a-like words.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004243 *
4244 * Only do this when we don't have a lot of suggestions yet, because it's
4245 * very slow and often doesn't find new suggestions.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004246 */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004247 if (sug.su_ga.ga_len < SUG_CLEAN_COUNT)
4248 {
4249 /* Allow a higher score now. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004250 sug.su_maxscore = SCORE_MAXMAX;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004251 spell_try_soundalike(&sug);
4252 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004253
4254 /* When CTRL-C was hit while searching do show the results. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004255 ui_breakcheck();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004256 if (got_int)
4257 {
4258 (void)vgetc();
4259 got_int = FALSE;
4260 }
4261
4262 if (sug.su_ga.ga_len == 0)
4263 MSG(_("Sorry, no suggestions"));
4264 else
4265 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004266#ifdef RESCORE
4267 /* Do slow but more accurate computation of the word score. */
4268 rescore_suggestions(&sug);
4269#endif
4270
4271 /* Sort the suggestions and truncate at SUG_PROMPT_COUNT. */
4272 cleanup_suggestions(&sug, SUG_PROMPT_COUNT);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004273
4274 /* List the suggestions. */
4275 msg_start();
4276 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
4277 sug.su_badlen, sug.su_badptr);
4278 msg_puts(IObuff);
4279 msg_clr_eos();
4280 msg_putchar('\n');
4281 msg_scroll = TRUE;
4282 for (i = 0; i < sug.su_ga.ga_len; ++i)
4283 {
4284 stp = &SUG(&sug, i);
4285
4286 /* The suggested word may replace only part of the bad word, add
4287 * the not replaced part. */
4288 STRCPY(wcopy, stp->st_word);
4289 if (sug.su_badlen > stp->st_orglen)
4290 vim_strncpy(wcopy + STRLEN(wcopy),
4291 sug.su_badptr + stp->st_orglen,
4292 sug.su_badlen - stp->st_orglen);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004293 if (p_verbose > 0)
4294 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\" (%d)"),
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004295 i + 1, wcopy, stp->st_score);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004296 else
4297 vim_snprintf((char *)IObuff, IOSIZE, _("%2d \"%s\""),
4298 i + 1, wcopy);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004299 msg_puts(IObuff);
4300 lines_left = 3; /* avoid more prompt */
4301 msg_putchar('\n');
4302 }
4303
4304 /* Ask for choice. */
4305 i = prompt_for_number();
4306 if (i > 0 && i <= sug.su_ga.ga_len && u_save_cursor())
4307 {
4308 /* Replace the word. */
4309 stp = &SUG(&sug, i - 1);
4310 p = alloc(STRLEN(line) - stp->st_orglen + STRLEN(stp->st_word) + 1);
4311 if (p != NULL)
4312 {
4313 c = sug.su_badptr - line;
4314 mch_memmove(p, line, c);
4315 STRCPY(p + c, stp->st_word);
4316 STRCAT(p, sug.su_badptr + stp->st_orglen);
4317 ml_replace(curwin->w_cursor.lnum, p, FALSE);
4318 curwin->w_cursor.col = c;
4319 changed_bytes(curwin->w_cursor.lnum, c);
4320 }
4321 }
4322 else
4323 curwin->w_cursor = prev_cursor;
4324 }
4325
4326 /* Free the suggestions. */
4327 for (i = 0; i < sug.su_ga.ga_len; ++i)
4328 vim_free(SUG(&sug, i).st_word);
4329 ga_clear(&sug.su_ga);
4330
4331 /* Free the banned words. */
4332 free_banned(&sug);
4333}
4334
4335/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004336 * Make a copy of "word", with the first letter upper or lower cased, to
4337 * "wcopy[MAXWLEN]". "word" must not be empty.
4338 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004339 */
4340 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004341onecap_copy(word, wcopy, upper)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004342 char_u *word;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004343 char_u *wcopy;
4344 int upper; /* TRUE: first letter made upper case */
4345{
4346 char_u *p;
4347 int c;
4348 int l;
4349
4350 p = word;
4351#ifdef FEAT_MBYTE
4352 if (has_mbyte)
4353 c = mb_ptr2char_adv(&p);
4354 else
4355#endif
4356 c = *p++;
4357 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004358 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004359 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004360 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004361#ifdef FEAT_MBYTE
4362 if (has_mbyte)
4363 l = mb_char2bytes(c, wcopy);
4364 else
4365#endif
4366 {
4367 l = 1;
4368 wcopy[0] = c;
4369 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004370 vim_strncpy(wcopy + l, p, MAXWLEN - l);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004371}
4372
4373/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004374 * Make a copy of "word" with all the letters upper cased into
4375 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004376 */
4377 static void
4378allcap_copy(word, wcopy)
4379 char_u *word;
4380 char_u *wcopy;
4381{
4382 char_u *s;
4383 char_u *d;
4384 int c;
4385
4386 d = wcopy;
4387 for (s = word; *s != NUL; )
4388 {
4389#ifdef FEAT_MBYTE
4390 if (has_mbyte)
4391 c = mb_ptr2char_adv(&s);
4392 else
4393#endif
4394 c = *s++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004395 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004396
4397#ifdef FEAT_MBYTE
4398 if (has_mbyte)
4399 {
4400 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
4401 break;
4402 d += mb_char2bytes(c, d);
4403 }
4404 else
4405#endif
4406 {
4407 if (d - wcopy >= MAXWLEN - 1)
4408 break;
4409 *d++ = c;
4410 }
4411 }
4412 *d = NUL;
4413}
4414
4415/*
4416 * Try finding suggestions by adding/removing/swapping letters.
4417 */
4418 static void
4419spell_try_change(su)
4420 suginfo_T *su;
4421{
4422 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
4423 char_u tword[MAXWLEN]; /* good word collected so far */
4424 trystate_T stack[MAXWLEN];
4425 char_u preword[MAXWLEN * 3]; /* word found with proper case (appended
4426 * to for word split) */
4427 char_u prewordlen = 0; /* length of word in "preword" */
4428 int splitoff = 0; /* index in tword after last split */
4429 trystate_T *sp;
4430 int newscore;
4431 langp_T *lp;
4432 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004433 idx_T *idxs;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004434 int depth;
4435 int c;
4436 int n;
4437 int flags;
4438 int badflags;
4439 garray_T *gap;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004440 idx_T arridx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004441 int len;
4442 char_u *p;
4443 fromto_T *ftp;
4444 int fl, tl;
4445
4446 /* get caps flags for bad word */
4447 badflags = captype(su->su_badptr, su->su_badptr + su->su_badlen);
4448
4449 /* We make a copy of the case-folded bad word, so that we can modify it
4450 * to find matches (esp. REP items). */
4451 STRCPY(fword, su->su_fbadword);
4452
4453 /*
4454 * At each node in the tree these states are tried:
4455 */
4456#define STATE_START 0 /* At start of node, check if word may end or
4457 * split word. */
4458#define STATE_SPLITUNDO 1 /* Undo word split. */
4459#define STATE_ENDNUL 2 /* Past NUL bytes at start of the node. */
4460#define STATE_PLAIN 3 /* Use each byte of the node. */
4461#define STATE_DEL 4 /* Delete a byte from the bad word. */
4462#define STATE_INS 5 /* Insert a byte in the bad word. */
4463#define STATE_SWAP 6 /* Swap two bytes. */
4464#define STATE_SWAP3A 7 /* Swap two bytes over three. */
4465#define STATE_ROT3L 8 /* Rotate three bytes left */
4466#define STATE_ROT3R 9 /* Rotate three bytes right */
4467#define STATE_ROT_UNDO 10 /* undo rotating */
4468#define STATE_REP_INI 11 /* Prepare for using REP items. */
4469#define STATE_REP 12 /* Use matching REP items from the .aff file. */
4470#define STATE_REP_UNDO 13 /* Undo a REP item replacement. */
4471#define STATE_FINAL 99 /* End of this node. */
4472
4473
4474 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
4475 lp->lp_slang != NULL; ++lp)
4476 {
4477#ifdef SOUNDFOLD_SCORE
4478 su->su_slang = lp->lp_slang;
4479 if (lp->lp_slang->sl_sal.ga_len > 0)
4480 /* soundfold the bad word */
4481 spell_soundfold(lp->lp_slang, su->su_fbadword, su->su_salword);
4482#endif
4483
4484 /*
4485 * Go through the whole case-fold tree, try changes at each node.
4486 * "tword[]" contains the word collected from nodes in the tree.
4487 * "fword[]" the word we are trying to match with (initially the bad
4488 * word).
4489 */
4490 byts = lp->lp_slang->sl_fbyts;
4491 idxs = lp->lp_slang->sl_fidxs;
4492
4493 depth = 0;
4494 stack[0].ts_state = STATE_START;
4495 stack[0].ts_score = 0;
4496 stack[0].ts_curi = 1;
4497 stack[0].ts_fidx = 0;
4498 stack[0].ts_fidxtry = 0;
4499 stack[0].ts_twordlen = 0;
4500 stack[0].ts_arridx = 0;
4501
4502 while (depth >= 0 && !got_int)
4503 {
4504 sp = &stack[depth];
4505 switch (sp->ts_state)
4506 {
4507 case STATE_START:
4508 /*
4509 * Start of node: Deal with NUL bytes, which means
4510 * tword[] may end here.
4511 */
4512 arridx = sp->ts_arridx; /* current node in the tree */
4513 len = byts[arridx]; /* bytes in this node */
4514 arridx += sp->ts_curi; /* index of current byte */
4515
4516 if (sp->ts_curi > len || (c = byts[arridx]) != 0)
4517 {
4518 /* Past bytes in node and/or past NUL bytes. */
4519 sp->ts_state = STATE_ENDNUL;
4520 break;
4521 }
4522
4523 /*
4524 * End of word in tree.
4525 */
4526 ++sp->ts_curi; /* eat one NUL byte */
4527
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004528 flags = (int)idxs[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004529
4530 /*
4531 * Form the word with proper case in preword.
4532 * If there is a word from a previous split, append.
4533 */
4534 tword[sp->ts_twordlen] = NUL;
4535 if (flags & WF_KEEPCAP)
4536 /* Must find the word in the keep-case tree. */
4537 find_keepcap_word(lp->lp_slang, tword + splitoff,
4538 preword + prewordlen);
4539 else
4540 /* Include badflags: if the badword is onecap or allcap
4541 * use that for the goodword too. */
4542 make_case_word(tword + splitoff,
4543 preword + prewordlen, flags | badflags);
4544
4545 /* Don't use a banned word. It may appear again as a good
4546 * word, thus remember it. */
4547 if (flags & WF_BANNED)
4548 {
4549 add_banned(su, preword + prewordlen);
4550 break;
4551 }
4552 if (was_banned(su, preword + prewordlen))
4553 break;
4554
4555 newscore = 0;
4556 if ((flags & WF_REGION)
4557 && (((unsigned)flags >> 8) & lp->lp_region) == 0)
4558 newscore += SCORE_REGION;
4559 if (flags & WF_RARE)
4560 newscore += SCORE_RARE;
4561
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004562 /* Words that were not found in the text get a penalty. */
4563 if ((flags & WF_USED) == 0)
4564 newscore += SCORE_NOTUSED;
4565
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004566 if (!spell_valid_case(badflags,
4567 captype(preword + prewordlen, NULL)))
4568 newscore += SCORE_ICASE;
4569
4570 if (fword[sp->ts_fidx] == 0)
4571 {
4572 /* The badword also ends: add suggestions, */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004573 add_suggestion(su, preword, sp->ts_score + newscore
4574#ifdef RESCORE
4575 , FALSE
4576#endif
4577 );
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004578 }
4579 else if (sp->ts_fidx >= sp->ts_fidxtry)
4580 {
4581 /* The word in the tree ends but the badword
4582 * continues: try inserting a space and check that a valid
4583 * words starts at fword[sp->ts_fidx]. */
4584 if (try_deeper(su, stack, depth, newscore + SCORE_SPLIT))
4585 {
4586 /* Save things to be restored at STATE_SPLITUNDO. */
4587 sp->ts_save_prewordlen = prewordlen;
4588 sp->ts_save_badflags = badflags;
4589 sp->ts_save_splitoff = splitoff;
4590
4591 /* Append a space to preword. */
4592 STRCAT(preword, " ");
4593 prewordlen = STRLEN(preword);
4594 splitoff = sp->ts_twordlen;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004595#ifdef FEAT_MBYTE
4596 if (has_mbyte)
4597 {
4598 int i = 0;
4599
4600 /* Case-folding may change the number of bytes:
4601 * Count nr of chars in fword[sp->ts_fidx] and
4602 * advance that many chars in su->su_badptr. */
4603 for (p = fword; p < fword + sp->ts_fidx;
4604 mb_ptr_adv(p))
4605 ++i;
4606 for (p = su->su_badptr; i > 0; mb_ptr_adv(p))
4607 --i;
4608 }
4609 else
4610#endif
4611 p = su->su_badptr + sp->ts_fidx;
4612 badflags = captype(p, su->su_badptr + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004613
4614 sp->ts_state = STATE_SPLITUNDO;
4615 ++depth;
4616 /* Restart at top of the tree. */
4617 stack[depth].ts_arridx = 0;
4618 }
4619 }
4620 break;
4621
4622 case STATE_SPLITUNDO:
4623 /* Fixup the changes done for word split. */
4624 badflags = sp->ts_save_badflags;
4625 splitoff = sp->ts_save_splitoff;
4626 prewordlen = sp->ts_save_prewordlen;
4627
4628 /* Continue looking for NUL bytes. */
4629 sp->ts_state = STATE_START;
4630 break;
4631
4632 case STATE_ENDNUL:
4633 /* Past the NUL bytes in the node. */
4634 if (fword[sp->ts_fidx] == 0)
4635 {
4636 /* The badword ends, can't use the bytes in this node. */
4637 sp->ts_state = STATE_DEL;
4638 break;
4639 }
4640 sp->ts_state = STATE_PLAIN;
4641 /*FALLTHROUGH*/
4642
4643 case STATE_PLAIN:
4644 /*
4645 * Go over all possible bytes at this node, add each to
4646 * tword[] and use child node. "ts_curi" is the index.
4647 */
4648 arridx = sp->ts_arridx;
4649 if (sp->ts_curi > byts[arridx])
4650 {
4651 /* Done all bytes at this node, do next state. When still
4652 * at already changed bytes skip the other tricks. */
4653 if (sp->ts_fidx >= sp->ts_fidxtry)
4654 sp->ts_state = STATE_DEL;
4655 else
4656 sp->ts_state = STATE_FINAL;
4657 }
4658 else
4659 {
4660 arridx += sp->ts_curi++;
4661 c = byts[arridx];
4662
4663 /* Normal byte, go one level deeper. If it's not equal to
4664 * the byte in the bad word adjust the score. But don't
4665 * even try when the byte was already changed. */
4666 if (c == fword[sp->ts_fidx])
4667 newscore = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004668
4669 /* TODO: this is too slow and comparing bytes isn't right
4670 * for multi-byte characters. */
4671#if 0
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004672 else if (lp->lp_slang->sl_map != NULL
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004673 && similar_chars(lp->lp_slang,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004674 c, fword[sp->ts_fidx]))
4675 newscore = SCORE_SIMILAR;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004676#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004677 else
4678 newscore = SCORE_SUBST;
4679 if ((newscore == 0 || sp->ts_fidx >= sp->ts_fidxtry)
4680 && try_deeper(su, stack, depth, newscore))
4681 {
4682 ++depth;
4683 ++stack[depth].ts_fidx;
4684 tword[stack[depth].ts_twordlen++] = c;
4685 stack[depth].ts_arridx = idxs[arridx];
4686 }
4687 }
4688 break;
4689
4690 case STATE_DEL:
4691 /* Try skipping one byte in the bad word (delete it). */
4692 sp->ts_state = STATE_INS;
4693 sp->ts_curi = 1;
4694 if (fword[sp->ts_fidx] != NUL
4695 && try_deeper(su, stack, depth, SCORE_DEL))
4696 {
4697 ++depth;
4698 ++stack[depth].ts_fidx;
4699 break;
4700 }
4701 /*FALLTHROUGH*/
4702
4703 case STATE_INS:
4704 /* Insert one byte. Do this for each possible bytes at this
4705 * node. */
4706 n = sp->ts_arridx;
4707 if (sp->ts_curi > byts[n])
4708 {
4709 /* Done all bytes at this node, do next state. */
4710 sp->ts_state = STATE_SWAP;
4711 sp->ts_curi = 1;
4712 }
4713 else
4714 {
4715 /* Do one more byte at this node. */
4716 n += sp->ts_curi++;
4717 c = byts[n];
4718 if (c != 0 && try_deeper(su, stack, depth, SCORE_INS))
4719 {
4720 ++depth;
4721 tword[stack[depth].ts_twordlen++] = c;
4722 stack[depth].ts_arridx = idxs[n];
4723 }
4724 }
4725 break;
4726
4727 case STATE_SWAP:
4728 /* Swap two bytes: "12" -> "21". This means looking for the
4729 * following byte at the current node and the current byte at
4730 * its child node. We change "fword" here, it's changed back
4731 * afterwards. TODO: should swap characters instead of bytes.
4732 * */
4733 c = fword[sp->ts_fidx];
4734 if (c != NUL && fword[sp->ts_fidx + 1] != NUL
4735 && try_deeper(su, stack, depth, SCORE_SWAP))
4736 {
4737 sp->ts_state = STATE_SWAP3A;
4738 ++depth;
4739 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
4740 fword[sp->ts_fidx + 1] = c;
4741 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
4742 }
4743 else
4744 /* If this swap doesn't work then SWAP3 won't either. */
4745 sp->ts_state = STATE_REP_INI;
4746 break;
4747
4748 case STATE_SWAP3A:
4749 /* First undo the STATE_SWAP swap: "21" -> "12". */
4750 c = fword[sp->ts_fidx];
4751 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
4752 fword[sp->ts_fidx + 1] = c;
4753
4754 /* Swap two bytes, skipping one: "123" -> "321". We change
4755 * "fword" here, it's changed back afterwards. TODO: should
4756 * swap characters instead of bytes. */
4757 c = fword[sp->ts_fidx];
4758 if (c != NUL && fword[sp->ts_fidx + 1] != NUL
4759 && fword[sp->ts_fidx + 2] != NUL
4760 && try_deeper(su, stack, depth, SCORE_SWAP3))
4761 {
4762 sp->ts_state = STATE_ROT3L;
4763 ++depth;
4764 fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
4765 fword[sp->ts_fidx + 2] = c;
4766 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
4767 }
4768 else
4769 sp->ts_state = STATE_REP_INI;
4770 break;
4771
4772 case STATE_ROT3L:
4773 /* First undo STATE_SWAP3A: "321" -> "123" */
4774 c = fword[sp->ts_fidx];
4775 fword[sp->ts_fidx] = fword[sp->ts_fidx + 2];
4776 fword[sp->ts_fidx + 2] = c;
4777
4778 /* Rotate three bytes left: "123" -> "231". We change
4779 * "fword" here, it's changed back afterwards. TODO: should
4780 * swap characters instead of bytes. */
4781 if (try_deeper(su, stack, depth, SCORE_SWAP3))
4782 {
4783 sp->ts_state = STATE_ROT3R;
4784 ++depth;
4785 c = fword[sp->ts_fidx];
4786 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
4787 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
4788 fword[sp->ts_fidx + 2] = c;
4789 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
4790 }
4791 else
4792 sp->ts_state = STATE_REP_INI;
4793 break;
4794
4795 case STATE_ROT3R:
4796 /* First undo STATE_ROT3L: "231" -> "123" */
4797 c = fword[sp->ts_fidx + 2];
4798 fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
4799 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
4800 fword[sp->ts_fidx] = c;
4801
4802 /* Rotate three bytes right: "123" -> "312". We change
4803 * "fword" here, it's changed back afterwards. TODO: should
4804 * swap characters instead of bytes. */
4805 if (try_deeper(su, stack, depth, SCORE_SWAP3))
4806 {
4807 sp->ts_state = STATE_ROT_UNDO;
4808 ++depth;
4809 c = fword[sp->ts_fidx + 2];
4810 fword[sp->ts_fidx + 2] = fword[sp->ts_fidx + 1];
4811 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx];
4812 fword[sp->ts_fidx] = c;
4813 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
4814 }
4815 else
4816 sp->ts_state = STATE_REP_INI;
4817 break;
4818
4819 case STATE_ROT_UNDO:
4820 /* Undo STATE_ROT3R: "312" -> "123" */
4821 c = fword[sp->ts_fidx];
4822 fword[sp->ts_fidx] = fword[sp->ts_fidx + 1];
4823 fword[sp->ts_fidx + 1] = fword[sp->ts_fidx + 2];
4824 fword[sp->ts_fidx + 2] = c;
4825 /*FALLTHROUGH*/
4826
4827 case STATE_REP_INI:
4828 /* Check if matching with REP items from the .aff file would
4829 * work. Quickly skip if there are no REP items or the score
4830 * is going to be too high anyway. */
4831 gap = &lp->lp_slang->sl_rep;
4832 if (gap->ga_len == 0
4833 || sp->ts_score + SCORE_REP >= su->su_maxscore)
4834 {
4835 sp->ts_state = STATE_FINAL;
4836 break;
4837 }
4838
4839 /* Use the first byte to quickly find the first entry that
4840 * matches. If the index is -1 there is none. */
4841 sp->ts_curi = lp->lp_slang->sl_rep_first[fword[sp->ts_fidx]];
4842 if (sp->ts_curi < 0)
4843 {
4844 sp->ts_state = STATE_FINAL;
4845 break;
4846 }
4847
4848 sp->ts_state = STATE_REP;
4849 /*FALLTHROUGH*/
4850
4851 case STATE_REP:
4852 /* Try matching with REP items from the .aff file. For each
4853 * match replace the charactes and check if the resulting word
4854 * is valid. */
4855 p = fword + sp->ts_fidx;
4856
4857 gap = &lp->lp_slang->sl_rep;
4858 while (sp->ts_curi < gap->ga_len)
4859 {
4860 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
4861 if (*ftp->ft_from != *p)
4862 {
4863 /* past possible matching entries */
4864 sp->ts_curi = gap->ga_len;
4865 break;
4866 }
4867 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
4868 && try_deeper(su, stack, depth, SCORE_REP))
4869 {
4870 /* Need to undo this afterwards. */
4871 sp->ts_state = STATE_REP_UNDO;
4872
4873 /* Change the "from" to the "to" string. */
4874 ++depth;
4875 fl = STRLEN(ftp->ft_from);
4876 tl = STRLEN(ftp->ft_to);
4877 if (fl != tl)
4878 mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1);
4879 mch_memmove(p, ftp->ft_to, tl);
4880 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
4881 break;
4882 }
4883 }
4884
4885 if (sp->ts_curi >= gap->ga_len)
4886 /* No (more) matches. */
4887 sp->ts_state = STATE_FINAL;
4888
4889 break;
4890
4891 case STATE_REP_UNDO:
4892 /* Undo a REP replacement and continue with the next one. */
4893 ftp = (fromto_T *)lp->lp_slang->sl_rep.ga_data
4894 + sp->ts_curi - 1;
4895 fl = STRLEN(ftp->ft_from);
4896 tl = STRLEN(ftp->ft_to);
4897 p = fword + sp->ts_fidx;
4898 if (fl != tl)
4899 mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1);
4900 mch_memmove(p, ftp->ft_from, fl);
4901 sp->ts_state = STATE_REP;
4902 break;
4903
4904 default:
4905 /* Did all possible states at this level, go up one level. */
4906 --depth;
4907 }
4908
4909 line_breakcheck();
4910 }
4911 }
4912}
4913
4914/*
4915 * Try going one level deeper in the tree.
4916 */
4917 static int
4918try_deeper(su, stack, depth, score_add)
4919 suginfo_T *su;
4920 trystate_T *stack;
4921 int depth;
4922 int score_add;
4923{
4924 int newscore;
4925
4926 /* Refuse to go deeper if the scrore is getting too big. */
4927 newscore = stack[depth].ts_score + score_add;
4928 if (newscore >= su->su_maxscore)
4929 return FALSE;
4930
4931 stack[depth + 1].ts_state = STATE_START;
4932 stack[depth + 1].ts_score = newscore;
4933 stack[depth + 1].ts_curi = 1; /* start just after length byte */
4934 stack[depth + 1].ts_fidx = stack[depth].ts_fidx;
4935 stack[depth + 1].ts_fidxtry = stack[depth].ts_fidxtry;
4936 stack[depth + 1].ts_twordlen = stack[depth].ts_twordlen;
4937 stack[depth + 1].ts_arridx = stack[depth].ts_arridx;
4938 return TRUE;
4939}
4940
4941/*
4942 * "fword" is a good word with case folded. Find the matching keep-case
4943 * words and put it in "kword".
4944 * Theoretically there could be several keep-case words that result in the
4945 * same case-folded word, but we only find one...
4946 */
4947 static void
4948find_keepcap_word(slang, fword, kword)
4949 slang_T *slang;
4950 char_u *fword;
4951 char_u *kword;
4952{
4953 char_u uword[MAXWLEN]; /* "fword" in upper-case */
4954 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004955 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004956
4957 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004958 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004959 int round[MAXWLEN];
4960 int fwordidx[MAXWLEN];
4961 int uwordidx[MAXWLEN];
4962 int kwordlen[MAXWLEN];
4963
4964 int flen, ulen;
4965 int l;
4966 int len;
4967 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004968 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004969 char_u *p;
4970 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004971 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004972
4973 if (byts == NULL)
4974 {
4975 /* array is empty: "cannot happen" */
4976 *kword = NUL;
4977 return;
4978 }
4979
4980 /* Make an all-cap version of "fword". */
4981 allcap_copy(fword, uword);
4982
4983 /*
4984 * Each character needs to be tried both case-folded and upper-case.
4985 * All this gets very complicated if we keep in mind that changing case
4986 * may change the byte length of a multi-byte character...
4987 */
4988 depth = 0;
4989 arridx[0] = 0;
4990 round[0] = 0;
4991 fwordidx[0] = 0;
4992 uwordidx[0] = 0;
4993 kwordlen[0] = 0;
4994 while (depth >= 0)
4995 {
4996 if (fword[fwordidx[depth]] == NUL)
4997 {
4998 /* We are at the end of "fword". If the tree allows a word to end
4999 * here we have found a match. */
5000 if (byts[arridx[depth] + 1] == 0)
5001 {
5002 kword[kwordlen[depth]] = NUL;
5003 return;
5004 }
5005
5006 /* kword is getting too long, continue one level up */
5007 --depth;
5008 }
5009 else if (++round[depth] > 2)
5010 {
5011 /* tried both fold-case and upper-case character, continue one
5012 * level up */
5013 --depth;
5014 }
5015 else
5016 {
5017 /*
5018 * round[depth] == 1: Try using the folded-case character.
5019 * round[depth] == 2: Try using the upper-case character.
5020 */
5021#ifdef FEAT_MBYTE
5022 if (has_mbyte)
5023 {
5024 flen = mb_ptr2len_check(fword + fwordidx[depth]);
5025 ulen = mb_ptr2len_check(uword + uwordidx[depth]);
5026 }
5027 else
5028#endif
5029 ulen = flen = 1;
5030 if (round[depth] == 1)
5031 {
5032 p = fword + fwordidx[depth];
5033 l = flen;
5034 }
5035 else
5036 {
5037 p = uword + uwordidx[depth];
5038 l = ulen;
5039 }
5040
5041 for (tryidx = arridx[depth]; l > 0; --l)
5042 {
5043 /* Perform a binary search in the list of accepted bytes. */
5044 len = byts[tryidx++];
5045 c = *p++;
5046 lo = tryidx;
5047 hi = tryidx + len - 1;
5048 while (lo < hi)
5049 {
5050 m = (lo + hi) / 2;
5051 if (byts[m] > c)
5052 hi = m - 1;
5053 else if (byts[m] < c)
5054 lo = m + 1;
5055 else
5056 {
5057 lo = hi = m;
5058 break;
5059 }
5060 }
5061
5062 /* Stop if there is no matching byte. */
5063 if (hi < lo || byts[lo] != c)
5064 break;
5065
5066 /* Continue at the child (if there is one). */
5067 tryidx = idxs[lo];
5068 }
5069
5070 if (l == 0)
5071 {
5072 /*
5073 * Found the matching char. Copy it to "kword" and go a
5074 * level deeper.
5075 */
5076 if (round[depth] == 1)
5077 {
5078 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
5079 flen);
5080 kwordlen[depth + 1] = kwordlen[depth] + flen;
5081 }
5082 else
5083 {
5084 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
5085 ulen);
5086 kwordlen[depth + 1] = kwordlen[depth] + ulen;
5087 }
5088 fwordidx[depth + 1] = fwordidx[depth] + flen;
5089 uwordidx[depth + 1] = uwordidx[depth] + ulen;
5090
5091 ++depth;
5092 arridx[depth] = tryidx;
5093 round[depth] = 0;
5094 }
5095 }
5096 }
5097
5098 /* Didn't find it: "cannot happen". */
5099 *kword = NUL;
5100}
5101
5102/*
5103 * Find suggestions by comparing the word in a sound-a-like form.
5104 */
5105 static void
5106spell_try_soundalike(su)
5107 suginfo_T *su;
5108{
5109 char_u salword[MAXWLEN];
5110 char_u tword[MAXWLEN];
5111 char_u tfword[MAXWLEN];
5112 char_u tsalword[MAXWLEN];
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005113 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005114 int curi[MAXWLEN];
5115 langp_T *lp;
5116 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005117 idx_T *idxs;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005118 int depth;
5119 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005120 idx_T n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005121 int round;
5122 int flags;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005123 int score, sound_score;
5124 char_u *bp, *sp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005125
5126 for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
5127 lp->lp_slang != NULL; ++lp)
5128 {
5129 if (lp->lp_slang->sl_sal.ga_len > 0)
5130 {
5131 /* soundfold the bad word */
5132 spell_soundfold(lp->lp_slang, su->su_fbadword, salword);
5133
5134 /*
5135 * Go through the whole tree, soundfold each word and compare.
5136 * round 1: use the case-folded tree.
5137 * round 2: use the keep-case tree.
5138 */
5139 for (round = 1; round <= 2; ++round)
5140 {
5141 if (round == 1)
5142 {
5143 byts = lp->lp_slang->sl_fbyts;
5144 idxs = lp->lp_slang->sl_fidxs;
5145 }
5146 else
5147 {
5148 byts = lp->lp_slang->sl_kbyts;
5149 idxs = lp->lp_slang->sl_kidxs;
5150 }
5151
5152 depth = 0;
5153 arridx[0] = 0;
5154 curi[0] = 1;
5155 while (depth >= 0 && !got_int)
5156 {
5157 if (curi[depth] > byts[arridx[depth]])
5158 /* Done all bytes at this node, go up one level. */
5159 --depth;
5160 else
5161 {
5162 /* Do one more byte at this node. */
5163 n = arridx[depth] + curi[depth];
5164 ++curi[depth];
5165 c = byts[n];
5166 if (c == 0)
5167 {
5168 /* End of word, deal with the word. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005169 flags = (int)idxs[n];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005170 if (round == 2 || (flags & WF_KEEPCAP) == 0)
5171 {
5172 tword[depth] = NUL;
5173 if (round == 1)
5174 spell_soundfold(lp->lp_slang,
5175 tword, tsalword);
5176 else
5177 {
5178 /* In keep-case tree need to case-fold the
5179 * word. */
5180 (void)spell_casefold(tword, depth,
5181 tfword, MAXWLEN);
5182 spell_soundfold(lp->lp_slang,
5183 tfword, tsalword);
5184 }
5185
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005186 /*
5187 * Accept the word if the sound-folded words
5188 * are (almost) equal.
5189 */
5190 for (bp = salword, sp = tsalword; *bp == *sp;
5191 ++bp, ++sp)
5192 if (*bp == NUL)
5193 break;
5194
5195 if (*bp == *sp)
5196 /* equal */
5197 sound_score = 0;
5198 else if (*bp != NUL && bp[1] != NUL
5199 && *bp == sp[1] && bp[1] == *sp
5200 && STRCMP(bp + 2, sp + 2) == 0)
5201 /* swap two bytes */
5202 sound_score = SCORE_SWAP;
5203 else if (STRCMP(bp + 1, sp) == 0)
5204 /* delete byte */
5205 sound_score = SCORE_DEL;
5206 else if (STRCMP(bp, sp + 1) == 0)
5207 /* insert byte */
5208 sound_score = SCORE_INS;
5209 else if (STRCMP(bp + 1, sp + 1) == 0)
5210 /* skip one byte */
5211 sound_score = SCORE_SUBST;
5212 else
5213 /* not equal or similar */
5214 sound_score = SCORE_MAXMAX;
5215
5216 if (sound_score < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005217 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005218 char_u cword[MAXWLEN];
5219 char_u *p;
5220
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005221 if (round == 1 && flags != 0)
5222 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005223 /* Need to fix case according to
5224 * "flags". */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005225 make_case_word(tword, cword, flags);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005226 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005227 }
5228 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005229 p = tword;
5230
5231 /* Compute the score. */
5232 score = spell_edit_score(su->su_badword, p);
5233#ifdef RESCORE
5234 /* give a bonus for the good word sounding
5235 * the same as the bad word */
5236 add_suggestion(su, tword,
5237 RESCORE(score, sound_score),
5238 TRUE);
5239#else
5240 add_suggestion(su, tword,
5241 score + sound_score);
5242#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005243 }
5244 }
5245
5246 /* Skip over other NUL bytes. */
5247 while (byts[n + 1] == 0)
5248 {
5249 ++n;
5250 ++curi[depth];
5251 }
5252 }
5253 else
5254 {
5255 /* Normal char, go one level deeper. */
5256 tword[depth++] = c;
5257 arridx[depth] = idxs[n];
5258 curi[depth] = 1;
5259 }
5260 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005261
5262 line_breakcheck();
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005263 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005264 }
5265 }
5266 }
5267}
5268
5269/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005270 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005271 */
5272 static void
5273make_case_word(fword, cword, flags)
5274 char_u *fword;
5275 char_u *cword;
5276 int flags;
5277{
5278 if (flags & WF_ALLCAP)
5279 /* Make it all upper-case */
5280 allcap_copy(fword, cword);
5281 else if (flags & WF_ONECAP)
5282 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005283 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005284 else
5285 /* Use goodword as-is. */
5286 STRCPY(cword, fword);
5287}
5288
#if 0
/*
 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
 * lines in the .aff file.
 * Currently disabled.
 */
    static int
similar_chars(slang, c1, c2)
    slang_T     *slang;
    int         c1;
    int         c2;
{
    char_u      *first;
    char_u      *second;

    /* The similar characters are stored separated with slashes:
     * "aaa/bbb/ccc/".  Both characters must be present... */
    first = vim_strchr(slang->sl_map, c1);
    if (first == NULL)
        return FALSE;
    second = vim_strchr(slang->sl_map, c2);
    if (second == NULL)
        return FALSE;

    /* ...and when the next slash is the same one for both they are in the
     * same MAP entry. */
    return vim_strchr(first, '/') == vim_strchr(second, '/');
}
#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005315
5316/*
5317 * Add a suggestion to the list of suggestions.
5318 * Do not add a duplicate suggestion or suggestions with a bad score.
5319 * When "use_score" is not zero it's used, otherwise the score is computed
5320 * with spell_edit_score().
5321 */
5322 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005323add_suggestion(su, goodword, score
5324#ifdef RESCORE
5325 , had_bonus
5326#endif
5327 )
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005328 suginfo_T *su;
5329 char_u *goodword;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005330 int score;
5331#ifdef RESCORE
5332 int had_bonus; /* set st_had_bonus */
5333#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005334{
5335 suggest_T *stp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005336 int i;
5337#ifdef SOUNDFOLD_SCORE
5338 char_u fword[MAXWLEN];
5339 char_u salword[MAXWLEN];
5340#endif
5341
5342 /* Check that the word wasn't banned. */
5343 if (was_banned(su, goodword))
5344 return;
5345
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005346 if (score <= su->su_maxscore)
5347 {
5348#ifdef SOUNDFOLD_SCORE
5349 /* Add to the score when the word sounds differently.
5350 * This is slow... */
5351 if (su->su_slang->sl_sal.ga_len > 0)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005352 score += spell_sound_score(su->su_slang, fword, su->su_salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005353#endif
5354
5355 /* Check if the word is already there. */
5356 stp = &SUG(su, 0);
5357 for (i = su->su_ga.ga_len - 1; i >= 0; --i)
5358 if (STRCMP(stp[i].st_word, goodword) == 0)
5359 {
5360 /* Found it. Remember the lowest score. */
5361 if (stp[i].st_score > score)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005362 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005363 stp[i].st_score = score;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005364#ifdef RESCORE
5365 stp[i].st_had_bonus = had_bonus;
5366#endif
5367 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005368 break;
5369 }
5370
5371 if (i < 0 && ga_grow(&su->su_ga, 1) == OK)
5372 {
5373 /* Add a suggestion. */
5374 stp = &SUG(su, su->su_ga.ga_len);
5375 stp->st_word = vim_strsave(goodword);
5376 if (stp->st_word != NULL)
5377 {
5378 stp->st_score = score;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005379#ifdef RESCORE
5380 stp->st_had_bonus = had_bonus;
5381#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005382 stp->st_orglen = su->su_badlen;
5383 ++su->su_ga.ga_len;
5384
5385 /* If we have too many suggestions now, sort the list and keep
5386 * the best suggestions. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005387 if (su->su_ga.ga_len > SUG_MAX_COUNT)
5388 cleanup_suggestions(su, SUG_CLEAN_COUNT);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005389 }
5390 }
5391 }
5392}
5393
5394/*
5395 * Add a word to be banned.
5396 */
5397 static void
5398add_banned(su, word)
5399 suginfo_T *su;
5400 char_u *word;
5401{
5402 char_u *s = vim_strsave(word);
5403 hash_T hash;
5404 hashitem_T *hi;
5405
5406 if (s != NULL)
5407 {
5408 hash = hash_hash(s);
5409 hi = hash_lookup(&su->su_banned, s, hash);
5410 if (HASHITEM_EMPTY(hi))
5411 hash_add_item(&su->su_banned, hi, s, hash);
5412 }
5413}
5414
5415/*
5416 * Return TRUE if a word appears in the list of banned words.
5417 */
5418 static int
5419was_banned(su, word)
5420 suginfo_T *su;
5421 char_u *word;
5422{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005423 hashitem_T *hi = hash_find(&su->su_banned, word);
5424
5425 return !HASHITEM_EMPTY(hi);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005426}
5427
5428/*
5429 * Free the banned words in "su".
5430 */
5431 static void
5432free_banned(su)
5433 suginfo_T *su;
5434{
5435 int todo;
5436 hashitem_T *hi;
5437
5438 todo = su->su_banned.ht_used;
5439 for (hi = su->su_banned.ht_array; todo > 0; ++hi)
5440 {
5441 if (!HASHITEM_EMPTY(hi))
5442 {
5443 vim_free(hi->hi_key);
5444 --todo;
5445 }
5446 }
5447 hash_clear(&su->su_banned);
5448}
5449
#ifdef RESCORE
/*
 * Recompute the score of the suggestions in "su" by taking into account how
 * much each word sounds different from the bad word.  Only the first
 * language that has sound-folding rules is used, suggestions that already
 * got the bonus are skipped.
 * This is slow, thus only done for the final results.
 */
    static void
rescore_suggestions(su)
    suginfo_T   *su;
{
    langp_T     *lp;
    suggest_T   *stp;
    char_u      sal_badword[MAXWLEN];
    int         sound_score;
    int         i;

    /* Find the first language for which sound-folding is possible. */
    for (lp = LANGP_ENTRY(curwin->w_buffer->b_langp, 0);
                                                   lp->lp_slang != NULL; ++lp)
        if (lp->lp_slang->sl_sal.ga_len > 0)
            break;
    if (lp->lp_slang == NULL)
        return;                 /* no language with sound-folding */

    /* soundfold the bad word */
    spell_soundfold(lp->lp_slang, su->su_fbadword, sal_badword);

    for (i = 0; i < su->su_ga.ga_len; ++i)
    {
        stp = &SUG(su, i);
        if (stp->st_had_bonus)
            continue;
        sound_score = spell_sound_score(lp->lp_slang, stp->st_word,
                                                                 sal_badword);
        stp->st_score = RESCORE(stp->st_score, sound_score);
    }
}
#endif
5488
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005489static int
5490#ifdef __BORLANDC__
5491_RTLENTRYF
5492#endif
5493sug_compare __ARGS((const void *s1, const void *s2));
5494
5495/*
5496 * Function given to qsort() to sort the suggestions on st_score.
5497 */
5498 static int
5499#ifdef __BORLANDC__
5500_RTLENTRYF
5501#endif
5502sug_compare(s1, s2)
5503 const void *s1;
5504 const void *s2;
5505{
5506 suggest_T *p1 = (suggest_T *)s1;
5507 suggest_T *p2 = (suggest_T *)s2;
5508
5509 return p1->st_score - p2->st_score;
5510}
5511
5512/*
5513 * Cleanup the suggestions:
5514 * - Sort on score.
5515 * - Remove words that won't be displayed.
5516 */
5517 static void
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005518cleanup_suggestions(su, keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005519 suginfo_T *su;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005520 int keep; /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005521{
5522 suggest_T *stp = &SUG(su, 0);
5523 int i;
5524
5525 /* Sort the list. */
5526 qsort(su->su_ga.ga_data, (size_t)su->su_ga.ga_len,
5527 sizeof(suggest_T), sug_compare);
5528
5529 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005530 if (su->su_ga.ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005531 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005532 for (i = keep; i < su->su_ga.ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005533 vim_free(stp[i].st_word);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005534 su->su_ga.ga_len = keep;
5535 su->su_maxscore = stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005536 }
5537}
5538
5539/*
5540 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
5541 */
5542 static void
5543spell_soundfold(slang, inword, res)
5544 slang_T *slang;
5545 char_u *inword;
5546 char_u *res;
5547{
5548 fromto_T *ftp;
5549 char_u word[MAXWLEN];
5550#ifdef FEAT_MBYTE
5551 int l;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005552 int found_mbyte = FALSE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005553#endif
5554 char_u *s;
5555 char_u *t;
5556 int i, j, z;
5557 int n, k = 0;
5558 int z0;
5559 int k0;
5560 int n0;
5561 int c;
5562 int pri;
5563 int p0 = -333;
5564 int c0;
5565
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005566 /* Remove accents, if wanted. We actually remove all non-word characters.
5567 * But keep white space. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005568 if (slang->sl_rem_accents)
5569 {
5570 t = word;
5571 for (s = inword; *s != NUL; )
5572 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005573 if (vim_iswhite(*s))
5574 *t++ = *s++;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005575#ifdef FEAT_MBYTE
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005576 else if (has_mbyte)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005577 {
5578 l = mb_ptr2len_check(s);
5579 if (SPELL_ISWORDP(s))
5580 {
5581 mch_memmove(t, s, l);
5582 t += l;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005583 if (l > 1)
5584 found_mbyte = TRUE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005585 }
5586 s += l;
5587 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005588#endif
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005589 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005590 {
5591 if (SPELL_ISWORDP(s))
5592 *t++ = *s;
5593 ++s;
5594 }
5595 }
5596 *t = NUL;
5597 }
5598 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005599 {
5600#ifdef FEAT_MBYTE
5601 if (has_mbyte)
5602 for (s = inword; *s != NUL; s += l)
5603 if ((l = mb_ptr2len_check(s)) > 1)
5604 {
5605 found_mbyte = TRUE;
5606 break;
5607 }
5608#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005609 STRCPY(word, inword);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005610 }
5611
5612#ifdef FEAT_MBYTE
5613 /* If there are multi-byte characters in the word return it as-is, because
5614 * the following won't work. */
5615 if (found_mbyte)
5616 {
5617 STRCPY(res, word);
5618 return;
5619 }
5620#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005621
5622 ftp = (fromto_T *)slang->sl_sal.ga_data;
5623
5624 /*
5625 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005626 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005627 * TODO: support for multi-byte chars.
5628 */
5629 i = j = z = 0;
5630 while ((c = word[i]) != NUL)
5631 {
5632 n = slang->sl_sal_first[c];
5633 z0 = 0;
5634
5635 if (n >= 0)
5636 {
5637 /* check all rules for the same letter */
5638 while (ftp[n].ft_from[0] == c)
5639 {
5640 /* check whole string */
5641 k = 1; /* number of found letters */
5642 pri = 5; /* default priority */
5643 s = ftp[n].ft_from;
5644 s++; /* important for (see below) "*(s-1)" */
5645
5646 /* Skip over normal letters that match with the word. */
5647 while (*s != NUL && word[i + k] == *s
5648 && !vim_isdigit(*s) && strchr("(-<^$", *s) == NULL)
5649 {
5650 k++;
5651 s++;
5652 }
5653
5654 if (*s == '(')
5655 {
5656 /* check alternate letters in "(..)" */
5657 for (t = s + 1; *t != ')' && *t != NUL; ++t)
5658 if (*t == word[i + k])
5659 {
5660 /* match */
5661 ++k;
5662 for (s = t + 1; *s != NUL; ++s)
5663 if (*s == ')')
5664 {
5665 ++s;
5666 break;
5667 }
5668 break;
5669 }
5670 }
5671
5672 p0 = *s;
5673 k0 = k;
5674 while (*s == '-' && k > 1)
5675 {
5676 k--;
5677 s++;
5678 }
5679 if (*s == '<')
5680 s++;
5681 if (vim_isdigit(*s))
5682 {
5683 /* determine priority */
5684 pri = *s - '0';
5685 s++;
5686 }
5687 if (*s == '^' && *(s + 1) == '^')
5688 s++;
5689
5690 if (*s == NUL
5691 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005692 && (i == 0 || !(word[i - 1] == ' '
5693 || SPELL_ISWORDP(word + i - 1)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005694 && (*(s + 1) != '$'
5695 || (!SPELL_ISWORDP(word + i + k0))))
5696 || (*s == '$' && i > 0
5697 && SPELL_ISWORDP(word + i - 1)
5698 && (!SPELL_ISWORDP(word + i + k0))))
5699 {
5700 /* search for followup rules, if: */
5701 /* followup and k > 1 and NO '-' in searchstring */
5702 c0 = word[i + k - 1];
5703 n0 = slang->sl_sal_first[c0];
5704
5705 if (slang->sl_followup && k > 1 && n0 >= 0
5706 && p0 != '-' && word[i + k] != NUL)
5707 {
5708 /* test follow-up rule for "word[i + k]" */
5709 while (ftp[n0].ft_from[0] == c0)
5710 {
5711
5712 /* check whole string */
5713 k0 = k;
5714 p0 = 5;
5715 s = ftp[n0].ft_from;
5716 s++;
5717 while (*s != NUL && word[i+k0] == *s
5718 && !vim_isdigit(*s)
5719 && strchr("(-<^$",*s) == NULL)
5720 {
5721 k0++;
5722 s++;
5723 }
5724 if (*s == '(')
5725 {
5726 /* check alternate letters in "(..)" */
5727 for (t = s + 1; *t != ')' && *t != NUL; ++t)
5728 if (*t == word[i + k0])
5729 {
5730 /* match */
5731 ++k0;
5732 for (s = t + 1; *s != NUL; ++s)
5733 if (*s == ')')
5734 {
5735 ++s;
5736 break;
5737 }
5738 break;
5739 }
5740 }
5741 while (*s == '-')
5742 {
5743 /* "k0" gets NOT reduced */
5744 /* because "if (k0 == k)" */
5745 s++;
5746 }
5747 if (*s == '<')
5748 s++;
5749 if (vim_isdigit(*s))
5750 {
5751 p0 = *s - '0';
5752 s++;
5753 }
5754
5755 if (*s == NUL
5756 /* *s == '^' cuts */
5757 || (*s == '$'
5758 && !SPELL_ISWORDP(word + i + k0)))
5759 {
5760 if (k0 == k)
5761 {
5762 /* this is just a piece of the string */
5763 ++n0;
5764 continue;
5765 }
5766
5767 if (p0 < pri)
5768 {
5769 /* priority too low */
5770 ++n0;
5771 continue;
5772 }
5773 /* rule fits; stop search */
5774 break;
5775 }
5776 ++n0;
5777 }
5778
5779 if (p0 >= pri && ftp[n0].ft_from[0] == c0)
5780 {
5781 ++n;
5782 continue;
5783 }
5784 }
5785
5786 /* replace string */
5787 s = ftp[n].ft_to;
5788 p0 = (ftp[n].ft_from[0] != NUL
5789 && vim_strchr(ftp[n].ft_from + 1,
5790 '<') != NULL) ? 1 : 0;
5791 if (p0 == 1 && z == 0)
5792 {
5793 /* rule with '<' is used */
5794 if (j > 0 && *s != NUL
5795 && (res[j - 1] == c || res[j - 1] == *s))
5796 j--;
5797 z0 = 1;
5798 z = 1;
5799 k0 = 0;
5800 while (*s != NUL && word[i+k0] != NUL)
5801 {
5802 word[i + k0] = *s;
5803 k0++;
5804 s++;
5805 }
5806 if (k > k0)
5807 mch_memmove(word + i + k0, word + i + k,
5808 STRLEN(word + i + k) + 1);
5809
5810 /* new "actual letter" */
5811 c = word[i];
5812 }
5813 else
5814 {
5815 /* no '<' rule used */
5816 i += k - 1;
5817 z = 0;
5818 while (*s != NUL && s[1] != NUL && j < MAXWLEN)
5819 {
5820 if (j == 0 || res[j - 1] != *s)
5821 {
5822 res[j] = *s;
5823 j++;
5824 }
5825 s++;
5826 }
5827 /* new "actual letter" */
5828 c = *s;
5829 if (ftp[n].ft_from[0] != NUL
5830 && strstr((char *)ftp[n].ft_from + 1,
5831 "^^") != NULL)
5832 {
5833 if (c != NUL)
5834 {
5835 res[j] = c;
5836 j++;
5837 }
5838 mch_memmove(word, word + i + 1,
5839 STRLEN(word + i + 1) + 1);
5840 i = 0;
5841 z0 = 1;
5842 }
5843 }
5844 break;
5845 }
5846 ++n;
5847 }
5848 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005849 else if (vim_iswhite(c))
5850 {
5851 c = ' ';
5852 k = 1;
5853 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005854
5855 if (z0 == 0)
5856 {
5857 if (k && !p0 && j < MAXWLEN && c != NUL
5858 && (!slang->sl_collapse || j == 0 || res[j - 1] != c))
5859 {
5860 /* condense only double letters */
5861 res[j] = c;
5862 j++;
5863 }
5864
5865 i++;
5866 z = 0;
5867 k = 0;
5868 }
5869 }
5870
5871 res[j] = NUL;
5872}
5873
#if defined(RESCORE) || defined(SOUNDFOLD_SCORE)
/*
 * Return the score for how much "goodword" sounds different from the bad
 * word.  "badsound" is the bad word, already sound-folded.  "goodword" is
 * case-folded and sound-folded here.
 */
    static int
spell_sound_score(slang, goodword, badsound)
    slang_T     *slang;
    char_u      *goodword;      /* good word */
    char_u      *badsound;      /* sound-folded bad word */
{
    char_u      folded[MAXWLEN];
    char_u      goodsound[MAXWLEN];
    int         result;

    /* Sound folding works on the case-folded word. */
    (void)spell_casefold(goodword, STRLEN(goodword), folded, MAXWLEN);
    spell_soundfold(slang, folded, goodsound);

    /* The score is the edit distance between the two sounds. */
    result = spell_edit_score(badsound, goodsound);

    if (*badsound == *goodsound)
        return result;

    /* Correction: adding/inserting "*" at the start (word starts with vowel)
     * shouldn't be counted so much, vowels halfway the word aren't counted at
     * all. */
    if (*badsound == '*' || *goodsound == '*')
        result -= SCORE_DEL / 2;

    return result;
}
#endif
5906
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005907/*
5908 * Compute the "edit distance" to turn "badword" into "goodword". The less
5909 * deletes/inserts/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005910 *
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005911 * The algorithm comes from Aspell editdist.cpp, edit_distance().
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005912 * It has been converted from C++ to C and modified to support multi-byte
5913 * characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005914 */
5915 static int
5916spell_edit_score(badword, goodword)
5917 char_u *badword;
5918 char_u *goodword;
5919{
5920 int *cnt;
5921 int badlen, goodlen;
5922 int j, i;
5923 int t;
5924 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005925 int pbc, pgc;
5926#ifdef FEAT_MBYTE
5927 char_u *p;
5928 int wbadword[MAXWLEN];
5929 int wgoodword[MAXWLEN];
5930
5931 if (has_mbyte)
5932 {
5933 /* Get the characters from the multi-byte strings and put them in an
5934 * int array for easy access. */
5935 for (p = badword, badlen = 0; *p != NUL; )
5936 wbadword[badlen++] = mb_ptr2char_adv(&p);
5937 ++badlen;
5938 for (p = goodword, goodlen = 0; *p != NUL; )
5939 wgoodword[goodlen++] = mb_ptr2char_adv(&p);
5940 ++goodlen;
5941 }
5942 else
5943#endif
5944 {
5945 badlen = STRLEN(badword) + 1;
5946 goodlen = STRLEN(goodword) + 1;
5947 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005948
5949 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
5950#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005951 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
5952 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005953 if (cnt == NULL)
5954 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005955
5956 CNT(0, 0) = 0;
5957 for (j = 1; j <= goodlen; ++j)
5958 CNT(0, j) = CNT(0, j - 1) + SCORE_DEL;
5959
5960 for (i = 1; i <= badlen; ++i)
5961 {
5962 CNT(i, 0) = CNT(i - 1, 0) + SCORE_INS;
5963 for (j = 1; j <= goodlen; ++j)
5964 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005965#ifdef FEAT_MBYTE
5966 if (has_mbyte)
5967 {
5968 bc = wbadword[i - 1];
5969 gc = wgoodword[j - 1];
5970 }
5971 else
5972#endif
5973 {
5974 bc = badword[i - 1];
5975 gc = goodword[j - 1];
5976 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005977 if (bc == gc)
5978 CNT(i, j) = CNT(i - 1, j - 1);
5979 else
5980 {
5981 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005982 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005983 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
5984 else
5985 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
5986
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005987 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005988 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005989#ifdef FEAT_MBYTE
5990 if (has_mbyte)
5991 {
5992 pbc = wbadword[i - 2];
5993 pgc = wgoodword[j - 2];
5994 }
5995 else
5996#endif
5997 {
5998 pbc = badword[i - 2];
5999 pgc = goodword[j - 2];
6000 }
6001 if (bc == pgc && pbc == gc)
6002 {
6003 t = SCORE_SWAP + CNT(i - 2, j - 2);
6004 if (t < CNT(i, j))
6005 CNT(i, j) = t;
6006 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006007 }
6008 t = SCORE_DEL + CNT(i - 1, j);
6009 if (t < CNT(i, j))
6010 CNT(i, j) = t;
6011 t = SCORE_INS + CNT(i, j - 1);
6012 if (t < CNT(i, j))
6013 CNT(i, j) = t;
6014 }
6015 }
6016 }
6017 return CNT(badlen - 1, goodlen - 1);
6018}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00006019
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00006020#endif /* FEAT_SYN_HL */