blob: 4063895bea5488c965a991e92f55ca62b2ea1a76 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved    by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * spell.c: code for spell checking
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 * A NUL byte is used where the word may end.
 *
 * There are two trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */

/*
 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
 *
 * <HEADER>: <fileID> <regioncnt> <regionname> ...
 *           <charflagslen> <charflags> <fcharslen> <fchars>
 *
 * <fileID>         10 bytes    "VIMspell05"
 * <regioncnt>      1 byte      number of regions following (8 supported)
 * <regionname>     2 bytes     Region name: ca, au, etc.  Lower case.
 *                              First <regionname> is region 1.
 *
 * <charflagslen>   1 byte      Number of bytes in <charflags> (should be 128).
 * <charflags>      N bytes     List of flags (first one is for character 128):
 *                                  0x01  word character
 *                                  0x02  upper-case character
 * <fcharslen>      2 bytes     Number of bytes in <fchars>.
 * <fchars>         N bytes     Folded characters, first one is for character
 *                              128.
 *
 *
 * <SUGGEST> : <suggestlen> <more> ...
 *
 * <suggestlen>     4 bytes     Length of <SUGGEST> in bytes, excluding
 *                              <suggestlen>.  MSB first.
 * <more>                       To be defined.
 *
 *
 * <LWORDTREE>: <wordtree>
 *
 * <wordtree>: <nodecount> <nodedata> ...
 *
 * <nodecount>      4 bytes     Number of nodes following.  MSB first.
 *
 * <nodedata>: <siblingcount> <sibling> ...
 *
 * <siblingcount>   1 byte      Number of siblings in this node.  The siblings
 *                              follow in sorted order.
 *
 * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
 *
 * <byte>           1 byte      Byte value of the sibling.  Special cases:
 *                              BY_NOFLAGS: End of word without flags and for
 *                                          all regions.
 *                              BY_FLAGS:   End of word, <flags> follow.
 *                              BY_INDEX:   Child of sibling is shared,
 *                                          <nodeidx> and <xbyte> follow.
 *
 * <nodeidx>        3 bytes     Index of child for this sibling, MSB first.
 *
 * <xbyte>          1 byte      Byte value of the sibling.
 *
 * <flags>          1 byte      Bitmask of:
 *                              WF_ALLCAP   word must have only capitals
 *                              WF_ONECAP   first char of word must be capital
 *                              WF_RARE     rare word
 *                              WF_REGION   <region> follows
 *
 * <region>         1 byte      Bitmask for regions in which word is valid.
 *                              When omitted it's valid in all regions.
 *                              Lowest bit is for region 1.
 *
 * <KWORDTREE>: <wordtree>
 *
 *
 * All text characters are in 'encoding', but stored as single bytes.
 * The region name is ASCII.
 */

Bram Moolenaare19defe2005-03-21 08:23:33 +000098#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
99# include <io.h> /* for lseek(), must be before vim.h */
100#endif
101
102#include "vim.h"
103
104#if defined(FEAT_SYN_HL) || defined(PROTO)
105
106#ifdef HAVE_FCNTL_H
107# include <fcntl.h>
108#endif
109
#define MAXWLEN 250             /* assume max. word len is this many bytes */

/* Flags used for a word. */
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */
#define WF_BANNED   0x10        /* bad word */

#define WF_KEEPCAP  0x100       /* keep-case word (not stored in file) */

/* Special values for <byte> in a tree node of the spell file. */
#define BY_NOFLAGS  0           /* end of word without flags or region */
#define BY_FLAGS    1           /* end of word, flag byte follows */
#define BY_INDEX    2           /* child is shared, index follows */
#define BY_SPECIAL  BY_INDEX    /* highest special byte value */

Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000126/* Info from "REP" entries in ".aff" file used in af_rep.
127 * TODO: This is not used yet. Either use it or remove it. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000128typedef struct repentry_S
129{
130 char_u *re_from;
131 char_u *re_to;
132} repentry_T;
133
134/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000135 * Structure used to store words and other info for one language, loaded from
136 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000137 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
138 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
139 *
140 * The "byts" array stores the possible bytes in each tree node, preceded by
141 * the number of possible bytes, sorted on byte value:
142 * <len> <byte1> <byte2> ...
143 * The "idxs" array stores the index of the child node corresponding to the
144 * byte in "byts".
145 * Exception: when the byte is zero, the word may end here and "idxs" holds
146 * the flags and region for the word. There may be several zeros in sequence
147 * for alternative flag/region combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000148 */
149typedef struct slang_S slang_T;
150struct slang_S
151{
152 slang_T *sl_next; /* next language */
153 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000154 char_u *sl_fname; /* name of .spl file */
155 int sl_add; /* TRUE if it's an addition. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000156 char_u *sl_fbyts; /* case-folded word bytes */
157 int *sl_fidxs; /* case-folded word indexes */
158 char_u *sl_kbyts; /* keep-case word bytes */
159 int *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000160 char_u *sl_try; /* "TRY" from .aff file TODO: not used */
161 garray_T sl_rep; /* list of repentry_T entries from REP lines
162 * TODO not used */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000163 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000164 int sl_error; /* error while loading */
165};
166
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000167/* First language that is loaded, start of the linked list of loaded
168 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000169static slang_T *first_lang = NULL;
170
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000171/*
172 * Structure used in "b_langp", filled from 'spelllang'.
173 */
174typedef struct langp_S
175{
176 slang_T *lp_slang; /* info for this language (NULL for last one) */
177 int lp_region; /* bitmask for region or REGION_ALL */
178} langp_T;
179
180#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
181
#define REGION_ALL 0xff         /* word valid in all regions */

/* Result values.  Lower number is accepted over higher one. */
#define SP_BANNED       (-1)    /* parenthesized: safe in any expression */
#define SP_OK           0
#define SP_RARE         1
#define SP_LOCAL        2
#define SP_BAD          3

#define VIMSPELLMAGIC "VIMspell05"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10           /* length of VIMSPELLMAGIC in bytes */

194/*
195 * Structure to store info for word matching.
196 */
197typedef struct matchinf_S
198{
199 langp_T *mi_lp; /* info for language and region */
200 slang_T *mi_slang; /* info for the language */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000201
202 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000203 char_u *mi_word; /* start of word being checked */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000204 char_u *mi_end; /* end of matching word */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000205 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000206 char_u *mi_cend; /* char after what was used for
207 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000208
209 /* case-folded text */
210 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000211 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000212
213 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000214 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000215 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000216} matchinf_T;
217
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000218/*
219 * The tables used for recognizing word characters according to spelling.
220 * These are only used for the first 256 characters of 'encoding'.
221 */
222typedef struct spelltab_S
223{
224 char_u st_isw[256]; /* flags: is word char */
225 char_u st_isu[256]; /* flags: is uppercase char */
226 char_u st_fold[256]; /* chars: folded case */
227} spelltab_T;
228
229static spelltab_T spelltab;
230static int did_set_spelltab;
231
232#define SPELL_ISWORD 1
233#define SPELL_ISUPPER 2
234
235static void clear_spell_chartab __ARGS((spelltab_T *sp));
236static int set_spell_finish __ARGS((spelltab_T *new_st));
237
/*
 * Return TRUE if "p" points to a word character for spelling.
 * With multi-byte support a character of more than one byte is a word
 * character when mb_get_class() returns 2 or more; all other characters are
 * looked up in spelltab.st_isw[].
 * Checking for a word character is done very often, avoid the function call
 * overhead.
 * Note: "p" is evaluated more than once, don't pass an expression with side
 * effects.
 */
#ifdef FEAT_MBYTE
# define SPELL_ISWORDP(p) ((has_mbyte && MB_BYTE2LEN(*(p)) > 1) \
                ? (mb_get_class(p) >= 2) : spelltab.st_isw[*(p)])
#else
# define SPELL_ISWORDP(p) (spelltab.st_isw[*(p)])
#endif

Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000251static slang_T *slang_alloc __ARGS((char_u *lang));
252static void slang_free __ARGS((slang_T *lp));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000253static void slang_clear __ARGS((slang_T *lp));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000254static void find_word __ARGS((matchinf_T *mip, int keepcap));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000255static void spell_load_lang __ARGS((char_u *lang));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000256static char_u *spell_enc __ARGS((void));
257static void spell_load_cb __ARGS((char_u *fname, void *cookie));
258static void spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000259static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000260static int find_region __ARGS((char_u *rp, char_u *region));
261static int captype __ARGS((char_u *word, char_u *end));
Bram Moolenaarb765d632005-06-07 21:00:02 +0000262static void spell_reload_one __ARGS((char_u *fname));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000263static int set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000264static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
265static void write_spell_chartab __ARGS((FILE *fd));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000266static int spell_isupper __ARGS((int c));
267static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
268
269static char *e_format = N_("E759: Format error in spell file");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000270
271/*
272 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000273 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000274 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
275 * or when it's OK it remains unchanged.
276 * This must only be called when 'spelllang' is not empty.
277 * Returns the length of the word in bytes, also when it's OK, so that the
278 * caller can skip over the word.
279 */
280 int
Bram Moolenaar51485f02005-06-04 21:55:20 +0000281spell_check(wp, ptr, attrp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000282 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000283 char_u *ptr;
284 int *attrp;
285{
286 matchinf_T mi; /* Most things are put in "mi" so that it can
287 be passed to functions quickly. */
288
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000289 /* A word never starts at a space or a control character. Return quickly
290 * then, skipping over the character. */
291 if (*ptr <= ' ')
292 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000293
Bram Moolenaar51485f02005-06-04 21:55:20 +0000294 /* A word starting with a number is always OK. Also skip hexadecimal
295 * numbers 0xFF99 and 0X99FF. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000296 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000297 {
298 if (*ptr == '0' && (ptr[1] == 'x' || ptr[2] == 'X'))
299 mi.mi_end = skiphex(ptr);
300 else
301 mi.mi_end = skipdigits(ptr);
302 }
303 else
304 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000305 /* Find the end of the word. */
306 mi.mi_word = ptr;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000307 mi.mi_fend = ptr;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000308 if (SPELL_ISWORDP(mi.mi_fend))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000309 {
310 /* Make case-folded copy of the characters until the next non-word
311 * character. */
312 do
313 {
314 mb_ptr_adv(mi.mi_fend);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000315 } while (*mi.mi_fend != NUL && SPELL_ISWORDP(mi.mi_fend));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000316
Bram Moolenaar51485f02005-06-04 21:55:20 +0000317 /* Check the caps type of the word. */
318 mi.mi_capflags = captype(ptr, mi.mi_fend);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000319 }
320 else
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000321 /* No word characters, caps type is always zero. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000322 mi.mi_capflags = 0;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000323
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000324 /* We always use the characters up to the next non-word character,
325 * also for bad words. */
326 mi.mi_end = mi.mi_fend;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000327 mi.mi_cend = mi.mi_fend;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000328
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000329 /* Include one non-word character so that we can check for the
330 * word end. */
331 if (*mi.mi_fend != NUL)
332 mb_ptr_adv(mi.mi_fend);
333
334 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
335 MAXWLEN + 1);
336 mi.mi_fwordlen = STRLEN(mi.mi_fword);
337
Bram Moolenaar51485f02005-06-04 21:55:20 +0000338 /* The word is bad unless we recognize it. */
339 mi.mi_result = SP_BAD;
340
341 /*
342 * Loop over the languages specified in 'spelllang'.
343 * We check them all, because a matching word may be longer than an
344 * already found matching word.
345 */
346 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000347 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000348 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000349 /* Check for a matching word in case-folded words. */
350 find_word(&mi, FALSE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000351
Bram Moolenaar51485f02005-06-04 21:55:20 +0000352 /* Try keep-case words. */
353 find_word(&mi, TRUE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000354 }
355
Bram Moolenaar51485f02005-06-04 21:55:20 +0000356 if (mi.mi_result != SP_OK)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000357 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000358 /* When we are at a non-word character there is no error, just
359 * skip over the character (try looking for a word after it). */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000360 if (!SPELL_ISWORDP(ptr))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000361 {
362#ifdef FEAT_MBYTE
363 if (has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000364 return mb_ptr2len_check(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000365#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000366 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000367 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000368
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000369 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000370 *attrp = highlight_attr[HLF_SPB];
371 else if (mi.mi_result == SP_RARE)
372 *attrp = highlight_attr[HLF_SPR];
373 else
374 *attrp = highlight_attr[HLF_SPL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000375 }
376 }
377
Bram Moolenaar51485f02005-06-04 21:55:20 +0000378 return (int)(mi.mi_end - ptr);
379}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000380
Bram Moolenaar51485f02005-06-04 21:55:20 +0000381/*
382 * Check if the word at "mip->mi_word" is in the tree.
383 * When "keepcap" is TRUE check in keep-case word tree.
384 *
385 * For a match mip->mi_result is updated.
386 */
387 static void
388find_word(mip, keepcap)
389 matchinf_T *mip;
390 int keepcap;
391{
392 int arridx = 0;
393 int endlen[MAXWLEN]; /* length at possible word endings */
394 int endidx[MAXWLEN]; /* possible word endings */
395 int endidxcnt = 0;
396 int len;
397 int wlen = 0;
398 int flen;
399 int c;
400 char_u *ptr;
401 unsigned lo, hi, m;
402#ifdef FEAT_MBYTE
403 char_u *s;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000404#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000405 char_u *p;
406 int res = SP_BAD;
407 int valid;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000408 slang_T *slang = mip->mi_lp->lp_slang;
409 unsigned flags;
410 char_u *byts;
411 int *idxs;
412
413 if (keepcap)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000414 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000415 /* Check for word with matching case in keep-case tree. */
416 ptr = mip->mi_word;
417 flen = 9999; /* no case folding, always enough bytes */
418 byts = slang->sl_kbyts;
419 idxs = slang->sl_kidxs;
420 }
421 else
422 {
423 /* Check for case-folded in case-folded tree. */
424 ptr = mip->mi_fword;
425 flen = mip->mi_fwordlen; /* available case-folded bytes */
426 byts = slang->sl_fbyts;
427 idxs = slang->sl_fidxs;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000428 }
429
Bram Moolenaar51485f02005-06-04 21:55:20 +0000430 if (byts == NULL)
431 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000432
Bram Moolenaar51485f02005-06-04 21:55:20 +0000433 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000434 * Repeat advancing in the tree until:
435 * - there is a byte that doesn't match,
436 * - we reach the end of the tree,
437 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000438 */
439 for (;;)
440 {
441 if (flen == 0 && *mip->mi_fend != NUL)
442 {
443 /* Need to fold at least one more character. Do until next
444 * non-word character for efficiency. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000445 p = mip->mi_fend;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000446 do
447 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000448 mb_ptr_adv(mip->mi_fend);
449 } while (*mip->mi_fend != NUL && SPELL_ISWORDP(mip->mi_fend));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000450
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000451 /* Include the non-word character so that we can check for the
452 * word end. */
453 if (*mip->mi_fend != NUL)
454 mb_ptr_adv(mip->mi_fend);
455
456 (void)spell_casefold(p, (int)(mip->mi_fend - p),
Bram Moolenaar51485f02005-06-04 21:55:20 +0000457 mip->mi_fword + mip->mi_fwordlen,
458 MAXWLEN - mip->mi_fwordlen);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000459 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
460 mip->mi_fwordlen += flen;
461 }
462
463 len = byts[arridx++];
464
465 /* If the first possible byte is a zero the word could end here.
466 * Remember this index, we first check for the longest word. */
467 if (byts[arridx] == 0)
468 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000469 if (endidxcnt == MAXWLEN)
470 {
471 /* Must be a corrupted spell file. */
472 EMSG(_(e_format));
473 return;
474 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000475 endlen[endidxcnt] = wlen;
476 endidx[endidxcnt++] = arridx++;
477 --len;
478
479 /* Skip over the zeros, there can be several flag/region
480 * combinations. */
481 while (len > 0 && byts[arridx] == 0)
482 {
483 ++arridx;
484 --len;
485 }
486 if (len == 0)
487 break; /* no children, word must end here */
488 }
489
490 /* Stop looking at end of the line. */
491 if (ptr[wlen] == NUL)
492 break;
493
494 /* Perform a binary search in the list of accepted bytes. */
495 c = ptr[wlen];
496 lo = arridx;
497 hi = arridx + len - 1;
498 while (lo < hi)
499 {
500 m = (lo + hi) / 2;
501 if (byts[m] > c)
502 hi = m - 1;
503 else if (byts[m] < c)
504 lo = m + 1;
505 else
506 {
507 lo = hi = m;
508 break;
509 }
510 }
511
512 /* Stop if there is no matching byte. */
513 if (hi < lo || byts[lo] != c)
514 break;
515
516 /* Continue at the child (if there is one). */
517 arridx = idxs[lo];
518 ++wlen;
519 --flen;
520 }
521
522 /*
523 * Verify that one of the possible endings is valid. Try the longest
524 * first.
525 */
526 while (endidxcnt > 0)
527 {
528 --endidxcnt;
529 arridx = endidx[endidxcnt];
530 wlen = endlen[endidxcnt];
531
532#ifdef FEAT_MBYTE
533 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
534 continue; /* not at first byte of character */
535#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000536 if (SPELL_ISWORDP(ptr + wlen))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000537 continue; /* next char is a word character */
538
539#ifdef FEAT_MBYTE
540 if (!keepcap && has_mbyte)
541 {
542 /* Compute byte length in original word, length may change
543 * when folding case. */
544 p = mip->mi_word;
545 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
546 mb_ptr_adv(p);
547 wlen = p - mip->mi_word;
548 }
549#endif
550
551 /* Check flags and region. Repeat this if there are more
552 * flags/region alternatives until there is a match. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000553 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len)
554 {
555 flags = idxs[arridx];
556 if (keepcap)
557 {
558 /* For "keepcap" tree the case is always right. */
559 valid = TRUE;
560 }
561 else
562 {
563 /* Check that the word is in the required case. */
564 if (mip->mi_cend != mip->mi_word + wlen)
565 {
566 /* mi_capflags was set for a different word
567 * length, need to do it again. */
568 mip->mi_cend = mip->mi_word + wlen;
569 mip->mi_capflags = captype(mip->mi_word,
570 mip->mi_cend);
571 }
572
573 valid = (mip->mi_capflags == WF_ALLCAP
574 || ((flags & WF_ALLCAP) == 0
575 && ((flags & WF_ONECAP) == 0
576 || mip->mi_capflags == WF_ONECAP)));
577 }
578
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000579 if (valid)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000580 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000581 if (flags & WF_BANNED)
582 res = SP_BANNED;
583 else if (flags & WF_REGION)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000584 {
585 /* Check region. */
586 if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
587 res = SP_OK;
588 else
589 res = SP_LOCAL;
590 }
591 else if (flags & WF_RARE)
592 res = SP_RARE;
593 else
594 res = SP_OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000595
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000596 /* Always use the longest match and the best result. */
597 if (mip->mi_result > res)
598 {
599 mip->mi_result = res;
600 mip->mi_end = mip->mi_word + wlen;
601 }
602 else if (mip->mi_result == res
603 && mip->mi_end < mip->mi_word + wlen)
604 mip->mi_end = mip->mi_word + wlen;
605
606 if (res == SP_OK)
607 break;
608 }
609 else
610 res = SP_BAD;
611
Bram Moolenaar51485f02005-06-04 21:55:20 +0000612 ++arridx;
613 }
614
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000615 if (res == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000616 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000617 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000618}
619
Bram Moolenaar51485f02005-06-04 21:55:20 +0000620
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000621/*
622 * Move to next spell error.
623 * Return OK if found, FAIL otherwise.
624 */
625 int
626spell_move_to(dir, allwords)
627 int dir; /* FORWARD or BACKWARD */
628 int allwords; /* TRUE for "[s" and "]s" */
629{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000630 linenr_T lnum;
631 pos_T found_pos;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000632 char_u *line;
633 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000634 int attr = 0;
635 int len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000636 int has_syntax = syntax_present(curbuf);
637 int col;
638 int can_spell;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000639
Bram Moolenaarb765d632005-06-07 21:00:02 +0000640 if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000641 {
642 EMSG(_("E756: Spell checking not enabled"));
643 return FAIL;
644 }
645
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000646 /*
647 * Start looking for bad word at the start of the line, because we can't
648 * start halfway a word, we don't know where it starts or ends.
649 *
650 * When searching backwards, we continue in the line to find the last
651 * bad word (in the cursor line: before the cursor).
652 */
653 lnum = curwin->w_cursor.lnum;
654 found_pos.lnum = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000655
656 while (!got_int)
657 {
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000658 line = ml_get(lnum);
659 p = line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000660
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000661 while (*p != NUL)
662 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000663 /* When searching backward don't search after the cursor. */
664 if (dir == BACKWARD
665 && lnum == curwin->w_cursor.lnum
666 && (colnr_T)(p - line) >= curwin->w_cursor.col)
667 break;
668
669 /* start of word */
670 len = spell_check(curwin, p, &attr);
671
672 if (attr != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000673 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000674 /* We found a bad word. Check the attribute. */
675 /* TODO: check for syntax @Spell cluster. */
676 if (allwords || attr == highlight_attr[HLF_SPB])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000677 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000678 /* When searching forward only accept a bad word after
679 * the cursor. */
680 if (dir == BACKWARD
681 || lnum > curwin->w_cursor.lnum
682 || (lnum == curwin->w_cursor.lnum
683 && (colnr_T)(p - line)
684 > curwin->w_cursor.col))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000685 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000686 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000687 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000688 col = p - line;
689 (void)syn_get_id(lnum, (colnr_T)col,
690 FALSE, &can_spell);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000691
Bram Moolenaar51485f02005-06-04 21:55:20 +0000692 /* have to get the line again, a multi-line
693 * regexp may make it invalid */
694 line = ml_get(lnum);
695 p = line + col;
696 }
697 else
698 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000699
Bram Moolenaar51485f02005-06-04 21:55:20 +0000700 if (can_spell)
701 {
702 found_pos.lnum = lnum;
703 found_pos.col = p - line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000704#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +0000705 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000706#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000707 if (dir == FORWARD)
708 {
709 /* No need to search further. */
710 curwin->w_cursor = found_pos;
711 return OK;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000712 }
713 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000714 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000715 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000716 attr = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000717 }
718
Bram Moolenaar51485f02005-06-04 21:55:20 +0000719 /* advance to character after the word */
720 p += len;
721 if (*p == NUL)
722 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000723 }
724
725 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000726 if (dir == BACKWARD)
727 {
728 if (found_pos.lnum != 0)
729 {
730 /* Use the last match in the line. */
731 curwin->w_cursor = found_pos;
732 return OK;
733 }
734 if (lnum == 1)
735 return FAIL;
736 --lnum;
737 }
738 else
739 {
740 if (lnum == curbuf->b_ml.ml_line_count)
741 return FAIL;
742 ++lnum;
743 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000744
745 line_breakcheck();
746 }
747
748 return FAIL; /* interrupted */
749}
750
751/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000752 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +0000753 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000754 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000755 static void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000756spell_load_lang(lang)
757 char_u *lang;
758{
Bram Moolenaarb765d632005-06-07 21:00:02 +0000759 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000760 int r;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000761 char_u langcp[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000762
Bram Moolenaarb765d632005-06-07 21:00:02 +0000763 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000764 * It's truncated when an error is detected. */
765 STRCPY(langcp, lang);
766
Bram Moolenaarb765d632005-06-07 21:00:02 +0000767 /*
768 * Find the first spell file for "lang" in 'runtimepath' and load it.
769 */
770 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
771 "spell/%s.%s.spl", lang, spell_enc());
772 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000773
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000774 if (r == FAIL && *langcp != NUL)
775 {
776 /* Try loading the ASCII version. */
Bram Moolenaarb765d632005-06-07 21:00:02 +0000777 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaar9c13b352005-05-19 20:53:52 +0000778 "spell/%s.ascii.spl", lang);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000779 r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &langcp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000780 }
781
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000782 if (r == FAIL)
783 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
784 fname_enc + 6);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000785 else if (*langcp != NUL)
786 {
787 /* Load all the additions. */
788 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
789 do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &langcp);
790 }
791}
792
793/*
794 * Return the encoding used for spell checking: Use 'encoding', except that we
795 * use "latin1" for "latin9". And limit to 60 characters (just in case).
796 */
797 static char_u *
798spell_enc()
799{
800
801#ifdef FEAT_MBYTE
802 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
803 return p_enc;
804#endif
805 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000806}
807
808/*
809 * Allocate a new slang_T.
810 * Caller must fill "sl_next".
811 */
812 static slang_T *
813slang_alloc(lang)
814 char_u *lang;
815{
816 slang_T *lp;
817
Bram Moolenaar51485f02005-06-04 21:55:20 +0000818 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000819 if (lp != NULL)
820 {
821 lp->sl_name = vim_strsave(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000822 ga_init2(&lp->sl_rep, sizeof(repentry_T), 4);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000823 }
824 return lp;
825}
826
827/*
828 * Free the contents of an slang_T and the structure itself.
829 */
830 static void
831slang_free(lp)
832 slang_T *lp;
833{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000834 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000835 vim_free(lp->sl_fname);
836 slang_clear(lp);
837 vim_free(lp);
838}
839
840/*
841 * Clear an slang_T so that the file can be reloaded.
842 */
843 static void
844slang_clear(lp)
845 slang_T *lp;
846{
Bram Moolenaar51485f02005-06-04 21:55:20 +0000847 vim_free(lp->sl_fbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000848 lp->sl_fbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000849 vim_free(lp->sl_kbyts);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000850 lp->sl_kbyts = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000851 vim_free(lp->sl_fidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000852 lp->sl_fidxs = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000853 vim_free(lp->sl_kidxs);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000854 lp->sl_kidxs = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000855 ga_clear(&lp->sl_rep);
856 vim_free(lp->sl_try);
Bram Moolenaarb765d632005-06-07 21:00:02 +0000857 lp->sl_try = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000858}
859
860/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000861 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000862 * Invoked through do_in_runtimepath().
863 */
864 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +0000865spell_load_cb(fname, cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000866 char_u *fname;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000867 void *cookie; /* points to the language name */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000868{
Bram Moolenaarb765d632005-06-07 21:00:02 +0000869 spell_load_file(fname, (char_u *)cookie, NULL);
870}
871
872/*
873 * Load one spell file and store the info into a slang_T.
874 *
875 * This is invoked in two ways:
876 * - From spell_load_cb() to load a spell file for the first time. "lang" is
877 * the language name, "old_lp" is NULL. Will allocate an slang_T.
878 * - To reload a spell file that was changed. "lang" is NULL and "old_lp"
879 * points to the existing slang_T.
880 */
881 static void
882spell_load_file(fname, lang, old_lp)
883 char_u *fname;
884 char_u *lang;
885 slang_T *old_lp;
886{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000887 FILE *fd;
888 char_u buf[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000889 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000890 int i;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000891 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000892 int round;
893 char_u *save_sourcing_name = sourcing_name;
894 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000895 int cnt, ccnt;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000896 char_u *fol;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000897 slang_T *lp = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000898
Bram Moolenaarb765d632005-06-07 21:00:02 +0000899 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000900 if (fd == NULL)
901 {
902 EMSG2(_(e_notopen), fname);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000903 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000904 }
Bram Moolenaarb765d632005-06-07 21:00:02 +0000905 if (p_verbose > 2)
906 {
907 verbose_enter();
908 smsg((char_u *)_("Reading spell file \"%s\""), fname);
909 verbose_leave();
910 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000911
Bram Moolenaarb765d632005-06-07 21:00:02 +0000912 if (old_lp == NULL)
913 {
914 lp = slang_alloc(lang);
915 if (lp == NULL)
916 goto endFAIL;
917
918 /* Remember the file name, used to reload the file when it's updated. */
919 lp->sl_fname = vim_strsave(fname);
920 if (lp->sl_fname == NULL)
921 goto endFAIL;
922
923 /* Check for .add.spl. */
924 lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
925 }
926 else
927 lp = old_lp;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000928
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000929 /* Set sourcing_name, so that error messages mention the file name. */
930 sourcing_name = fname;
931 sourcing_lnum = 0;
932
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000933 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
934 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000935 for (i = 0; i < VIMSPELLMAGICL; ++i)
936 buf[i] = getc(fd); /* <fileID> */
937 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
938 {
939 EMSG(_("E757: Wrong file ID in spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000940 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000941 }
942
943 cnt = getc(fd); /* <regioncnt> */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000944 if (cnt < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000945 {
946truncerr:
947 EMSG(_("E758: Truncated spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000948 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000949 }
950 if (cnt > 8)
951 {
952formerr:
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000953 EMSG(_(e_format));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000954 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000955 }
956 for (i = 0; i < cnt; ++i)
957 {
958 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
959 lp->sl_regions[i * 2 + 1] = getc(fd);
960 }
961 lp->sl_regions[cnt * 2] = NUL;
962
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000963 cnt = getc(fd); /* <charflagslen> */
964 if (cnt > 0)
965 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000966 p = alloc((unsigned)cnt);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000967 if (p == NULL)
968 goto endFAIL;
969 for (i = 0; i < cnt; ++i)
970 p[i] = getc(fd); /* <charflags> */
971
972 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
973 if (ccnt <= 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000974 {
975 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000976 goto formerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000977 }
978 fol = alloc((unsigned)ccnt + 1);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000979 if (fol == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000980 {
981 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000982 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000983 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000984 for (i = 0; i < ccnt; ++i)
985 fol[i] = getc(fd); /* <fchars> */
986 fol[i] = NUL;
987
988 /* Set the word-char flags and fill spell_isupper() table. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000989 i = set_spell_charflags(p, cnt, fol);
990 vim_free(p);
991 vim_free(fol);
992 if (i == FAIL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000993 goto formerr;
994 }
995 else
996 {
997 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
998 cnt = (getc(fd) << 8) + getc(fd);
999 if (cnt != 0)
1000 goto formerr;
1001 }
1002
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001003 /* <SUGGEST> : <suggestlen> <more> ... */
1004 /* TODO, just skip this for now */
1005 i = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
1006 while (i-- > 0)
1007 if (getc(fd) == EOF) /* <suggestlen> */
1008 goto truncerr;
1009
Bram Moolenaar51485f02005-06-04 21:55:20 +00001010 /* round 1: <LWORDTREE>
1011 * round 2: <KWORDTREE> */
1012 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001013 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001014 /* The tree size was computed when writing the file, so that we can
1015 * allocate it as one long block. <nodecount> */
1016 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
1017 if (len < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001018 goto truncerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001019 if (len > 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001020 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001021 /* Allocate the byte array. */
1022 p = lalloc((long_u)len, TRUE);
1023 if (p == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001024 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001025 if (round == 1)
1026 lp->sl_fbyts = p;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001027 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00001028 lp->sl_kbyts = p;
1029
1030 /* Allocate the index array. */
1031 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE);
1032 if (p == NULL)
1033 goto endFAIL;
1034 if (round == 1)
1035 lp->sl_fidxs = (int *)p;
1036 else
1037 lp->sl_kidxs = (int *)p;
1038
1039
1040 /* Read the tree and store it in the array. */
1041 i = read_tree(fd,
1042 round == 1 ? lp->sl_fbyts : lp->sl_kbyts,
1043 round == 1 ? lp->sl_fidxs : lp->sl_kidxs,
1044 len, 0);
1045 if (i == -1)
1046 goto truncerr;
1047 if (i < 0)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001048 goto formerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001049 }
1050 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001051
Bram Moolenaarb765d632005-06-07 21:00:02 +00001052 /* For a new file link it in the list of spell files. */
1053 if (old_lp == NULL)
1054 {
1055 lp->sl_next = first_lang;
1056 first_lang = lp;
1057 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001058
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001059 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001060
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001061endFAIL:
Bram Moolenaarb765d632005-06-07 21:00:02 +00001062 if (lang != NULL)
1063 /* truncating the name signals the error to spell_load_lang() */
1064 *lang = NUL;
1065 if (lp != NULL && old_lp == NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001066 slang_free(lp);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001067
1068endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001069 if (fd != NULL)
1070 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001071 sourcing_name = save_sourcing_name;
1072 sourcing_lnum = save_sourcing_lnum;
1073}
1074
1075/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001076 * Read one row of siblings from the spell file and store it in the byte array
1077 * "byts" and index array "idxs". Recursively read the children.
1078 *
1079 * NOTE: The code here must match put_tree().
1080 *
1081 * Returns the index follosing the siblings.
1082 * Returns -1 if the file is shorter than expected.
1083 * Returns -2 if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001084 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001085 static int
1086read_tree(fd, byts, idxs, maxidx, startidx)
1087 FILE *fd;
1088 char_u *byts;
1089 int *idxs;
1090 int maxidx; /* size of arrays */
1091 int startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001092{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001093 int len;
1094 int i;
1095 int n;
1096 int idx = startidx;
1097 int c;
1098#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001099
Bram Moolenaar51485f02005-06-04 21:55:20 +00001100 len = getc(fd); /* <siblingcount> */
1101 if (len <= 0)
1102 return -1;
1103
1104 if (startidx + len >= maxidx)
1105 return -2;
1106 byts[idx++] = len;
1107
1108 /* Read the byte values, flag/region bytes and shared indexes. */
1109 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001110 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001111 c = getc(fd); /* <byte> */
1112 if (c < 0)
1113 return -1;
1114 if (c <= BY_SPECIAL)
1115 {
1116 if (c == BY_NOFLAGS)
1117 {
1118 /* No flags, all regions. */
1119 idxs[idx] = 0;
1120 c = 0;
1121 }
1122 else if (c == BY_FLAGS)
1123 {
1124 /* Read flags and option region. */
1125 c = getc(fd); /* <flags> */
1126 if (c & WF_REGION)
1127 c = (getc(fd) << 8) + c; /* <region> */
1128 idxs[idx] = c;
1129 c = 0;
1130 }
1131 else /* c == BY_INDEX */
1132 {
1133 /* <nodeidx> */
1134 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
1135 if (n < 0 || n >= maxidx)
1136 return -2;
1137 idxs[idx] = n + SHARED_MASK;
1138 c = getc(fd); /* <xbyte> */
1139 }
1140 }
1141 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001142 }
1143
Bram Moolenaar51485f02005-06-04 21:55:20 +00001144 /* Recursively read the children for non-shared siblings.
1145 * Skip the end-of-word ones (zero byte value) and the shared ones (and
1146 * remove SHARED_MASK) */
1147 for (i = 1; i <= len; ++i)
1148 if (byts[startidx + i] != 0)
1149 {
1150 if (idxs[startidx + i] & SHARED_MASK)
1151 idxs[startidx + i] &= ~SHARED_MASK;
1152 else
1153 {
1154 idxs[startidx + i] = idx;
1155 idx = read_tree(fd, byts, idxs, maxidx, idx);
1156 if (idx < 0)
1157 break;
1158 }
1159 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001160
Bram Moolenaar51485f02005-06-04 21:55:20 +00001161 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001162}
1163
1164/*
1165 * Parse 'spelllang' and set buf->b_langp accordingly.
1166 * Returns an error message or NULL.
1167 */
1168 char_u *
1169did_set_spelllang(buf)
1170 buf_T *buf;
1171{
1172 garray_T ga;
1173 char_u *lang;
1174 char_u *e;
1175 char_u *region;
1176 int region_mask;
1177 slang_T *lp;
1178 int c;
1179 char_u lbuf[MAXWLEN + 1];
1180
1181 ga_init2(&ga, sizeof(langp_T), 2);
1182
1183 /* loop over comma separated languages. */
1184 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
1185 {
1186 e = vim_strchr(lang, ',');
1187 if (e == NULL)
1188 e = lang + STRLEN(lang);
Bram Moolenaar5482f332005-04-17 20:18:43 +00001189 region = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001190 if (e > lang + 2)
1191 {
1192 if (e - lang >= MAXWLEN)
1193 {
1194 ga_clear(&ga);
1195 return e_invarg;
1196 }
1197 if (lang[2] == '_')
1198 region = lang + 3;
1199 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001200
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001201 /* Check if we loaded this language before. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001202 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1203 if (STRNICMP(lp->sl_name, lang, 2) == 0)
1204 break;
1205
1206 if (lp == NULL)
1207 {
1208 /* Not found, load the language. */
1209 STRNCPY(lbuf, lang, e - lang);
1210 lbuf[e - lang] = NUL;
1211 if (region != NULL)
1212 mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001213 spell_load_lang(lbuf);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001214 }
1215
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001216 /*
1217 * Loop over the languages, there can be several files for each.
1218 */
1219 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1220 if (STRNICMP(lp->sl_name, lang, 2) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001221 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00001222 if (region == NULL || lp->sl_add)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001223 region_mask = REGION_ALL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001224 else
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001225 {
1226 /* find region in sl_regions */
1227 c = find_region(lp->sl_regions, region);
1228 if (c == REGION_ALL)
1229 {
1230 c = *e;
1231 *e = NUL;
1232 smsg((char_u *)_("Warning: region %s not supported"),
1233 lang);
1234 *e = c;
1235 region_mask = REGION_ALL;
1236 }
1237 else
1238 region_mask = 1 << c;
1239 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001240
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001241 if (ga_grow(&ga, 1) == FAIL)
1242 {
1243 ga_clear(&ga);
1244 return e_outofmem;
1245 }
1246 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
1247 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
1248 ++ga.ga_len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001249 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001250
1251 if (*e == ',')
1252 ++e;
1253 }
1254
1255 /* Add a NULL entry to mark the end of the list. */
1256 if (ga_grow(&ga, 1) == FAIL)
1257 {
1258 ga_clear(&ga);
1259 return e_outofmem;
1260 }
1261 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
1262 ++ga.ga_len;
1263
1264 /* Everything is fine, store the new b_langp value. */
1265 ga_clear(&buf->b_langp);
1266 buf->b_langp = ga;
1267
1268 return NULL;
1269}
1270
1271/*
1272 * Find the region "region[2]" in "rp" (points to "sl_regions").
1273 * Each region is simply stored as the two characters of it's name.
1274 * Returns the index if found, REGION_ALL if not found.
1275 */
1276 static int
1277find_region(rp, region)
1278 char_u *rp;
1279 char_u *region;
1280{
1281 int i;
1282
1283 for (i = 0; ; i += 2)
1284 {
1285 if (rp[i] == NUL)
1286 return REGION_ALL;
1287 if (rp[i] == region[0] && rp[i + 1] == region[1])
1288 break;
1289 }
1290 return i / 2;
1291}
1292
1293/*
1294 * Return type of word:
1295 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00001296 * Word WF_ONECAP
1297 * W WORD WF_ALLCAP
1298 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001299 */
1300 static int
1301captype(word, end)
1302 char_u *word;
1303 char_u *end;
1304{
1305 char_u *p;
1306 int c;
1307 int firstcap;
1308 int allcap;
1309 int past_second = FALSE; /* past second word char */
1310
1311 /* find first letter */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001312 for (p = word; !SPELL_ISWORDP(p); mb_ptr_adv(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001313 if (p >= end)
1314 return 0; /* only non-word characters, illegal word */
1315#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00001316 if (has_mbyte)
1317 c = mb_ptr2char_adv(&p);
1318 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001319#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00001320 c = *p++;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001321 firstcap = allcap = spell_isupper(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001322
1323 /*
1324 * Need to check all letters to find a word with mixed upper/lower.
1325 * But a word with an upper char only at start is a ONECAP.
1326 */
1327 for ( ; p < end; mb_ptr_adv(p))
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001328 if (SPELL_ISWORDP(p))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001329 {
1330#ifdef FEAT_MBYTE
1331 c = mb_ptr2char(p);
1332#else
1333 c = *p;
1334#endif
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001335 if (!spell_isupper(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001336 {
1337 /* UUl -> KEEPCAP */
1338 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001339 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001340 allcap = FALSE;
1341 }
1342 else if (!allcap)
1343 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001344 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001345 past_second = TRUE;
1346 }
1347
1348 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001349 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001350 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001351 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001352 return 0;
1353}
1354
1355# if defined(FEAT_MBYTE) || defined(PROTO)
1356/*
1357 * Clear all spelling tables and reload them.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001358 * Used after 'encoding' is set and when ":mkspell" was used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001359 */
1360 void
1361spell_reload()
1362{
1363 buf_T *buf;
1364 slang_T *lp;
1365
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001366 /* Initialize the table for SPELL_ISWORDP(). */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001367 init_spell_chartab();
1368
1369 /* Unload all allocated memory. */
1370 while (first_lang != NULL)
1371 {
1372 lp = first_lang;
1373 first_lang = lp->sl_next;
1374 slang_free(lp);
1375 }
1376
1377 /* Go through all buffers and handle 'spelllang'. */
1378 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
1379 {
1380 ga_clear(&buf->b_langp);
1381 if (*buf->b_p_spl != NUL)
1382 did_set_spelllang(buf);
1383 }
1384}
1385# endif
1386
Bram Moolenaarb765d632005-06-07 21:00:02 +00001387/*
1388 * Reload the spell file "fname" if it's loaded.
1389 */
1390 static void
1391spell_reload_one(fname)
1392 char_u *fname;
1393{
1394 slang_T *lp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001395
Bram Moolenaarb765d632005-06-07 21:00:02 +00001396 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1397 if (fullpathcmp(fname, lp->sl_fname, FALSE) == FPC_SAME)
1398 {
1399 slang_clear(lp);
1400 spell_load_file(fname, NULL, lp);
1401 redraw_all_later(NOT_VALID);
1402 }
1403}
1404
1405
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001406/*
1407 * Functions for ":mkspell".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001408 */
1409
Bram Moolenaar51485f02005-06-04 21:55:20 +00001410#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001411 and .dic file. */
1412/*
1413 * Main structure to store the contents of a ".aff" file.
1414 */
1415typedef struct afffile_S
1416{
1417 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
1418 char_u *af_try; /* "TRY" line in "af_enc" encoding */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001419 int af_rar; /* RAR ID for rare word */
1420 int af_kep; /* KEP ID for keep-case word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001421 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
1422 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
1423 garray_T af_rep; /* list of repentry_T entries from REP lines */
1424} afffile_T;
1425
1426typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001427/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
1428struct affentry_S
1429{
1430 affentry_T *ae_next; /* next affix with same name/number */
1431 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
1432 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001433 char_u *ae_cond; /* condition (NULL for ".") */
1434 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001435};
1436
1437/* Affix header from ".aff" file. Used for af_pref and af_suff. */
1438typedef struct affheader_S
1439{
1440 char_u ah_key[2]; /* key for hashtable == name of affix entry */
1441 int ah_combine; /* suffix may combine with prefix */
1442 affentry_T *ah_first; /* first affix entry */
1443} affheader_T;
1444
1445#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1446
1447/*
1448 * Structure that is used to store the items in the word tree. This avoids
1449 * the need to keep track of each allocated thing, it's freed all at once
1450 * after ":mkspell" is done.
1451 */
1452#define SBLOCKSIZE 16000 /* size of sb_data */
1453typedef struct sblock_S sblock_T;
1454struct sblock_S
1455{
1456 sblock_T *sb_next; /* next block in list */
1457 int sb_used; /* nr of bytes already in use */
1458 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001459};
1460
1461/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001462 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001463 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001464typedef struct wordnode_S wordnode_T;
1465struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001466{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001467 char_u wn_hashkey[6]; /* room for the hash key */
1468 wordnode_T *wn_next; /* next node with same hash key */
1469 wordnode_T *wn_child; /* child (next byte in word) */
1470 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
1471 always sorted) */
1472 wordnode_T *wn_wnode; /* parent node that will write this node */
1473 int wn_index; /* index in written nodes (valid after first
1474 round) */
1475 char_u wn_byte; /* Byte for this node. NUL for word end */
1476 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
1477 char_u wn_region; /* when wn_byte is NUL: region mask */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001478};
1479
Bram Moolenaar51485f02005-06-04 21:55:20 +00001480#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001481
Bram Moolenaar51485f02005-06-04 21:55:20 +00001482/*
1483 * Info used while reading the spell files.
1484 */
1485typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001486{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001487 wordnode_T *si_foldroot; /* tree with case-folded words */
1488 wordnode_T *si_keeproot; /* tree with keep-case words */
1489 sblock_T *si_blocks; /* memory blocks used */
1490 int si_ascii; /* handling only ASCII words */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001491 int si_add; /* addition file */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001492 int si_region; /* region mask */
1493 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00001494 int si_memtot; /* runtime memory used */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001495 int si_verbose; /* verbose messages */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001496} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001497
Bram Moolenaar51485f02005-06-04 21:55:20 +00001498static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar5482f332005-04-17 20:18:43 +00001499static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001500static void spell_free_aff __ARGS((afffile_T *aff));
1501static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001502static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, hashtab_T *ht, hashtab_T *xht, int comb, int flags));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001503static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
1504static void *getroom __ARGS((sblock_T **blp, size_t len));
1505static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
1506static void free_blocks __ARGS((sblock_T *bl));
1507static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001508static int store_word __ARGS((char_u *word, spellinfo_T *spin, int flags));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001509static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, sblock_T **blp));
Bram Moolenaarb765d632005-06-07 21:00:02 +00001510static void wordtree_compress __ARGS((wordnode_T *root, spellinfo_T *spin));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001511static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
1512static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
1513static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin, int regcount, char_u *regchars));
1514static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask));
Bram Moolenaarb765d632005-06-07 21:00:02 +00001515static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int verbose));
1516static void init_spellfile __ARGS((void));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001517
1518/*
1519 * Read an affix ".aff" file.
1520 * Returns an afffile_T, NULL for failure.
1521 */
1522 static afffile_T *
Bram Moolenaar51485f02005-06-04 21:55:20 +00001523spell_read_aff(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001524 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001525 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001526{
1527 FILE *fd;
1528 afffile_T *aff;
1529 char_u rline[MAXLINELEN];
1530 char_u *line;
1531 char_u *pc = NULL;
1532 char_u *(items[6]);
1533 int itemcnt;
1534 char_u *p;
1535 int lnum = 0;
1536 affheader_T *cur_aff = NULL;
1537 int aff_todo = 0;
1538 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001539 char_u *low = NULL;
1540 char_u *fol = NULL;
1541 char_u *upp = NULL;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001542 static char *e_affname = N_("Affix name too long in %s line %d: %s");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001543
Bram Moolenaar51485f02005-06-04 21:55:20 +00001544 /*
1545 * Open the file.
1546 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001547 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001548 if (fd == NULL)
1549 {
1550 EMSG2(_(e_notopen), fname);
1551 return NULL;
1552 }
1553
Bram Moolenaarb765d632005-06-07 21:00:02 +00001554 if (spin->si_verbose || p_verbose > 2)
1555 {
1556 if (!spin->si_verbose)
1557 verbose_enter();
1558 smsg((char_u *)_("Reading affix file %s..."), fname);
1559 out_flush();
1560 if (!spin->si_verbose)
1561 verbose_leave();
1562 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001563
Bram Moolenaar51485f02005-06-04 21:55:20 +00001564 /*
1565 * Allocate and init the afffile_T structure.
1566 */
1567 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001568 if (aff == NULL)
1569 return NULL;
1570 hash_init(&aff->af_pref);
1571 hash_init(&aff->af_suff);
1572 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20);
1573
1574 /*
1575 * Read all the lines in the file one by one.
1576 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001577 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001578 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001579 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001580 ++lnum;
1581
1582 /* Skip comment lines. */
1583 if (*rline == '#')
1584 continue;
1585
1586 /* Convert from "SET" to 'encoding' when needed. */
1587 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001588#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00001589 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001590 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001591 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001592 if (pc == NULL)
1593 {
1594 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
1595 fname, lnum, rline);
1596 continue;
1597 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001598 line = pc;
1599 }
1600 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00001601#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001602 {
1603 pc = NULL;
1604 line = rline;
1605 }
1606
1607 /* Split the line up in white separated items. Put a NUL after each
1608 * item. */
1609 itemcnt = 0;
1610 for (p = line; ; )
1611 {
1612 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
1613 ++p;
1614 if (*p == NUL)
1615 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001616 if (itemcnt == 6) /* too many items */
1617 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001618 items[itemcnt++] = p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001619 while (*p > ' ') /* skip until white space or CR/NL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001620 ++p;
1621 if (*p == NUL)
1622 break;
1623 *p++ = NUL;
1624 }
1625
1626 /* Handle non-empty lines. */
1627 if (itemcnt > 0)
1628 {
1629 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
1630 && aff->af_enc == NULL)
1631 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00001632#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00001633 /* Setup for conversion from "ENC" to 'encoding'. */
1634 aff->af_enc = enc_canonize(items[1]);
1635 if (aff->af_enc != NULL && !spin->si_ascii
1636 && convert_setup(&spin->si_conv, aff->af_enc,
1637 p_enc) == FAIL)
1638 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
1639 fname, aff->af_enc, p_enc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001640#else
1641 smsg((char_u *)_("Conversion in %s not supported"), fname);
1642#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001643 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00001644 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
1645 {
1646 /* ignored */
1647 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001648 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2
1649 && aff->af_try == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001650 {
1651 aff->af_try = getroom_save(&spin->si_blocks, items[1]);
1652 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001653 else if (STRCMP(items[0], "RAR") == 0 && itemcnt == 2
1654 && aff->af_rar == 0)
1655 {
1656 aff->af_rar = items[1][0];
1657 if (items[1][1] != NUL)
1658 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
1659 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00001660 else if (STRCMP(items[0], "KEP") == 0 && itemcnt == 2
1661 && aff->af_kep == 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001662 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00001663 aff->af_kep = items[1][0];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001664 if (items[1][1] != NUL)
1665 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
1666 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001667 else if ((STRCMP(items[0], "PFX") == 0
1668 || STRCMP(items[0], "SFX") == 0)
1669 && aff_todo == 0
1670 && itemcnt == 4)
1671 {
1672 /* New affix letter. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001673 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
1674 sizeof(affheader_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001675 if (cur_aff == NULL)
1676 break;
1677 cur_aff->ah_key[0] = *items[1];
1678 cur_aff->ah_key[1] = NUL;
1679 if (items[1][1] != NUL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001680 smsg((char_u *)_(e_affname), fname, lnum, items[1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001681 if (*items[2] == 'Y')
1682 cur_aff->ah_combine = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001683 else if (*items[2] != 'N')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001684 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
1685 fname, lnum, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001686 if (*items[0] == 'P')
1687 tp = &aff->af_pref;
1688 else
1689 tp = &aff->af_suff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001690 aff_todo = atoi((char *)items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001691 if (!HASHITEM_EMPTY(hash_find(tp, cur_aff->ah_key)))
Bram Moolenaar51485f02005-06-04 21:55:20 +00001692 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001693 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
1694 fname, lnum, items[1]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001695 aff_todo = 0;
1696 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001697 else
1698 hash_add(tp, cur_aff->ah_key);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001699 }
1700 else if ((STRCMP(items[0], "PFX") == 0
1701 || STRCMP(items[0], "SFX") == 0)
1702 && aff_todo > 0
1703 && STRCMP(cur_aff->ah_key, items[1]) == 0
1704 && itemcnt == 5)
1705 {
1706 affentry_T *aff_entry;
1707
1708 /* New item for an affix letter. */
1709 --aff_todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001710 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
1711 sizeof(affentry_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001712 if (aff_entry == NULL)
1713 break;
Bram Moolenaar5482f332005-04-17 20:18:43 +00001714
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001715 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001716 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
1717 items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001718 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001719 aff_entry->ae_add = getroom_save(&spin->si_blocks,
1720 items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001721
Bram Moolenaar51485f02005-06-04 21:55:20 +00001722 /* Don't use an affix entry with non-ASCII characters when
1723 * "spin->si_ascii" is TRUE. */
1724 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00001725 || has_non_ascii(aff_entry->ae_add)))
1726 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00001727 aff_entry->ae_next = cur_aff->ah_first;
1728 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001729
1730 if (STRCMP(items[4], ".") != 0)
1731 {
1732 char_u buf[MAXLINELEN];
1733
1734 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
1735 items[4]);
1736 if (*items[0] == 'P')
1737 sprintf((char *)buf, "^%s", items[4]);
1738 else
1739 sprintf((char *)buf, "%s$", items[4]);
1740 aff_entry->ae_prog = vim_regcomp(buf,
1741 RE_MAGIC + RE_STRING);
1742 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00001743 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001744 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001745 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
1746 {
1747 if (fol != NULL)
1748 smsg((char_u *)_("Duplicate FOL in %s line %d"),
1749 fname, lnum);
1750 else
1751 fol = vim_strsave(items[1]);
1752 }
1753 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
1754 {
1755 if (low != NULL)
1756 smsg((char_u *)_("Duplicate LOW in %s line %d"),
1757 fname, lnum);
1758 else
1759 low = vim_strsave(items[1]);
1760 }
1761 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
1762 {
1763 if (upp != NULL)
1764 smsg((char_u *)_("Duplicate UPP in %s line %d"),
1765 fname, lnum);
1766 else
1767 upp = vim_strsave(items[1]);
1768 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001769 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
1770 /* Ignore REP count */;
1771 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
1772 {
1773 repentry_T *rp;
1774
1775 /* REP item */
1776 if (ga_grow(&aff->af_rep, 1) == FAIL)
1777 break;
1778 rp = ((repentry_T *)aff->af_rep.ga_data) + aff->af_rep.ga_len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001779 rp->re_from = getroom_save(&spin->si_blocks, items[1]);
1780 rp->re_to = getroom_save(&spin->si_blocks, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001781 ++aff->af_rep.ga_len;
1782 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001783 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001784 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
1785 fname, lnum, items[0]);
1786 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001787 }
1788
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001789 if (fol != NULL || low != NULL || upp != NULL)
1790 {
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00001791 /* Don't write a word table for an ASCII file, so that we don't check
1792 * for conflicts with a word table that matches 'encoding'. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001793 if (!spin->si_ascii)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00001794 {
1795 if (fol == NULL || low == NULL || upp == NULL)
1796 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
1797 else
1798 set_spell_chartab(fol, low, upp);
1799 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001800
1801 vim_free(fol);
1802 vim_free(low);
1803 vim_free(upp);
1804 }
1805
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001806 vim_free(pc);
1807 fclose(fd);
1808 return aff;
1809}
1810
1811/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00001812 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
1813 * When "s" is NULL FALSE is returned.
1814 */
1815 static int
1816has_non_ascii(s)
1817 char_u *s;
1818{
1819 char_u *p;
1820
1821 if (s != NULL)
1822 for (p = s; *p != NUL; ++p)
1823 if (*p >= 128)
1824 return TRUE;
1825 return FALSE;
1826}
1827
1828/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001829 * Free the structure filled by spell_read_aff().
1830 */
1831 static void
1832spell_free_aff(aff)
1833 afffile_T *aff;
1834{
1835 hashtab_T *ht;
1836 hashitem_T *hi;
1837 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001838 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001839 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001840
1841 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001842
Bram Moolenaar51485f02005-06-04 21:55:20 +00001843 /* All this trouble to foree the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001844 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
1845 {
1846 todo = ht->ht_used;
1847 for (hi = ht->ht_array; todo > 0; ++hi)
1848 {
1849 if (!HASHITEM_EMPTY(hi))
1850 {
1851 --todo;
1852 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001853 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
1854 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001855 }
1856 }
1857 if (ht == &aff->af_suff)
1858 break;
1859 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001860
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001861 hash_clear(&aff->af_pref);
1862 hash_clear(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001863 ga_clear(&aff->af_rep);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001864}
1865
1866/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001867 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001868 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001869 */
1870 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00001871spell_read_dic(fname, spin, affile)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001872 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001873 spellinfo_T *spin;
1874 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001875{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001876 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001877 char_u line[MAXLINELEN];
Bram Moolenaar51485f02005-06-04 21:55:20 +00001878 char_u *afflist;
1879 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001880 char_u *pc;
1881 char_u *w;
1882 int l;
1883 hash_T hash;
1884 hashitem_T *hi;
1885 FILE *fd;
1886 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001887 int non_ascii = 0;
1888 int retval = OK;
1889 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001890 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001891
Bram Moolenaar51485f02005-06-04 21:55:20 +00001892 /*
1893 * Open the file.
1894 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001895 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001896 if (fd == NULL)
1897 {
1898 EMSG2(_(e_notopen), fname);
1899 return FAIL;
1900 }
1901
Bram Moolenaar51485f02005-06-04 21:55:20 +00001902 /* The hashtable is only used to detect duplicated words. */
1903 hash_init(&ht);
1904
Bram Moolenaarb765d632005-06-07 21:00:02 +00001905 if (spin->si_verbose || p_verbose > 2)
1906 {
1907 if (!spin->si_verbose)
1908 verbose_enter();
1909 smsg((char_u *)_("Reading dictionary file %s..."), fname);
1910 out_flush();
1911 if (!spin->si_verbose)
1912 verbose_leave();
1913 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001914
1915 /* Read and ignore the first line: word count. */
1916 (void)vim_fgets(line, MAXLINELEN, fd);
1917 if (!isdigit(*skipwhite(line)))
1918 EMSG2(_("E760: No word count in %s"), fname);
1919
1920 /*
1921 * Read all the lines in the file one by one.
1922 * The words are converted to 'encoding' here, before being added to
1923 * the hashtable.
1924 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001925 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001926 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001927 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001928 ++lnum;
1929
Bram Moolenaar51485f02005-06-04 21:55:20 +00001930 /* Remove CR, LF and white space from the end. White space halfway
1931 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001932 l = STRLEN(line);
1933 while (l > 0 && line[l - 1] <= ' ')
1934 --l;
1935 if (l == 0)
1936 continue; /* empty line */
1937 line[l] = NUL;
1938
Bram Moolenaar51485f02005-06-04 21:55:20 +00001939 /* This takes time, print a message now and then. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001940 if (spin->si_verbose && (lnum & 0x3ff) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001941 {
1942 vim_snprintf((char *)message, sizeof(message),
1943 _("line %6d - %s"), lnum, line);
1944 msg_start();
1945 msg_outtrans_attr(message, 0);
1946 msg_clr_eos();
1947 msg_didout = FALSE;
1948 msg_col = 0;
1949 out_flush();
1950 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001951
Bram Moolenaar51485f02005-06-04 21:55:20 +00001952 /* Find the optional affix names. */
1953 afflist = vim_strchr(line, '/');
1954 if (afflist != NULL)
1955 *afflist++ = NUL;
1956
1957 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
1958 if (spin->si_ascii && has_non_ascii(line))
1959 {
1960 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00001961 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001962 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00001963
Bram Moolenaarb765d632005-06-07 21:00:02 +00001964#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001965 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001966 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001967 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001968 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001969 if (pc == NULL)
1970 {
1971 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
1972 fname, lnum, line);
1973 continue;
1974 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001975 w = pc;
1976 }
1977 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00001978#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001979 {
1980 pc = NULL;
1981 w = line;
1982 }
1983
Bram Moolenaar51485f02005-06-04 21:55:20 +00001984 /* Store the word in the hashtable to be able to find duplicates. */
1985 dw = (char_u *)getroom_save(&spin->si_blocks, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001986 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001987 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001988 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001989 if (retval == FAIL)
1990 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001991
Bram Moolenaar51485f02005-06-04 21:55:20 +00001992 hash = hash_hash(dw);
1993 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001994 if (!HASHITEM_EMPTY(hi))
1995 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00001996 fname, lnum, line);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001997 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00001998 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001999
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002000 flags = 0;
2001 if (afflist != NULL)
2002 {
2003 /* Check for affix name that stands for keep-case word and stands
2004 * for rare word (if defined). */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002005 if (affile->af_kep != NUL
2006 && vim_strchr(afflist, affile->af_kep) != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002007 flags |= WF_KEEPCAP;
2008 if (affile->af_rar != NUL
2009 && vim_strchr(afflist, affile->af_rar) != NULL)
2010 flags |= WF_RARE;
2011 }
2012
Bram Moolenaar51485f02005-06-04 21:55:20 +00002013 /* Add the word to the word tree(s). */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002014 if (store_word(dw, spin, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002015 retval = FAIL;
2016
2017 if (afflist != NULL)
2018 {
2019 /* Find all matching suffixes and add the resulting words.
2020 * Additionally do matching prefixes that combine. */
2021 if (store_aff_word(dw, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002022 &affile->af_suff, &affile->af_pref,
2023 FALSE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002024 retval = FAIL;
2025
2026 /* Find all matching prefixes and add the resulting words. */
2027 if (store_aff_word(dw, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002028 &affile->af_pref, NULL, FALSE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002029 retval = FAIL;
2030 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002031 }
2032
Bram Moolenaar51485f02005-06-04 21:55:20 +00002033 if (spin->si_ascii && non_ascii > 0)
2034 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
2035 non_ascii);
2036 hash_clear(&ht);
2037
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002038 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002039 return retval;
2040}
2041
2042/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002043 * Apply affixes to a word and store the resulting words.
2044 * "ht" is the hashtable with affentry_T that need to be applied, either
2045 * prefixes or suffixes.
2046 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
2047 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002048 *
2049 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002050 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002051 static int
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002052store_aff_word(word, spin, afflist, ht, xht, comb, flags)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002053 char_u *word; /* basic word start */
2054 spellinfo_T *spin; /* spell info */
2055 char_u *afflist; /* list of names of supported affixes */
2056 hashtab_T *ht;
2057 hashtab_T *xht;
2058 int comb; /* only use affixes that combine */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002059 int flags; /* flags for the word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002060{
2061 int todo;
2062 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002063 affheader_T *ah;
2064 affentry_T *ae;
2065 regmatch_T regmatch;
2066 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002067 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002068 int i;
2069 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002070
Bram Moolenaar51485f02005-06-04 21:55:20 +00002071 todo = ht->ht_used;
2072 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002073 {
2074 if (!HASHITEM_EMPTY(hi))
2075 {
2076 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002077 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00002078
Bram Moolenaar51485f02005-06-04 21:55:20 +00002079 /* Check that the affix combines, if required, and that the word
2080 * supports this affix. */
2081 if ((!comb || ah->ah_combine)
2082 && vim_strchr(afflist, *ah->ah_key) != NULL)
Bram Moolenaar5482f332005-04-17 20:18:43 +00002083 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002084 /* Loop over all affix entries with this name. */
2085 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002086 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002087 /* Check the condition. It's not logical to match case
2088 * here, but it is required for compatibility with
2089 * Myspell. */
2090 regmatch.regprog = ae->ae_prog;
2091 regmatch.rm_ic = FALSE;
2092 if (ae->ae_prog == NULL
2093 || vim_regexec(&regmatch, word, (colnr_T)0))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002094 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002095 /* Match. Remove the chop and add the affix. */
2096 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002097 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002098 /* prefix: chop/add at the start of the word */
2099 if (ae->ae_add == NULL)
2100 *newword = NUL;
2101 else
2102 STRCPY(newword, ae->ae_add);
2103 p = word;
2104 if (ae->ae_chop != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00002105 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002106 /* Skip chop string. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002107#ifdef FEAT_MBYTE
2108 if (has_mbyte)
2109 i = mb_charlen(ae->ae_chop);
2110 else
2111#endif
2112 i = STRLEN(ae->ae_chop);
2113 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002114 mb_ptr_adv(p);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002115 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002116 STRCAT(newword, p);
2117 }
2118 else
2119 {
2120 /* suffix: chop/add at the end of the word */
2121 STRCPY(newword, word);
2122 if (ae->ae_chop != NULL)
2123 {
2124 /* Remove chop string. */
2125 p = newword + STRLEN(newword);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002126#ifdef FEAT_MBYTE
2127 if (has_mbyte)
2128 i = mb_charlen(ae->ae_chop);
2129 else
2130#endif
2131 i = STRLEN(ae->ae_chop);
2132 for ( ; i > 0; --i)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002133 mb_ptr_back(newword, p);
2134 *p = NUL;
2135 }
2136 if (ae->ae_add != NULL)
2137 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002138 }
2139
Bram Moolenaar51485f02005-06-04 21:55:20 +00002140 /* Store the modified word. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002141 if (store_word(newword, spin, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002142 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002143
Bram Moolenaar51485f02005-06-04 21:55:20 +00002144 /* When added a suffix and combining is allowed also
2145 * try adding prefixes additionally. */
2146 if (xht != NULL && ah->ah_combine)
2147 if (store_aff_word(newword, spin, afflist,
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002148 xht, NULL, TRUE, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002149 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002150 }
2151 }
2152 }
2153 }
2154 }
2155
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002156 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002157}
2158
2159/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002160 * Read a file with a list of words.
2161 */
2162 static int
2163spell_read_wordfile(fname, spin)
2164 char_u *fname;
2165 spellinfo_T *spin;
2166{
2167 FILE *fd;
2168 long lnum = 0;
2169 char_u rline[MAXLINELEN];
2170 char_u *line;
2171 char_u *pc = NULL;
2172 int l;
2173 int retval = OK;
2174 int did_word = FALSE;
2175 int non_ascii = 0;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002176 int flags;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002177
2178 /*
2179 * Open the file.
2180 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002181 fd = mch_fopen((char *)fname, "r");
Bram Moolenaar51485f02005-06-04 21:55:20 +00002182 if (fd == NULL)
2183 {
2184 EMSG2(_(e_notopen), fname);
2185 return FAIL;
2186 }
2187
Bram Moolenaarb765d632005-06-07 21:00:02 +00002188 if (spin->si_verbose || p_verbose > 2)
2189 {
2190 if (!spin->si_verbose)
2191 verbose_enter();
2192 smsg((char_u *)_("Reading word file %s..."), fname);
2193 out_flush();
2194 if (!spin->si_verbose)
2195 verbose_leave();
2196 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002197
2198 /*
2199 * Read all the lines in the file one by one.
2200 */
2201 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
2202 {
2203 line_breakcheck();
2204 ++lnum;
2205
2206 /* Skip comment lines. */
2207 if (*rline == '#')
2208 continue;
2209
2210 /* Remove CR, LF and white space from the end. */
2211 l = STRLEN(rline);
2212 while (l > 0 && rline[l - 1] <= ' ')
2213 --l;
2214 if (l == 0)
2215 continue; /* empty or blank line */
2216 rline[l] = NUL;
2217
2218 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
2219 vim_free(pc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002220#ifdef FEAT_MBYTE
Bram Moolenaar51485f02005-06-04 21:55:20 +00002221 if (spin->si_conv.vc_type != CONV_NONE)
2222 {
2223 pc = string_convert(&spin->si_conv, rline, NULL);
2224 if (pc == NULL)
2225 {
2226 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
2227 fname, lnum, rline);
2228 continue;
2229 }
2230 line = pc;
2231 }
2232 else
Bram Moolenaarb765d632005-06-07 21:00:02 +00002233#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002234 {
2235 pc = NULL;
2236 line = rline;
2237 }
2238
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002239 flags = 0;
2240
2241 if (*line == '/')
Bram Moolenaar51485f02005-06-04 21:55:20 +00002242 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002243 ++line;
2244 if (STRNCMP(line, "encoding=", 9) == 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002245 {
2246 if (spin->si_conv.vc_type != CONV_NONE)
2247 smsg((char_u *)_("Duplicate =encoding= line ignored in %s line %d: %s"),
2248 fname, lnum, line);
2249 else if (did_word)
2250 smsg((char_u *)_("=encoding= line after word ignored in %s line %d: %s"),
2251 fname, lnum, line);
2252 else
2253 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002254#ifdef FEAT_MBYTE
2255 char_u *enc;
2256
Bram Moolenaar51485f02005-06-04 21:55:20 +00002257 /* Setup for conversion to 'encoding'. */
2258 enc = enc_canonize(line + 10);
2259 if (enc != NULL && !spin->si_ascii
2260 && convert_setup(&spin->si_conv, enc,
2261 p_enc) == FAIL)
2262 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
2263 fname, line + 10, p_enc);
2264 vim_free(enc);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002265#else
2266 smsg((char_u *)_("Conversion in %s not supported"), fname);
2267#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00002268 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002269 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002270 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002271
2272 if (*line == '=')
2273 {
2274 /* keep-case word */
2275 flags |= WF_KEEPCAP;
2276 ++line;
2277 }
2278
2279 if (*line == '!')
2280 {
2281 /* Bad, bad, wicked word. */
2282 flags |= WF_BANNED;
2283 ++line;
2284 }
2285 else if (*line == '?')
2286 {
2287 /* Rare word. */
2288 flags |= WF_RARE;
2289 ++line;
2290 }
2291
2292 if (flags == 0)
2293 {
2294 smsg((char_u *)_("/ line ignored in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00002295 fname, lnum, line);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002296 continue;
2297 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002298 }
2299
2300 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
2301 if (spin->si_ascii && has_non_ascii(line))
2302 {
2303 ++non_ascii;
2304 continue;
2305 }
2306
2307 /* Normal word: store it. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002308 if (store_word(line, spin, flags) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002309 {
2310 retval = FAIL;
2311 break;
2312 }
2313 did_word = TRUE;
2314 }
2315
2316 vim_free(pc);
2317 fclose(fd);
2318
Bram Moolenaarb765d632005-06-07 21:00:02 +00002319 if (spin->si_ascii && non_ascii > 0 && (spin->si_verbose || p_verbose > 2))
2320 {
2321 if (p_verbose > 2)
2322 verbose_enter();
Bram Moolenaar51485f02005-06-04 21:55:20 +00002323 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
2324 non_ascii);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002325 if (p_verbose > 2)
2326 verbose_leave();
2327 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002328 return retval;
2329}
2330
2331/*
2332 * Get part of an sblock_T, "len" bytes long.
2333 * This avoids calling free() for every little struct we use.
2334 * The memory is cleared to all zeros.
2335 * Returns NULL when out of memory.
2336 */
2337 static void *
2338getroom(blp, len)
2339 sblock_T **blp;
2340 size_t len; /* length needed */
2341{
2342 char_u *p;
2343 sblock_T *bl = *blp;
2344
2345 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
2346 {
2347 /* Allocate a block of memory. This is not freed until much later. */
2348 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
2349 if (bl == NULL)
2350 return NULL;
2351 bl->sb_next = *blp;
2352 *blp = bl;
2353 bl->sb_used = 0;
2354 }
2355
2356 p = bl->sb_data + bl->sb_used;
2357 bl->sb_used += len;
2358
2359 return p;
2360}
2361
2362/*
2363 * Make a copy of a string into memory allocated with getroom().
2364 */
2365 static char_u *
2366getroom_save(blp, s)
2367 sblock_T **blp;
2368 char_u *s;
2369{
2370 char_u *sc;
2371
2372 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
2373 if (sc != NULL)
2374 STRCPY(sc, s);
2375 return sc;
2376}
2377
2378
2379/*
2380 * Free the list of allocated sblock_T.
2381 */
2382 static void
2383free_blocks(bl)
2384 sblock_T *bl;
2385{
2386 sblock_T *next;
2387
2388 while (bl != NULL)
2389 {
2390 next = bl->sb_next;
2391 vim_free(bl);
2392 bl = next;
2393 }
2394}
2395
2396/*
2397 * Allocate the root of a word tree.
2398 */
2399 static wordnode_T *
2400wordtree_alloc(blp)
2401 sblock_T **blp;
2402{
2403 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
2404}
2405
2406/*
2407 * Store a word in the tree(s).
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002408 * Always store it in the case-folded tree. A keep-case word can also be used
2409 * with all caps.
Bram Moolenaar51485f02005-06-04 21:55:20 +00002410 * For a keep-case word also store it in the keep-case tree.
2411 */
2412 static int
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002413store_word(word, spin, flags)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002414 char_u *word;
2415 spellinfo_T *spin;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002416 int flags; /* extra flags, WF_BANNED */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002417{
2418 int len = STRLEN(word);
2419 int ct = captype(word, word + len);
2420 char_u foldword[MAXWLEN];
2421 int res;
2422
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002423 if (flags & WF_KEEPCAP)
2424 res = OK; /* keep-case specified, don't add as fold-case */
2425 else
2426 {
2427 (void)spell_casefold(word, len, foldword, MAXWLEN);
2428 res = tree_add_word(foldword, spin->si_foldroot,
2429 (ct == WF_KEEPCAP ? WF_ALLCAP : ct) | flags,
2430 spin->si_region, &spin->si_blocks);
2431 }
2432
2433 if (res == OK && (ct == WF_KEEPCAP || flags & WF_KEEPCAP))
2434 res = tree_add_word(word, spin->si_keeproot, flags,
2435 spin->si_region, &spin->si_blocks);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002436 return res;
2437}
2438
2439/*
2440 * Add word "word" to a word tree at "root".
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002441 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002442 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002443 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00002444tree_add_word(word, root, flags, region, blp)
2445 char_u *word;
2446 wordnode_T *root;
2447 int flags;
2448 int region;
2449 sblock_T **blp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002450{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002451 wordnode_T *node = root;
2452 wordnode_T *np;
2453 wordnode_T **prev = NULL;
2454 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002455
Bram Moolenaar51485f02005-06-04 21:55:20 +00002456 /* Add each byte of the word to the tree, including the NUL at the end. */
2457 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002458 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002459 /* Look for the sibling that has the same character. They are sorted
2460 * on byte value, thus stop searching when a sibling is found with a
2461 * higher byte value. For zero bytes (end of word) check that the
2462 * flags are equal, there is a separate zero byte for each flag value.
2463 */
2464 while (node != NULL && (node->wn_byte < word[i]
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002465 || (node->wn_byte == 0 && node->wn_flags != (flags & 0xff))))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002466 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002467 prev = &node->wn_sibling;
2468 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002469 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002470 if (node == NULL || node->wn_byte != word[i])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002471 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002472 /* Allocate a new node. */
2473 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
2474 if (np == NULL)
2475 return FAIL;
2476 np->wn_byte = word[i];
2477 *prev = np;
2478 np->wn_sibling = node;
2479 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002480 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002481
Bram Moolenaar51485f02005-06-04 21:55:20 +00002482 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002483 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002484 node->wn_flags = flags;
2485 node->wn_region |= region;
2486 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00002487 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002488 prev = &node->wn_child;
2489 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002490 }
2491
2492 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002493}
2494
2495/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002496 * Compress a tree: find tails that are identical and can be shared.
2497 */
2498 static void
Bram Moolenaarb765d632005-06-07 21:00:02 +00002499wordtree_compress(root, spin)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002500 wordnode_T *root;
Bram Moolenaarb765d632005-06-07 21:00:02 +00002501 spellinfo_T *spin;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002502{
2503 hashtab_T ht;
2504 int n;
2505 int tot = 0;
2506
2507 if (root != NULL)
2508 {
2509 hash_init(&ht);
2510 n = node_compress(root, &ht, &tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002511 if (spin->si_verbose || p_verbose > 2)
2512 {
2513 if (!spin->si_verbose)
2514 verbose_enter();
2515 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00002516 n, tot, (tot - n) * 100 / tot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00002517 if (p_verbose > 2)
2518 verbose_leave();
2519 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002520 hash_clear(&ht);
2521 }
2522}
2523
2524/*
2525 * Compress a node, its siblings and its children, depth first.
2526 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002527 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002528 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00002529node_compress(node, ht, tot)
2530 wordnode_T *node;
2531 hashtab_T *ht;
2532 int *tot; /* total count of nodes before compressing,
2533 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002534{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002535 wordnode_T *np;
2536 wordnode_T *tp;
2537 wordnode_T *child;
2538 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002539 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002540 int len = 0;
2541 unsigned nr, n;
2542 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002543
Bram Moolenaar51485f02005-06-04 21:55:20 +00002544 /*
2545 * Go through the list of siblings. Compress each child and then try
2546 * finding an identical child to replace it.
2547 * Note that with "child" we mean not just the node that is pointed to,
2548 * but the whole list of siblings, of which the node is the first.
2549 */
2550 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002551 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002552 ++len;
2553 if ((child = np->wn_child) != NULL)
2554 {
2555 /* Compress the child. This fills wn_hashkey. */
2556 compressed += node_compress(child, ht, tot);
2557
2558 /* Try to find an identical child. */
2559 hash = hash_hash(child->wn_hashkey);
2560 hi = hash_lookup(ht, child->wn_hashkey, hash);
2561 tp = NULL;
2562 if (!HASHITEM_EMPTY(hi))
2563 {
2564 /* There are children with an identical hash value. Now check
2565 * if there is one that is really identical. */
2566 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next)
2567 if (node_equal(child, tp))
2568 {
2569 /* Found one! Now use that child in place of the
2570 * current one. This means the current child is
2571 * dropped from the tree. */
2572 np->wn_child = tp;
2573 ++compressed;
2574 break;
2575 }
2576 if (tp == NULL)
2577 {
2578 /* No other child with this hash value equals the child of
2579 * the node, add it to the linked list after the first
2580 * item. */
2581 tp = HI2WN(hi);
2582 child->wn_next = tp->wn_next;
2583 tp->wn_next = child;
2584 }
2585 }
2586 else
2587 /* No other child has this hash value, add it to the
2588 * hashtable. */
2589 hash_add_item(ht, hi, child->wn_hashkey, hash);
2590 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002591 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002592 *tot += len;
2593
2594 /*
2595 * Make a hash key for the node and its siblings, so that we can quickly
2596 * find a lookalike node. This must be done after compressing the sibling
2597 * list, otherwise the hash key would become invalid by the compression.
2598 */
2599 node->wn_hashkey[0] = len;
2600 nr = 0;
2601 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002602 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002603 if (np->wn_byte == NUL)
2604 /* end node: only use wn_flags and wn_region */
2605 n = np->wn_flags + (np->wn_region << 8);
2606 else
2607 /* byte node: use the byte value and the child pointer */
2608 n = np->wn_byte + ((long_u)np->wn_child << 8);
2609 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002610 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002611
2612 /* Avoid NUL bytes, it terminates the hash key. */
2613 n = nr & 0xff;
2614 node->wn_hashkey[1] = n == 0 ? 1 : n;
2615 n = (nr >> 8) & 0xff;
2616 node->wn_hashkey[2] = n == 0 ? 1 : n;
2617 n = (nr >> 16) & 0xff;
2618 node->wn_hashkey[3] = n == 0 ? 1 : n;
2619 n = (nr >> 24) & 0xff;
2620 node->wn_hashkey[4] = n == 0 ? 1 : n;
2621 node->wn_hashkey[5] = NUL;
2622
2623 return compressed;
2624}
2625
2626/*
2627 * Return TRUE when two nodes have identical siblings and children.
2628 */
2629 static int
2630node_equal(n1, n2)
2631 wordnode_T *n1;
2632 wordnode_T *n2;
2633{
2634 wordnode_T *p1;
2635 wordnode_T *p2;
2636
2637 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
2638 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
2639 if (p1->wn_byte != p2->wn_byte
2640 || (p1->wn_byte == NUL
2641 ? (p1->wn_flags != p2->wn_flags
2642 || p1->wn_region != p2->wn_region)
2643 : (p1->wn_child != p2->wn_child)))
2644 break;
2645
2646 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002647}
2648
2649/*
2650 * Write a number to file "fd", MSB first, in "len" bytes.
2651 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002652 void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002653put_bytes(fd, nr, len)
2654 FILE *fd;
2655 long_u nr;
2656 int len;
2657{
2658 int i;
2659
2660 for (i = len - 1; i >= 0; --i)
2661 putc((int)(nr >> (i * 8)), fd);
2662}
2663
2664/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002665 * Write the Vim spell file "fname".
2666 */
2667 static void
Bram Moolenaar51485f02005-06-04 21:55:20 +00002668write_vim_spell(fname, spin, regcount, regchars)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002669 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002670 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002671 int regcount; /* number of regions */
2672 char_u *regchars; /* region names */
2673{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002674 FILE *fd;
2675 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002676 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002677 wordnode_T *tree;
2678 int nodecount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002679
Bram Moolenaarb765d632005-06-07 21:00:02 +00002680 fd = mch_fopen((char *)fname, "w");
Bram Moolenaar51485f02005-06-04 21:55:20 +00002681 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002682 {
2683 EMSG2(_(e_notopen), fname);
2684 return;
2685 }
2686
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002687 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
2688 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002689
2690 /* <fileID> */
2691 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
2692 EMSG(_(e_write));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002693
2694 /* write the region names if there is more than one */
2695 if (regcount > 1)
2696 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002697 putc(regcount, fd); /* <regioncnt> <regionname> ... */
2698 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, fd);
2699 regionmask = (1 << regcount) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002700 }
2701 else
2702 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002703 putc(0, fd);
2704 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002705 }
2706
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002707 /* Write the table with character flags and table for case folding.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002708 * <charflagslen> <charflags> <fcharlen> <fchars>
2709 * Skip this for ASCII, the table may conflict with the one used for
2710 * 'encoding'. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002711 if (spin->si_ascii)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002712 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002713 putc(0, fd);
2714 putc(0, fd);
2715 putc(0, fd);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002716 }
2717 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00002718 write_spell_chartab(fd);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002719
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002720
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002721 /* <SUGGEST> : <suggestlen> <more> ...
2722 * TODO. Only write a zero length for now. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002723 put_bytes(fd, 0L, 4); /* <suggestlen> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002724
Bram Moolenaar50cde822005-06-05 21:54:54 +00002725 spin->si_memtot = 0;
2726
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002727 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002728 * <LWORDTREE> <KWORDTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002729 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002730 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002731 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002732 tree = (round == 1) ? spin->si_foldroot : spin->si_keeproot;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002733
Bram Moolenaar51485f02005-06-04 21:55:20 +00002734 /* Count the number of nodes. Needed to be able to allocate the
2735 * memory when reading the nodes. Also fills in the index for shared
2736 * nodes. */
2737 nodecount = put_tree(NULL, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002738
Bram Moolenaar51485f02005-06-04 21:55:20 +00002739 /* number of nodes in 4 bytes */
2740 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00002741 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002742
Bram Moolenaar51485f02005-06-04 21:55:20 +00002743 /* Write the nodes. */
2744 (void)put_tree(fd, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002745 }
2746
Bram Moolenaar51485f02005-06-04 21:55:20 +00002747 fclose(fd);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002748}
2749
2750/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002751 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002752 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00002753 * This first writes the list of possible bytes (siblings). Then for each
2754 * byte recursively write the children.
2755 *
2756 * NOTE: The code here must match the code in read_tree(), since assumptions
2757 * are made about the indexes (so that we don't have to write them in the
2758 * file).
2759 *
2760 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002761 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002762 static int
2763put_tree(fd, node, index, regionmask)
2764 FILE *fd; /* NULL when only counting */
2765 wordnode_T *node;
2766 int index;
2767 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002768{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002769 int newindex = index;
2770 int siblingcount = 0;
2771 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002772 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002773
Bram Moolenaar51485f02005-06-04 21:55:20 +00002774 /* If "node" is zero the tree is empty. */
2775 if (node == NULL)
2776 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002777
Bram Moolenaar51485f02005-06-04 21:55:20 +00002778 /* Store the index where this node is written. */
2779 node->wn_index = index;
2780
2781 /* Count the number of siblings. */
2782 for (np = node; np != NULL; np = np->wn_sibling)
2783 ++siblingcount;
2784
2785 /* Write the sibling count. */
2786 if (fd != NULL)
2787 putc(siblingcount, fd); /* <siblingcount> */
2788
2789 /* Write each sibling byte and optionally extra info. */
2790 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002791 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002792 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002793 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002794 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002795 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002796 /* For a NUL byte (end of word) instead of the byte itself
2797 * we write the flag/region items. */
2798 flags = np->wn_flags;
2799 if (regionmask != 0 && np->wn_region != regionmask)
2800 flags |= WF_REGION;
2801 if (flags == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002802 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002803 /* word without flags or region */
2804 putc(BY_NOFLAGS, fd); /* <byte> */
2805 }
2806 else
2807 {
2808 putc(BY_FLAGS, fd); /* <byte> */
2809 putc(flags, fd); /* <flags> */
2810 if (flags & WF_REGION)
2811 putc(np->wn_region, fd); /* <regionmask> */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002812 }
2813 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002814 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002815 else
2816 {
2817 if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node)
2818 {
2819 /* The child is written elsewhere, write the reference. */
2820 if (fd != NULL)
2821 {
2822 putc(BY_INDEX, fd); /* <byte> */
2823 /* <nodeidx> */
2824 put_bytes(fd, (long_u)np->wn_child->wn_index, 3);
2825 }
2826 }
2827 else if (np->wn_child->wn_wnode == NULL)
2828 /* We will write the child below and give it an index. */
2829 np->wn_child->wn_wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002830
Bram Moolenaar51485f02005-06-04 21:55:20 +00002831 if (fd != NULL)
2832 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
2833 {
2834 EMSG(_(e_write));
2835 return 0;
2836 }
2837 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002838 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002839
2840 /* Space used in the array when reading: one for each sibling and one for
2841 * the count. */
2842 newindex += siblingcount + 1;
2843
2844 /* Recursively dump the children of each sibling. */
2845 for (np = node; np != NULL; np = np->wn_sibling)
2846 if (np->wn_byte != 0 && np->wn_child->wn_wnode == node)
2847 newindex = put_tree(fd, np->wn_child, newindex, regionmask);
2848
2849 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002850}
2851
2852
2853/*
Bram Moolenaarb765d632005-06-07 21:00:02 +00002854 * ":mkspell [-ascii] outfile infile ..."
2855 * ":mkspell [-ascii] addfile"
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002856 */
2857 void
2858ex_mkspell(eap)
2859 exarg_T *eap;
2860{
2861 int fcount;
2862 char_u **fnames;
Bram Moolenaarb765d632005-06-07 21:00:02 +00002863 char_u *arg = eap->arg;
2864 int ascii = FALSE;
2865
2866 if (STRNCMP(arg, "-ascii", 6) == 0)
2867 {
2868 ascii = TRUE;
2869 arg = skipwhite(arg + 6);
2870 }
2871
2872 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
2873 if (get_arglist_exp(arg, &fcount, &fnames) == OK)
2874 {
2875 mkspell(fcount, fnames, ascii, eap->forceit, TRUE);
2876 FreeWild(fcount, fnames);
2877 }
2878}
2879
2880/*
2881 * Create a Vim spell file from one or more word lists.
2882 * "fnames[0]" is the output file name.
2883 * "fnames[fcount - 1]" is the last input file name.
2884 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
2885 * and ".spl" is appended to make the output file name.
2886 */
2887 static void
2888mkspell(fcount, fnames, ascii, overwrite, verbose)
2889 int fcount;
2890 char_u **fnames;
2891 int ascii; /* -ascii argument given */
2892 int overwrite; /* overwrite existing output file */
2893 int verbose; /* give progress messages */
2894{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002895 char_u fname[MAXPATHL];
2896 char_u wfname[MAXPATHL];
Bram Moolenaarb765d632005-06-07 21:00:02 +00002897 char_u **innames;
2898 int incount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002899 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002900 int i;
2901 int len;
2902 char_u region_name[16];
2903 struct stat st;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002904 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002905 spellinfo_T spin;
2906
2907 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaarb765d632005-06-07 21:00:02 +00002908 spin.si_verbose = verbose;
2909 spin.si_ascii = ascii;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002910
Bram Moolenaarb765d632005-06-07 21:00:02 +00002911 /* default: fnames[0] is output file, following are input files */
2912 innames = &fnames[1];
2913 incount = fcount - 1;
2914
2915 if (fcount >= 1)
Bram Moolenaar5482f332005-04-17 20:18:43 +00002916 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002917 len = STRLEN(fnames[0]);
2918 if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
2919 {
2920 /* For ":mkspell path/en.latin1.add" output file is
2921 * "path/en.latin1.add.spl". */
2922 innames = &fnames[0];
2923 incount = 1;
2924 vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
2925 }
2926 else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
2927 {
2928 /* Name ends in ".spl", use as the file name. */
2929 STRNCPY(wfname, fnames[0], sizeof(wfname));
2930 wfname[sizeof(wfname) - 1] = NUL;
2931 }
2932 else
2933 /* Name should be language, make the file name from it. */
2934 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
2935 spin.si_ascii ? (char_u *)"ascii" : spell_enc());
2936
2937 /* Check for .ascii.spl. */
2938 if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
2939 spin.si_ascii = TRUE;
2940
2941 /* Check for .add.spl. */
2942 if (strstr((char *)gettail(wfname), ".add.") != NULL)
2943 spin.si_add = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00002944 }
2945
Bram Moolenaarb765d632005-06-07 21:00:02 +00002946 if (incount <= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002947 EMSG(_(e_invarg)); /* need at least output and input names */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002948 else if (incount > 8)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002949 EMSG(_("E754: Only up to 8 regions supported"));
2950 else
2951 {
2952 /* Check for overwriting before doing things that may take a lot of
2953 * time. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002954 if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002955 {
2956 EMSG(_(e_exists));
Bram Moolenaarb765d632005-06-07 21:00:02 +00002957 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002958 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002959 if (mch_isdir(wfname))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002960 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002961 EMSG2(_(e_isadir2), wfname);
2962 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002963 }
2964
2965 /*
2966 * Init the aff and dic pointers.
2967 * Get the region names if there are more than 2 arguments.
2968 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00002969 for (i = 0; i < incount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002970 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002971 afile[i] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002972
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002973 if (fcount > 2)
2974 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002975 len = STRLEN(innames[i]);
2976 if (STRLEN(gettail(innames[i])) < 5
2977 || innames[i][len - 3] != '_')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002978 {
Bram Moolenaarb765d632005-06-07 21:00:02 +00002979 EMSG2(_("E755: Invalid region in %s"), innames[i]);
2980 return;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002981 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002982 region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
2983 region_name[i * 2 + 1] = TOLOWER_ASC(innames[i][len - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002984 }
2985 }
2986
Bram Moolenaarb765d632005-06-07 21:00:02 +00002987 if (!spin.si_add)
2988 /* Clear the char type tables, don't want to use any of the
2989 * currently used spell properties. */
2990 init_spell_chartab();
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002991
Bram Moolenaar51485f02005-06-04 21:55:20 +00002992 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
2993 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
2994 if (spin.si_foldroot == NULL || spin.si_keeproot == NULL)
2995 {
2996 error = TRUE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00002997 return;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002998 }
2999
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003000 /*
3001 * Read all the .aff and .dic files.
3002 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00003003 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003004 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003005 for (i = 0; i < incount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003006 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003007 spin.si_conv.vc_type = CONV_NONE;
Bram Moolenaarb765d632005-06-07 21:00:02 +00003008 spin.si_region = 1 << i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003009
Bram Moolenaarb765d632005-06-07 21:00:02 +00003010 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003011 if (mch_stat((char *)fname, &st) >= 0)
3012 {
3013 /* Read the .aff file. Will init "spin->si_conv" based on the
3014 * "SET" line. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003015 afile[i] = spell_read_aff(fname, &spin);
3016 if (afile[i] == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003017 error = TRUE;
3018 else
3019 {
3020 /* Read the .dic file and store the words in the trees. */
3021 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
Bram Moolenaarb765d632005-06-07 21:00:02 +00003022 innames[i]);
3023 if (spell_read_dic(fname, &spin, afile[i]) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003024 error = TRUE;
3025 }
3026 }
3027 else
3028 {
3029 /* No .aff file, try reading the file as a word list. Store
3030 * the words in the trees. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003031 if (spell_read_wordfile(innames[i], &spin) == FAIL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00003032 error = TRUE;
3033 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003034
Bram Moolenaarb765d632005-06-07 21:00:02 +00003035#ifdef FEAT_MBYTE
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003036 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00003037 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003038#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003039 }
3040
Bram Moolenaar51485f02005-06-04 21:55:20 +00003041 if (!error)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003042 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00003043 /*
3044 * Remove the dummy NUL from the start of the tree root.
3045 */
3046 spin.si_foldroot = spin.si_foldroot->wn_sibling;
3047 spin.si_keeproot = spin.si_keeproot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003048
3049 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00003050 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003051 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003052 if (verbose || p_verbose > 2)
3053 {
3054 if (!verbose)
3055 verbose_enter();
3056 MSG(_("Compressing word tree..."));
3057 out_flush();
3058 if (!verbose)
3059 verbose_leave();
3060 }
3061 wordtree_compress(spin.si_foldroot, &spin);
3062 wordtree_compress(spin.si_keeproot, &spin);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003063 }
3064
Bram Moolenaar51485f02005-06-04 21:55:20 +00003065 if (!error)
3066 {
3067 /*
3068 * Write the info in the spell file.
3069 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003070 if (verbose || p_verbose > 2)
3071 {
3072 if (!verbose)
3073 verbose_enter();
3074 smsg((char_u *)_("Writing spell file %s..."), wfname);
3075 out_flush();
3076 if (!verbose)
3077 verbose_leave();
3078 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00003079
Bram Moolenaarb765d632005-06-07 21:00:02 +00003080 write_vim_spell(wfname, &spin, incount, region_name);
3081
3082 if (verbose || p_verbose > 2)
3083 {
3084 if (!verbose)
3085 verbose_enter();
3086 MSG(_("Done!"));
3087 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
Bram Moolenaar50cde822005-06-05 21:54:54 +00003088 spin.si_memtot);
Bram Moolenaarb765d632005-06-07 21:00:02 +00003089 out_flush();
3090 if (!verbose)
3091 verbose_leave();
3092 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003093
Bram Moolenaarb765d632005-06-07 21:00:02 +00003094 /* If the file is loaded need to reload it. */
3095 spell_reload_one(wfname);
Bram Moolenaar51485f02005-06-04 21:55:20 +00003096 }
3097
3098 /* Free the allocated memory. */
3099 free_blocks(spin.si_blocks);
3100
3101 /* Free the .aff file structures. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00003102 for (i = 0; i < incount; ++i)
3103 if (afile[i] != NULL)
3104 spell_free_aff(afile[i]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003105 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003106}
3107
Bram Moolenaarb765d632005-06-07 21:00:02 +00003108
3109/*
3110 * ":spellgood {word}"
3111 * ":spellwrong {word}"
3112 */
3113 void
3114ex_spell(eap)
3115 exarg_T *eap;
3116{
3117 spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong);
3118}
3119
3120/*
3121 * Add "word[len]" to 'spellfile' as a good or bad word.
3122 */
3123 void
3124spell_add_word(word, len, bad)
3125 char_u *word;
3126 int len;
3127 int bad;
3128{
3129 FILE *fd;
3130 buf_T *buf;
3131
3132 if (*curbuf->b_p_spf == NUL)
3133 init_spellfile();
3134 if (*curbuf->b_p_spf == NUL)
3135 EMSG(_("E999: 'spellfile' is not set"));
3136 else
3137 {
3138 /* Check that the user isn't editing the .add file somewhere. */
3139 buf = buflist_findname_exp(curbuf->b_p_spf);
3140 if (buf != NULL && buf->b_ml.ml_mfp == NULL)
3141 buf = NULL;
3142 if (buf != NULL && bufIsChanged(buf))
3143 EMSG(_(e_bufloaded));
3144 else
3145 {
3146 fd = mch_fopen((char *)curbuf->b_p_spf, "a");
3147 if (fd == NULL)
3148 EMSG2(_(e_notopen), curbuf->b_p_spf);
3149 else
3150 {
3151 if (bad)
3152 fprintf(fd, "/!%.*s\n", len, word);
3153 else
3154 fprintf(fd, "%.*s\n", len, word);
3155 fclose(fd);
3156
3157 /* Update the .add.spl file. */
3158 mkspell(1, &curbuf->b_p_spf, FALSE, TRUE, FALSE);
3159
3160 /* If the .add file is edited somewhere, reload it. */
3161 if (buf != NULL)
3162 buf_reload(buf);
3163 }
3164 }
3165 }
3166}
3167
3168/*
3169 * Initialize 'spellfile' for the current buffer.
3170 */
3171 static void
3172init_spellfile()
3173{
3174 char_u buf[MAXPATHL];
3175 int l;
3176 slang_T *sl;
3177 char_u *rtp;
3178
3179 if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
3180 {
3181 /* Loop over all entries in 'runtimepath'. */
3182 rtp = p_rtp;
3183 while (*rtp != NUL)
3184 {
3185 /* Copy the path from 'runtimepath' to buf[]. */
3186 copy_option_part(&rtp, buf, MAXPATHL, ",");
3187 if (filewritable(buf) == 2)
3188 {
3189 sl = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang;
3190 l = STRLEN(buf);
3191 vim_snprintf((char *)buf + l, MAXPATHL - l,
3192 "/spell/%.2s.%s.add",
3193 sl->sl_name,
3194 strstr((char *)gettail(sl->sl_fname), ".ascii.") != NULL
3195 ? (char_u *)"ascii" : spell_enc());
3196 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
3197 break;
3198 }
3199 }
3200 }
3201}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003202
Bram Moolenaar51485f02005-06-04 21:55:20 +00003203
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003204/*
3205 * Init the chartab used for spelling for ASCII.
3206 * EBCDIC is not supported!
3207 */
3208 static void
3209clear_spell_chartab(sp)
3210 spelltab_T *sp;
3211{
3212 int i;
3213
3214 /* Init everything to FALSE. */
3215 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
3216 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
3217 for (i = 0; i < 256; ++i)
3218 sp->st_fold[i] = i;
3219
3220 /* We include digits. A word shouldn't start with a digit, but handling
3221 * that is done separately. */
3222 for (i = '0'; i <= '9'; ++i)
3223 sp->st_isw[i] = TRUE;
3224 for (i = 'A'; i <= 'Z'; ++i)
3225 {
3226 sp->st_isw[i] = TRUE;
3227 sp->st_isu[i] = TRUE;
3228 sp->st_fold[i] = i + 0x20;
3229 }
3230 for (i = 'a'; i <= 'z'; ++i)
3231 sp->st_isw[i] = TRUE;
3232}
3233
3234/*
3235 * Init the chartab used for spelling. Only depends on 'encoding'.
3236 * Called once while starting up and when 'encoding' changes.
3237 * The default is to use isalpha(), but the spell file should define the word
3238 * characters to make it possible that 'encoding' differs from the current
3239 * locale.
3240 */
3241 void
3242init_spell_chartab()
3243{
3244 int i;
3245
3246 did_set_spelltab = FALSE;
3247 clear_spell_chartab(&spelltab);
3248
3249#ifdef FEAT_MBYTE
3250 if (enc_dbcs)
3251 {
3252 /* DBCS: assume double-wide characters are word characters. */
3253 for (i = 128; i <= 255; ++i)
3254 if (MB_BYTE2LEN(i) == 2)
3255 spelltab.st_isw[i] = TRUE;
3256 }
3257 else
3258#endif
3259 {
3260 /* Rough guess: use isalpha() and isupper() for characters above 128. */
3261 for (i = 128; i < 256; ++i)
3262 {
3263 spelltab.st_isw[i] = MB_ISUPPER(i) || MB_ISLOWER(i);
3264 if (MB_ISUPPER(i))
3265 {
3266 spelltab.st_isu[i] = TRUE;
3267 spelltab.st_fold[i] = MB_TOLOWER(i);
3268 }
3269 }
3270 }
3271}
3272
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003273static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
3274static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
3275
3276/*
3277 * Set the spell character tables from strings in the affix file.
3278 */
3279 static int
3280set_spell_chartab(fol, low, upp)
3281 char_u *fol;
3282 char_u *low;
3283 char_u *upp;
3284{
3285 /* We build the new tables here first, so that we can compare with the
3286 * previous one. */
3287 spelltab_T new_st;
3288 char_u *pf = fol, *pl = low, *pu = upp;
3289 int f, l, u;
3290
3291 clear_spell_chartab(&new_st);
3292
3293 while (*pf != NUL)
3294 {
3295 if (*pl == NUL || *pu == NUL)
3296 {
3297 EMSG(_(e_affform));
3298 return FAIL;
3299 }
3300#ifdef FEAT_MBYTE
3301 f = mb_ptr2char_adv(&pf);
3302 l = mb_ptr2char_adv(&pl);
3303 u = mb_ptr2char_adv(&pu);
3304#else
3305 f = *pf++;
3306 l = *pl++;
3307 u = *pu++;
3308#endif
3309 /* Every character that appears is a word character. */
3310 if (f < 256)
3311 new_st.st_isw[f] = TRUE;
3312 if (l < 256)
3313 new_st.st_isw[l] = TRUE;
3314 if (u < 256)
3315 new_st.st_isw[u] = TRUE;
3316
3317 /* if "LOW" and "FOL" are not the same the "LOW" char needs
3318 * case-folding */
3319 if (l < 256 && l != f)
3320 {
3321 if (f >= 256)
3322 {
3323 EMSG(_(e_affrange));
3324 return FAIL;
3325 }
3326 new_st.st_fold[l] = f;
3327 }
3328
3329 /* if "UPP" and "FOL" are not the same the "UPP" char needs
3330 * case-folding and it's upper case. */
3331 if (u < 256 && u != f)
3332 {
3333 if (f >= 256)
3334 {
3335 EMSG(_(e_affrange));
3336 return FAIL;
3337 }
3338 new_st.st_fold[u] = f;
3339 new_st.st_isu[u] = TRUE;
3340 }
3341 }
3342
3343 if (*pl != NUL || *pu != NUL)
3344 {
3345 EMSG(_(e_affform));
3346 return FAIL;
3347 }
3348
3349 return set_spell_finish(&new_st);
3350}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003351
3352/*
3353 * Set the spell character tables from strings in the .spl file.
3354 */
3355 static int
3356set_spell_charflags(flags, cnt, upp)
3357 char_u *flags;
3358 int cnt;
3359 char_u *upp;
3360{
3361 /* We build the new tables here first, so that we can compare with the
3362 * previous one. */
3363 spelltab_T new_st;
3364 int i;
3365 char_u *p = upp;
3366
3367 clear_spell_chartab(&new_st);
3368
3369 for (i = 0; i < cnt; ++i)
3370 {
3371 new_st.st_isw[i + 128] = (flags[i] & SPELL_ISWORD) != 0;
3372 new_st.st_isu[i + 128] = (flags[i] & SPELL_ISUPPER) != 0;
3373
3374 if (*p == NUL)
3375 return FAIL;
3376#ifdef FEAT_MBYTE
3377 new_st.st_fold[i + 128] = mb_ptr2char_adv(&p);
3378#else
3379 new_st.st_fold[i + 128] = *p++;
3380#endif
3381 }
3382
3383 return set_spell_finish(&new_st);
3384}
3385
3386 static int
3387set_spell_finish(new_st)
3388 spelltab_T *new_st;
3389{
3390 int i;
3391
3392 if (did_set_spelltab)
3393 {
3394 /* check that it's the same table */
3395 for (i = 0; i < 256; ++i)
3396 {
3397 if (spelltab.st_isw[i] != new_st->st_isw[i]
3398 || spelltab.st_isu[i] != new_st->st_isu[i]
3399 || spelltab.st_fold[i] != new_st->st_fold[i])
3400 {
3401 EMSG(_("E763: Word characters differ between spell files"));
3402 return FAIL;
3403 }
3404 }
3405 }
3406 else
3407 {
3408 /* copy the new spelltab into the one being used */
3409 spelltab = *new_st;
3410 did_set_spelltab = TRUE;
3411 }
3412
3413 return OK;
3414}
3415
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003416/*
3417 * Write the current tables into the .spl file.
3418 * This makes sure the same characters are recognized as word characters when
3419 * generating an when using a spell file.
3420 */
3421 static void
3422write_spell_chartab(fd)
3423 FILE *fd;
3424{
3425 char_u charbuf[256 * 4];
3426 int len = 0;
3427 int flags;
3428 int i;
3429
3430 fputc(128, fd); /* <charflagslen> */
3431 for (i = 128; i < 256; ++i)
3432 {
3433 flags = 0;
3434 if (spelltab.st_isw[i])
3435 flags |= SPELL_ISWORD;
3436 if (spelltab.st_isu[i])
3437 flags |= SPELL_ISUPPER;
3438 fputc(flags, fd); /* <charflags> */
3439
Bram Moolenaarb765d632005-06-07 21:00:02 +00003440#ifdef FEAT_MBYTE
3441 if (has_mbyte)
3442 len += mb_char2bytes(spelltab.st_fold[i], charbuf + len);
3443 else
3444#endif
3445 charbuf[len++] = spelltab.st_fold[i];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003446 }
3447
3448 put_bytes(fd, (long_u)len, 2); /* <fcharlen> */
3449 fwrite(charbuf, (size_t)len, (size_t)1, fd); /* <fchars> */
3450}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003451
3452/*
3453 * Return TRUE if "c" is an upper-case character for spelling.
3454 */
3455 static int
3456spell_isupper(c)
3457 int c;
3458{
3459# ifdef FEAT_MBYTE
3460 if (enc_utf8)
3461 {
3462 /* For Unicode we can call utf_isupper(), but don't do that for ASCII,
3463 * because we don't want to use 'casemap' here. */
3464 if (c >= 128)
3465 return utf_isupper(c);
3466 }
3467 else if (has_mbyte && c > 256)
3468 {
3469 /* For characters above 255 we don't have something specfied.
3470 * Fall back to locale-dependent iswupper(). If not available
3471 * simply return FALSE. */
3472# ifdef HAVE_ISWUPPER
3473 return iswupper(c);
3474# else
3475 return FALSE;
3476# endif
3477 }
3478# endif
3479 return spelltab.st_isu[c];
3480}
3481
3482/*
3483 * Case-fold "p[len]" into "buf[buflen]". Used for spell checking.
3484 * When using a multi-byte 'encoding' the length may change!
3485 * Returns FAIL when something wrong.
3486 */
3487 static int
3488spell_casefold(p, len, buf, buflen)
3489 char_u *p;
3490 int len;
3491 char_u *buf;
3492 int buflen;
3493{
3494 int i;
3495
3496 if (len >= buflen)
3497 {
3498 buf[0] = NUL;
3499 return FAIL; /* result will not fit */
3500 }
3501
3502#ifdef FEAT_MBYTE
3503 if (has_mbyte)
3504 {
3505 int c;
3506 int outi = 0;
3507
3508 /* Fold one character at a time. */
3509 for (i = 0; i < len; i += mb_ptr2len_check(p + i))
3510 {
3511 c = mb_ptr2char(p + i);
3512 if (enc_utf8)
3513 /* For Unicode case folding is always the same, no need to use
3514 * the table from the spell file. */
3515 c = utf_fold(c);
3516 else if (c < 256)
3517 /* Use the table from the spell file. */
3518 c = spelltab.st_fold[c];
3519# ifdef HAVE_TOWLOWER
3520 else
3521 /* We don't know what to do, fall back to towlower(), it
3522 * depends on the current locale. */
3523 c = towlower(c);
3524# endif
3525 if (outi + MB_MAXBYTES > buflen)
3526 {
3527 buf[outi] = NUL;
3528 return FAIL;
3529 }
3530 outi += mb_char2bytes(c, buf + outi);
3531 }
3532 buf[outi] = NUL;
3533 }
3534 else
3535#endif
3536 {
3537 /* Be quick for non-multibyte encodings. */
3538 for (i = 0; i < len; ++i)
3539 buf[i] = spelltab.st_fold[p[i]];
3540 buf[i] = NUL;
3541 }
3542
3543 return OK;
3544}
3545
3546
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00003547#endif /* FEAT_SYN_HL */