/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved    by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * spell.c: code for spell checking
 *
 * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
 * has a list of bytes that can appear (siblings).  For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 * A NUL byte is used where the word may end.
 *
 * There are two trees: one with case-folded words and one with words in
 * original case.  The second one is only used for keep-case words and is
 * usually small.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */
30
/*
 * Vim spell file format: <HEADER> <SUGGEST> <LWORDTREE> <KWORDTREE>
 *
 * <HEADER>: <fileID> <regioncnt> <regionname> ...
 *           <charflagslen> <charflags> <fcharslen> <fchars>
 *
 * <fileID>       10 bytes  "VIMspell05"
 * <regioncnt>    1 byte    number of regions following (8 supported)
 * <regionname>   2 bytes   Region name: ca, au, etc.
 *                          First <regionname> is region 1.
 *
 * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
 * <charflags>    N bytes   List of flags (first one is for character 128):
 *                              0x01  word character
 *                              0x02  upper-case character
 * <fcharslen>    2 bytes   Number of bytes in <fchars>.
 * <fchars>       N bytes   Folded characters, first one is for character 128.
 *
 *
 * <SUGGEST> : <suggestlen> <more> ...
 *
 * <suggestlen>   4 bytes   Length of <SUGGEST> in bytes, excluding
 *                          <suggestlen>.  MSB first.
 * <more>                   To be defined.
 *
 *
 * <LWORDTREE>: <wordtree>
 *
 * <wordtree>: <nodecount> <nodedata> ...
 *
 * <nodecount>    4 bytes   Number of nodes following.  MSB first.
 *
 * <nodedata>: <siblingcount> <sibling> ...
 *
 * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
 *                          follow in sorted order.
 *
 * <sibling>: <byte> [<nodeidx> <xbyte> | <flags> [<region>]]
 *
 * <byte>         1 byte    Byte value of the sibling.  Special cases:
 *                          BY_NOFLAGS: End of word without flags and for all
 *                                      regions.
 *                          BY_FLAGS:   End of word, <flags> follow.
 *                          BY_INDEX:   Child of sibling is shared, <nodeidx>
 *                                      and <xbyte> follow.
 *
 * <nodeidx>      3 bytes   Index of child for this sibling, MSB first.
 *
 * <xbyte>        1 byte    byte value of the sibling.
 *
 * <flags>        1 byte    bitmask of:
 *                          WF_ALLCAP   word must have only capitals
 *                          WF_ONECAP   first char of word must be capital
 *                          WF_RARE     rare word
 *                          WF_REGION   <region> follows
 *
 * <region>       1 byte    Bitmask for regions in which word is valid.  When
 *                          omitted it's valid in all regions.
 *                          Lowest bit is for region 1.
 *
 * <KWORDTREE>: <wordtree>
 *
 *
 * All text characters are in 'encoding', but stored as single bytes.
 * The region name is ASCII.
 */
97
Bram Moolenaare19defe2005-03-21 08:23:33 +000098#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
99# include <io.h> /* for lseek(), must be before vim.h */
100#endif
101
102#include "vim.h"
103
104#if defined(FEAT_SYN_HL) || defined(PROTO)
105
106#ifdef HAVE_FCNTL_H
107# include <fcntl.h>
108#endif
109
#define MAXWLEN 250             /* assume max. word len is this many bytes */

/* Flags used for a word.  The first four are the bits of the <flags> byte
 * described in the spell file format above. */
#define WF_REGION   0x01        /* region byte follows */
#define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
#define WF_ALLCAP   0x04        /* word must be all capitals */
#define WF_RARE     0x08        /* rare word */

/* Above the one-byte <flags> range; used as a caps type for mi_capflags. */
#define WF_KEEPCAP  0x100       /* keep-case word */

/* Special values for the <byte> of a sibling in the spell file. */
#define BY_NOFLAGS  0           /* end of word without flags or region */
#define BY_FLAGS    1           /* end of word, flag byte follows */
#define BY_INDEX    2           /* child is shared, index follows */
#define BY_SPECIAL  BY_INDEX    /* highest special byte value */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000124
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000125/* Info from "REP" entries in ".aff" file used in af_rep.
126 * TODO: This is not used yet. Either use it or remove it. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000127typedef struct repentry_S
128{
129 char_u *re_from;
130 char_u *re_to;
131} repentry_T;
132
133/*
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000134 * Structure used to store words and other info for one language, loaded from
135 * a .spl file.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000136 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
137 * case-folded words. "sl_kbyts/sl_kidxs" is for keep-case words.
138 *
139 * The "byts" array stores the possible bytes in each tree node, preceded by
140 * the number of possible bytes, sorted on byte value:
141 * <len> <byte1> <byte2> ...
142 * The "idxs" array stores the index of the child node corresponding to the
143 * byte in "byts".
144 * Exception: when the byte is zero, the word may end here and "idxs" holds
145 * the flags and region for the word. There may be several zeros in sequence
146 * for alternative flag/region combinations.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000147 */
148typedef struct slang_S slang_T;
149struct slang_S
150{
151 slang_T *sl_next; /* next language */
152 char_u *sl_name; /* language name "en", "en.rare", "nl", etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000153 char_u *sl_fbyts; /* case-folded word bytes */
154 int *sl_fidxs; /* case-folded word indexes */
155 char_u *sl_kbyts; /* keep-case word bytes */
156 int *sl_kidxs; /* keep-case word indexes */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000157 char_u *sl_try; /* "TRY" from .aff file TODO: not used */
158 garray_T sl_rep; /* list of repentry_T entries from REP lines
159 * TODO not used */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000160 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000161 int sl_error; /* error while loading */
162};
163
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000164/* First language that is loaded, start of the linked list of loaded
165 * languages. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000166static slang_T *first_lang = NULL;
167
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000168#define REGION_ALL 0xff
169
170
171/*
172 * Structure used in "b_langp", filled from 'spelllang'.
173 */
174typedef struct langp_S
175{
176 slang_T *lp_slang; /* info for this language (NULL for last one) */
177 int lp_region; /* bitmask for region or REGION_ALL */
178} langp_T;
179
180#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
181
/* Results of checking a word, stored in "mi_result". */
#define SP_OK           0
#define SP_BAD          1
#define SP_RARE         2
#define SP_LOCAL        3

#define VIMSPELLMAGIC "VIMspell05"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 10           /* length of VIMSPELLMAGIC in bytes */
189
190/*
191 * Structure to store info for word matching.
192 */
193typedef struct matchinf_S
194{
195 langp_T *mi_lp; /* info for language and region */
196 slang_T *mi_slang; /* info for the language */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000197
198 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000199 char_u *mi_word; /* start of word being checked */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000200 char_u *mi_end; /* end of matching word */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000201 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000202 char_u *mi_cend; /* char after what was used for
203 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000204
205 /* case-folded text */
206 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000207 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000208
209 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000210 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000211 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000212} matchinf_T;
213
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000214static slang_T *slang_alloc __ARGS((char_u *lang));
215static void slang_free __ARGS((slang_T *lp));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000216static void find_word __ARGS((matchinf_T *mip, int keepcap));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000217static slang_T *spell_load_lang __ARGS((char_u *lang));
218static void spell_load_file __ARGS((char_u *fname, void *cookie));
Bram Moolenaar51485f02005-06-04 21:55:20 +0000219static int read_tree __ARGS((FILE *fd, char_u *byts, int *idxs, int maxidx, int startidx));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000220static int find_region __ARGS((char_u *rp, char_u *region));
221static int captype __ARGS((char_u *word, char_u *end));
222
223/*
224 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000225 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000226 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
227 * or when it's OK it remains unchanged.
228 * This must only be called when 'spelllang' is not empty.
229 * Returns the length of the word in bytes, also when it's OK, so that the
230 * caller can skip over the word.
231 */
232 int
Bram Moolenaar51485f02005-06-04 21:55:20 +0000233spell_check(wp, ptr, attrp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000234 win_T *wp; /* current window */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000235 char_u *ptr;
236 int *attrp;
237{
238 matchinf_T mi; /* Most things are put in "mi" so that it can
239 be passed to functions quickly. */
240
Bram Moolenaar51485f02005-06-04 21:55:20 +0000241 /* Find the end of the word. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000242 mi.mi_word = ptr;
243 mi.mi_end = ptr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000244
Bram Moolenaar51485f02005-06-04 21:55:20 +0000245 /* A word starting with a number is always OK. Also skip hexadecimal
246 * numbers 0xFF99 and 0X99FF. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000247 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000248 {
249 if (*ptr == '0' && (ptr[1] == 'x' || ptr[2] == 'X'))
250 mi.mi_end = skiphex(ptr);
251 else
252 mi.mi_end = skipdigits(ptr);
253 }
254 else
255 {
256 mi.mi_fend = ptr;
257 if (spell_iswordc(mi.mi_fend))
258 {
259 /* Make case-folded copy of the characters until the next non-word
260 * character. */
261 do
262 {
263 mb_ptr_adv(mi.mi_fend);
264 } while (*mi.mi_fend != NUL && spell_iswordc(mi.mi_fend));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000265
Bram Moolenaar51485f02005-06-04 21:55:20 +0000266 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
267 MAXWLEN + 1);
268 mi.mi_fwordlen = STRLEN(mi.mi_fword);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000269
Bram Moolenaar51485f02005-06-04 21:55:20 +0000270 /* Check the caps type of the word. */
271 mi.mi_capflags = captype(ptr, mi.mi_fend);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000272
Bram Moolenaar51485f02005-06-04 21:55:20 +0000273 /* We always use the characters up to the next non-word character,
274 * also for bad words. */
275 mi.mi_end = mi.mi_fend;
276 }
277 else
278 {
279 /* No word characters. Don't case-fold anything, we may quickly
280 * find out this is not a word (but it could be!). */
281 mi.mi_fwordlen = 0;
282 mi.mi_capflags = 0;
283 }
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000284
Bram Moolenaar51485f02005-06-04 21:55:20 +0000285 mi.mi_cend = mi.mi_fend;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000286
Bram Moolenaar51485f02005-06-04 21:55:20 +0000287 /* The word is bad unless we recognize it. */
288 mi.mi_result = SP_BAD;
289
290 /*
291 * Loop over the languages specified in 'spelllang'.
292 * We check them all, because a matching word may be longer than an
293 * already found matching word.
294 */
295 for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000296 mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000297 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000298 /* Check for a matching word in case-folded words. */
299 find_word(&mi, FALSE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000300
Bram Moolenaar51485f02005-06-04 21:55:20 +0000301 /* Try keep-case words. */
302 find_word(&mi, TRUE);
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000303 }
304
Bram Moolenaar51485f02005-06-04 21:55:20 +0000305 if (mi.mi_result != SP_OK)
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000306 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000307 /* When we are at a non-word character there is no error, just
308 * skip over the character (try looking for a word after it). */
309 if (!spell_iswordc(ptr))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000310 {
311#ifdef FEAT_MBYTE
312 if (has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000313 return mb_ptr2len_check(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000314#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000315 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000316 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000317
318 if (mi.mi_result == SP_BAD)
319 *attrp = highlight_attr[HLF_SPB];
320 else if (mi.mi_result == SP_RARE)
321 *attrp = highlight_attr[HLF_SPR];
322 else
323 *attrp = highlight_attr[HLF_SPL];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000324 }
325 }
326
Bram Moolenaar51485f02005-06-04 21:55:20 +0000327 return (int)(mi.mi_end - ptr);
328}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000329
Bram Moolenaar51485f02005-06-04 21:55:20 +0000330/*
331 * Check if the word at "mip->mi_word" is in the tree.
332 * When "keepcap" is TRUE check in keep-case word tree.
333 *
334 * For a match mip->mi_result is updated.
335 */
336 static void
337find_word(mip, keepcap)
338 matchinf_T *mip;
339 int keepcap;
340{
341 int arridx = 0;
342 int endlen[MAXWLEN]; /* length at possible word endings */
343 int endidx[MAXWLEN]; /* possible word endings */
344 int endidxcnt = 0;
345 int len;
346 int wlen = 0;
347 int flen;
348 int c;
349 char_u *ptr;
350 unsigned lo, hi, m;
351#ifdef FEAT_MBYTE
352 char_u *s;
353 char_u *p;
354#endif
355 int res;
Bram Moolenaar50cde822005-06-05 21:54:54 +0000356 int valid = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000357 slang_T *slang = mip->mi_lp->lp_slang;
358 unsigned flags;
359 char_u *byts;
360 int *idxs;
361
362 if (keepcap)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000363 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000364 /* Check for word with matching case in keep-case tree. */
365 ptr = mip->mi_word;
366 flen = 9999; /* no case folding, always enough bytes */
367 byts = slang->sl_kbyts;
368 idxs = slang->sl_kidxs;
369 }
370 else
371 {
372 /* Check for case-folded in case-folded tree. */
373 ptr = mip->mi_fword;
374 flen = mip->mi_fwordlen; /* available case-folded bytes */
375 byts = slang->sl_fbyts;
376 idxs = slang->sl_fidxs;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000377 }
378
Bram Moolenaar51485f02005-06-04 21:55:20 +0000379 if (byts == NULL)
380 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000381
Bram Moolenaar51485f02005-06-04 21:55:20 +0000382 /*
383 * Repeat advancing in the tree until there is a byte that doesn't match,
384 * we reach the end of the tree or we reach the end of the line.
385 */
386 for (;;)
387 {
388 if (flen == 0 && *mip->mi_fend != NUL)
389 {
390 /* Need to fold at least one more character. Do until next
391 * non-word character for efficiency. */
392 do
393 {
394#ifdef FEAT_MBYTE
395 if (has_mbyte)
396 flen += mb_ptr2len_check(mip->mi_fend + flen);
397 else
398#endif
399 ++flen;
400 } while (spell_iswordc(mip->mi_fend + flen));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000401
Bram Moolenaar51485f02005-06-04 21:55:20 +0000402 (void)spell_casefold(mip->mi_fend, flen,
403 mip->mi_fword + mip->mi_fwordlen,
404 MAXWLEN - mip->mi_fwordlen);
405 mip->mi_fend += flen;
406 flen = STRLEN(mip->mi_fword + mip->mi_fwordlen);
407 mip->mi_fwordlen += flen;
408 }
409
410 len = byts[arridx++];
411
412 /* If the first possible byte is a zero the word could end here.
413 * Remember this index, we first check for the longest word. */
414 if (byts[arridx] == 0)
415 {
416 endlen[endidxcnt] = wlen;
417 endidx[endidxcnt++] = arridx++;
418 --len;
419
420 /* Skip over the zeros, there can be several flag/region
421 * combinations. */
422 while (len > 0 && byts[arridx] == 0)
423 {
424 ++arridx;
425 --len;
426 }
427 if (len == 0)
428 break; /* no children, word must end here */
429 }
430
431 /* Stop looking at end of the line. */
432 if (ptr[wlen] == NUL)
433 break;
434
435 /* Perform a binary search in the list of accepted bytes. */
436 c = ptr[wlen];
437 lo = arridx;
438 hi = arridx + len - 1;
439 while (lo < hi)
440 {
441 m = (lo + hi) / 2;
442 if (byts[m] > c)
443 hi = m - 1;
444 else if (byts[m] < c)
445 lo = m + 1;
446 else
447 {
448 lo = hi = m;
449 break;
450 }
451 }
452
453 /* Stop if there is no matching byte. */
454 if (hi < lo || byts[lo] != c)
455 break;
456
457 /* Continue at the child (if there is one). */
458 arridx = idxs[lo];
459 ++wlen;
460 --flen;
461 }
462
463 /*
464 * Verify that one of the possible endings is valid. Try the longest
465 * first.
466 */
467 while (endidxcnt > 0)
468 {
469 --endidxcnt;
470 arridx = endidx[endidxcnt];
471 wlen = endlen[endidxcnt];
472
473#ifdef FEAT_MBYTE
474 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
475 continue; /* not at first byte of character */
476#endif
477 if (spell_iswordc(ptr + wlen))
478 continue; /* next char is a word character */
479
480#ifdef FEAT_MBYTE
481 if (!keepcap && has_mbyte)
482 {
483 /* Compute byte length in original word, length may change
484 * when folding case. */
485 p = mip->mi_word;
486 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
487 mb_ptr_adv(p);
488 wlen = p - mip->mi_word;
489 }
490#endif
491
492 /* Check flags and region. Repeat this if there are more
493 * flags/region alternatives until there is a match. */
494 res = SP_BAD;
495 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0; --len)
496 {
497 flags = idxs[arridx];
498 if (keepcap)
499 {
500 /* For "keepcap" tree the case is always right. */
501 valid = TRUE;
502 }
503 else
504 {
505 /* Check that the word is in the required case. */
506 if (mip->mi_cend != mip->mi_word + wlen)
507 {
508 /* mi_capflags was set for a different word
509 * length, need to do it again. */
510 mip->mi_cend = mip->mi_word + wlen;
511 mip->mi_capflags = captype(mip->mi_word,
512 mip->mi_cend);
513 }
514
515 valid = (mip->mi_capflags == WF_ALLCAP
516 || ((flags & WF_ALLCAP) == 0
517 && ((flags & WF_ONECAP) == 0
518 || mip->mi_capflags == WF_ONECAP)));
519 }
520
521 if (valid && res != SP_OK)
522 {
523 if (flags & WF_REGION)
524 {
525 /* Check region. */
526 if ((mip->mi_lp->lp_region & (flags >> 8)) != 0)
527 res = SP_OK;
528 else
529 res = SP_LOCAL;
530 }
531 else if (flags & WF_RARE)
532 res = SP_RARE;
533 else
534 res = SP_OK;
535 }
536
537 if (res == SP_OK)
538 break;
539 ++arridx;
540 }
541
542 if (valid)
543 {
544 /* Valid word! Always use the longest match. */
545 if (mip->mi_end < mip->mi_word + wlen)
546 mip->mi_end = mip->mi_word + wlen;
547 if (mip->mi_result != SP_OK)
548 mip->mi_result = res;
549 break;
550 }
551 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000552}
553
Bram Moolenaar51485f02005-06-04 21:55:20 +0000554
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000555/*
556 * Move to next spell error.
557 * Return OK if found, FAIL otherwise.
558 */
559 int
560spell_move_to(dir, allwords)
561 int dir; /* FORWARD or BACKWARD */
562 int allwords; /* TRUE for "[s" and "]s" */
563{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000564 linenr_T lnum;
565 pos_T found_pos;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000566 char_u *line;
567 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000568 int attr = 0;
569 int len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000570 int has_syntax = syntax_present(curbuf);
571 int col;
572 int can_spell;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000573
574 if (!curwin->w_p_spell || *curwin->w_buffer->b_p_spl == NUL)
575 {
576 EMSG(_("E756: Spell checking not enabled"));
577 return FAIL;
578 }
579
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000580 /*
581 * Start looking for bad word at the start of the line, because we can't
582 * start halfway a word, we don't know where it starts or ends.
583 *
584 * When searching backwards, we continue in the line to find the last
585 * bad word (in the cursor line: before the cursor).
586 */
587 lnum = curwin->w_cursor.lnum;
588 found_pos.lnum = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000589
590 while (!got_int)
591 {
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000592 line = ml_get(lnum);
593 p = line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000594
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000595 while (*p != NUL)
596 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000597 /* When searching backward don't search after the cursor. */
598 if (dir == BACKWARD
599 && lnum == curwin->w_cursor.lnum
600 && (colnr_T)(p - line) >= curwin->w_cursor.col)
601 break;
602
603 /* start of word */
604 len = spell_check(curwin, p, &attr);
605
606 if (attr != 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000607 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000608 /* We found a bad word. Check the attribute. */
609 /* TODO: check for syntax @Spell cluster. */
610 if (allwords || attr == highlight_attr[HLF_SPB])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000611 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000612 /* When searching forward only accept a bad word after
613 * the cursor. */
614 if (dir == BACKWARD
615 || lnum > curwin->w_cursor.lnum
616 || (lnum == curwin->w_cursor.lnum
617 && (colnr_T)(p - line)
618 > curwin->w_cursor.col))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000619 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000620 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000621 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000622 col = p - line;
623 (void)syn_get_id(lnum, (colnr_T)col,
624 FALSE, &can_spell);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000625
Bram Moolenaar51485f02005-06-04 21:55:20 +0000626 /* have to get the line again, a multi-line
627 * regexp may make it invalid */
628 line = ml_get(lnum);
629 p = line + col;
630 }
631 else
632 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000633
Bram Moolenaar51485f02005-06-04 21:55:20 +0000634 if (can_spell)
635 {
636 found_pos.lnum = lnum;
637 found_pos.col = p - line;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000638#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +0000639 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000640#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +0000641 if (dir == FORWARD)
642 {
643 /* No need to search further. */
644 curwin->w_cursor = found_pos;
645 return OK;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000646 }
647 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000648 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000649 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000650 attr = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000651 }
652
Bram Moolenaar51485f02005-06-04 21:55:20 +0000653 /* advance to character after the word */
654 p += len;
655 if (*p == NUL)
656 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000657 }
658
659 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000660 if (dir == BACKWARD)
661 {
662 if (found_pos.lnum != 0)
663 {
664 /* Use the last match in the line. */
665 curwin->w_cursor = found_pos;
666 return OK;
667 }
668 if (lnum == 1)
669 return FAIL;
670 --lnum;
671 }
672 else
673 {
674 if (lnum == curbuf->b_ml.ml_line_count)
675 return FAIL;
676 ++lnum;
677 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000678
679 line_breakcheck();
680 }
681
682 return FAIL; /* interrupted */
683}
684
685/*
686 * Load word list for "lang" from a Vim spell file.
687 * "lang" must be the language without the region: "en" or "en-rare".
688 */
689 static slang_T *
690spell_load_lang(lang)
691 char_u *lang;
692{
693 slang_T *lp;
694 char_u fname_enc[80];
695 char_u *p;
696 int r;
697
698 lp = slang_alloc(lang);
699 if (lp != NULL)
700 {
701 /* Find all spell files for "lang" in 'runtimepath' and load them.
702 * Use 'encoding', except that we use "latin1" for "latin9". */
703#ifdef FEAT_MBYTE
704 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
705 p = p_enc;
706 else
707#endif
708 p = (char_u *)"latin1";
Bram Moolenaar9c13b352005-05-19 20:53:52 +0000709 vim_snprintf((char *)fname_enc, sizeof(fname_enc),
710 "spell/%s.%s.spl", lang, p);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000711
712 r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
Bram Moolenaar5482f332005-04-17 20:18:43 +0000713 if (r == FAIL && !lp->sl_error)
714 {
715 /* Try loading the ASCII version. */
Bram Moolenaar9c13b352005-05-19 20:53:52 +0000716 vim_snprintf((char *)fname_enc, sizeof(fname_enc),
717 "spell/%s.ascii.spl", lang);
Bram Moolenaar5482f332005-04-17 20:18:43 +0000718
719 r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
720 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000721 if (r == FAIL || lp->sl_error)
722 {
723 slang_free(lp);
724 lp = NULL;
725 if (r == FAIL)
726 smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
727 fname_enc + 6);
728 }
729 else
730 {
731 lp->sl_next = first_lang;
732 first_lang = lp;
733 }
734 }
735
736 return lp;
737}
738
739/*
740 * Allocate a new slang_T.
741 * Caller must fill "sl_next".
742 */
743 static slang_T *
744slang_alloc(lang)
745 char_u *lang;
746{
747 slang_T *lp;
748
Bram Moolenaar51485f02005-06-04 21:55:20 +0000749 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000750 if (lp != NULL)
751 {
752 lp->sl_name = vim_strsave(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000753 ga_init2(&lp->sl_rep, sizeof(repentry_T), 4);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000754 }
755 return lp;
756}
757
758/*
759 * Free the contents of an slang_T and the structure itself.
760 */
761 static void
762slang_free(lp)
763 slang_T *lp;
764{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000765 vim_free(lp->sl_name);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000766 vim_free(lp->sl_fbyts);
767 vim_free(lp->sl_kbyts);
768 vim_free(lp->sl_fidxs);
769 vim_free(lp->sl_kidxs);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000770 ga_clear(&lp->sl_rep);
771 vim_free(lp->sl_try);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000772 vim_free(lp);
773}
774
775/*
776 * Load one spell file into an slang_T.
777 * Invoked through do_in_runtimepath().
778 */
779 static void
780spell_load_file(fname, cookie)
781 char_u *fname;
782 void *cookie; /* points to the slang_T to be filled */
783{
784 slang_T *lp = cookie;
785 FILE *fd;
786 char_u buf[MAXWLEN + 1];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000787 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000788 int i;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000789 int len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000790 int round;
791 char_u *save_sourcing_name = sourcing_name;
792 linenr_T save_sourcing_lnum = sourcing_lnum;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000793 int cnt, ccnt;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000794 char_u *fol;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000795
796 fd = fopen((char *)fname, "r");
797 if (fd == NULL)
798 {
799 EMSG2(_(e_notopen), fname);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000800 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000801 }
802
803 /* Set sourcing_name, so that error messages mention the file name. */
804 sourcing_name = fname;
805 sourcing_lnum = 0;
806
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000807 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
808 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000809 for (i = 0; i < VIMSPELLMAGICL; ++i)
810 buf[i] = getc(fd); /* <fileID> */
811 if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
812 {
813 EMSG(_("E757: Wrong file ID in spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000814 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000815 }
816
817 cnt = getc(fd); /* <regioncnt> */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000818 if (cnt < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000819 {
820truncerr:
821 EMSG(_("E758: Truncated spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000822 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000823 }
824 if (cnt > 8)
825 {
826formerr:
827 EMSG(_("E759: Format error in spell file"));
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000828 goto endFAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000829 }
830 for (i = 0; i < cnt; ++i)
831 {
832 lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
833 lp->sl_regions[i * 2 + 1] = getc(fd);
834 }
835 lp->sl_regions[cnt * 2] = NUL;
836
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000837 cnt = getc(fd); /* <charflagslen> */
838 if (cnt > 0)
839 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000840 p = alloc((unsigned)cnt);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000841 if (p == NULL)
842 goto endFAIL;
843 for (i = 0; i < cnt; ++i)
844 p[i] = getc(fd); /* <charflags> */
845
846 ccnt = (getc(fd) << 8) + getc(fd); /* <fcharslen> */
847 if (ccnt <= 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000848 {
849 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000850 goto formerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000851 }
852 fol = alloc((unsigned)ccnt + 1);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000853 if (fol == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000854 {
855 vim_free(p);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000856 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000857 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000858 for (i = 0; i < ccnt; ++i)
859 fol[i] = getc(fd); /* <fchars> */
860 fol[i] = NUL;
861
862 /* Set the word-char flags and fill spell_isupper() table. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000863 i = set_spell_charflags(p, cnt, fol);
864 vim_free(p);
865 vim_free(fol);
866 if (i == FAIL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000867 goto formerr;
868 }
869 else
870 {
871 /* When <charflagslen> is zero then <fcharlen> must also be zero. */
872 cnt = (getc(fd) << 8) + getc(fd);
873 if (cnt != 0)
874 goto formerr;
875 }
876
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000877 /* <SUGGEST> : <suggestlen> <more> ... */
878 /* TODO, just skip this for now */
879 i = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
880 while (i-- > 0)
881 if (getc(fd) == EOF) /* <suggestlen> */
882 goto truncerr;
883
Bram Moolenaar51485f02005-06-04 21:55:20 +0000884 /* round 1: <LWORDTREE>
885 * round 2: <KWORDTREE> */
886 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000887 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000888 /* The tree size was computed when writing the file, so that we can
889 * allocate it as one long block. <nodecount> */
890 len = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
891 if (len < 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000892 goto truncerr;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000893 if (len > 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000894 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000895 /* Allocate the byte array. */
896 p = lalloc((long_u)len, TRUE);
897 if (p == NULL)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000898 goto endFAIL;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000899 if (round == 1)
900 lp->sl_fbyts = p;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +0000901 else
Bram Moolenaar51485f02005-06-04 21:55:20 +0000902 lp->sl_kbyts = p;
903
904 /* Allocate the index array. */
905 p = lalloc_clear((long_u)(len * sizeof(int)), TRUE);
906 if (p == NULL)
907 goto endFAIL;
908 if (round == 1)
909 lp->sl_fidxs = (int *)p;
910 else
911 lp->sl_kidxs = (int *)p;
912
913
914 /* Read the tree and store it in the array. */
915 i = read_tree(fd,
916 round == 1 ? lp->sl_fbyts : lp->sl_kbyts,
917 round == 1 ? lp->sl_fidxs : lp->sl_kidxs,
918 len, 0);
919 if (i == -1)
920 goto truncerr;
921 if (i < 0)
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000922 goto formerr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000923 }
924 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000925
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000926 goto endOK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000927
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000928endFAIL:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000929 lp->sl_error = TRUE;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +0000930
931endOK:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000932 if (fd != NULL)
933 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000934 sourcing_name = save_sourcing_name;
935 sourcing_lnum = save_sourcing_lnum;
936}
937
938/*
Bram Moolenaar51485f02005-06-04 21:55:20 +0000939 * Read one row of siblings from the spell file and store it in the byte array
940 * "byts" and index array "idxs". Recursively read the children.
941 *
942 * NOTE: The code here must match put_tree().
943 *
944 * Returns the index follosing the siblings.
945 * Returns -1 if the file is shorter than expected.
946 * Returns -2 if there is a format error.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000947 */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000948 static int
949read_tree(fd, byts, idxs, maxidx, startidx)
950 FILE *fd;
951 char_u *byts;
952 int *idxs;
953 int maxidx; /* size of arrays */
954 int startidx; /* current index in "byts" and "idxs" */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000955{
Bram Moolenaar51485f02005-06-04 21:55:20 +0000956 int len;
957 int i;
958 int n;
959 int idx = startidx;
960 int c;
961#define SHARED_MASK 0x8000000
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000962
Bram Moolenaar51485f02005-06-04 21:55:20 +0000963 len = getc(fd); /* <siblingcount> */
964 if (len <= 0)
965 return -1;
966
967 if (startidx + len >= maxidx)
968 return -2;
969 byts[idx++] = len;
970
971 /* Read the byte values, flag/region bytes and shared indexes. */
972 for (i = 1; i <= len; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000973 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000974 c = getc(fd); /* <byte> */
975 if (c < 0)
976 return -1;
977 if (c <= BY_SPECIAL)
978 {
979 if (c == BY_NOFLAGS)
980 {
981 /* No flags, all regions. */
982 idxs[idx] = 0;
983 c = 0;
984 }
985 else if (c == BY_FLAGS)
986 {
987 /* Read flags and option region. */
988 c = getc(fd); /* <flags> */
989 if (c & WF_REGION)
990 c = (getc(fd) << 8) + c; /* <region> */
991 idxs[idx] = c;
992 c = 0;
993 }
994 else /* c == BY_INDEX */
995 {
996 /* <nodeidx> */
997 n = (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
998 if (n < 0 || n >= maxidx)
999 return -2;
1000 idxs[idx] = n + SHARED_MASK;
1001 c = getc(fd); /* <xbyte> */
1002 }
1003 }
1004 byts[idx++] = c;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001005 }
1006
Bram Moolenaar51485f02005-06-04 21:55:20 +00001007 /* Recursively read the children for non-shared siblings.
1008 * Skip the end-of-word ones (zero byte value) and the shared ones (and
1009 * remove SHARED_MASK) */
1010 for (i = 1; i <= len; ++i)
1011 if (byts[startidx + i] != 0)
1012 {
1013 if (idxs[startidx + i] & SHARED_MASK)
1014 idxs[startidx + i] &= ~SHARED_MASK;
1015 else
1016 {
1017 idxs[startidx + i] = idx;
1018 idx = read_tree(fd, byts, idxs, maxidx, idx);
1019 if (idx < 0)
1020 break;
1021 }
1022 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001023
Bram Moolenaar51485f02005-06-04 21:55:20 +00001024 return idx;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001025}
1026
1027/*
1028 * Parse 'spelllang' and set buf->b_langp accordingly.
1029 * Returns an error message or NULL.
1030 */
1031 char_u *
1032did_set_spelllang(buf)
1033 buf_T *buf;
1034{
1035 garray_T ga;
1036 char_u *lang;
1037 char_u *e;
1038 char_u *region;
1039 int region_mask;
1040 slang_T *lp;
1041 int c;
1042 char_u lbuf[MAXWLEN + 1];
1043
1044 ga_init2(&ga, sizeof(langp_T), 2);
1045
1046 /* loop over comma separated languages. */
1047 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
1048 {
1049 e = vim_strchr(lang, ',');
1050 if (e == NULL)
1051 e = lang + STRLEN(lang);
Bram Moolenaar5482f332005-04-17 20:18:43 +00001052 region = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001053 if (e > lang + 2)
1054 {
1055 if (e - lang >= MAXWLEN)
1056 {
1057 ga_clear(&ga);
1058 return e_invarg;
1059 }
1060 if (lang[2] == '_')
1061 region = lang + 3;
1062 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001063
1064 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
1065 if (STRNICMP(lp->sl_name, lang, 2) == 0)
1066 break;
1067
1068 if (lp == NULL)
1069 {
1070 /* Not found, load the language. */
1071 STRNCPY(lbuf, lang, e - lang);
1072 lbuf[e - lang] = NUL;
1073 if (region != NULL)
1074 mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
1075 lp = spell_load_lang(lbuf);
1076 }
1077
1078 if (lp != NULL)
1079 {
1080 if (region == NULL)
1081 region_mask = REGION_ALL;
1082 else
1083 {
1084 /* find region in sl_regions */
1085 c = find_region(lp->sl_regions, region);
1086 if (c == REGION_ALL)
1087 {
1088 c = *e;
1089 *e = NUL;
1090 smsg((char_u *)_("Warning: region %s not supported"), lang);
1091 *e = c;
1092 region_mask = REGION_ALL;
1093 }
1094 else
1095 region_mask = 1 << c;
1096 }
1097
1098 if (ga_grow(&ga, 1) == FAIL)
1099 {
1100 ga_clear(&ga);
1101 return e_outofmem;
1102 }
1103 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
1104 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
1105 ++ga.ga_len;
1106 }
1107
1108 if (*e == ',')
1109 ++e;
1110 }
1111
1112 /* Add a NULL entry to mark the end of the list. */
1113 if (ga_grow(&ga, 1) == FAIL)
1114 {
1115 ga_clear(&ga);
1116 return e_outofmem;
1117 }
1118 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
1119 ++ga.ga_len;
1120
1121 /* Everything is fine, store the new b_langp value. */
1122 ga_clear(&buf->b_langp);
1123 buf->b_langp = ga;
1124
1125 return NULL;
1126}
1127
1128/*
1129 * Find the region "region[2]" in "rp" (points to "sl_regions").
1130 * Each region is simply stored as the two characters of it's name.
1131 * Returns the index if found, REGION_ALL if not found.
1132 */
1133 static int
1134find_region(rp, region)
1135 char_u *rp;
1136 char_u *region;
1137{
1138 int i;
1139
1140 for (i = 0; ; i += 2)
1141 {
1142 if (rp[i] == NUL)
1143 return REGION_ALL;
1144 if (rp[i] == region[0] && rp[i + 1] == region[1])
1145 break;
1146 }
1147 return i / 2;
1148}
1149
1150/*
1151 * Return type of word:
1152 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00001153 * Word WF_ONECAP
1154 * W WORD WF_ALLCAP
1155 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001156 */
1157 static int
1158captype(word, end)
1159 char_u *word;
1160 char_u *end;
1161{
1162 char_u *p;
1163 int c;
1164 int firstcap;
1165 int allcap;
1166 int past_second = FALSE; /* past second word char */
1167
1168 /* find first letter */
1169 for (p = word; !spell_iswordc(p); mb_ptr_adv(p))
1170 if (p >= end)
1171 return 0; /* only non-word characters, illegal word */
1172#ifdef FEAT_MBYTE
1173 c = mb_ptr2char_adv(&p);
1174#else
1175 c = *p++;
1176#endif
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001177 firstcap = allcap = spell_isupper(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001178
1179 /*
1180 * Need to check all letters to find a word with mixed upper/lower.
1181 * But a word with an upper char only at start is a ONECAP.
1182 */
1183 for ( ; p < end; mb_ptr_adv(p))
1184 if (spell_iswordc(p))
1185 {
1186#ifdef FEAT_MBYTE
1187 c = mb_ptr2char(p);
1188#else
1189 c = *p;
1190#endif
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001191 if (!spell_isupper(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001192 {
1193 /* UUl -> KEEPCAP */
1194 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001195 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001196 allcap = FALSE;
1197 }
1198 else if (!allcap)
1199 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001200 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001201 past_second = TRUE;
1202 }
1203
1204 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001205 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001206 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001207 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001208 return 0;
1209}
1210
1211# if defined(FEAT_MBYTE) || defined(PROTO)
1212/*
1213 * Clear all spelling tables and reload them.
1214 * Used after 'encoding' is set.
1215 */
1216 void
1217spell_reload()
1218{
1219 buf_T *buf;
1220 slang_T *lp;
1221
1222 /* Initialize the table for spell_iswordc(). */
1223 init_spell_chartab();
1224
1225 /* Unload all allocated memory. */
1226 while (first_lang != NULL)
1227 {
1228 lp = first_lang;
1229 first_lang = lp->sl_next;
1230 slang_free(lp);
1231 }
1232
1233 /* Go through all buffers and handle 'spelllang'. */
1234 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
1235 {
1236 ga_clear(&buf->b_langp);
1237 if (*buf->b_p_spl != NUL)
1238 did_set_spelllang(buf);
1239 }
1240}
1241# endif
1242
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001243
1244#if defined(FEAT_MBYTE) || defined(PROTO)
1245/*
1246 * Functions for ":mkspell".
1247 * Only possible with the multi-byte feature.
1248 */
1249
/* Maximum length in bytes of a line in a .aff and .dic file. */
#define MAXLINELEN  500
1252/*
1253 * Main structure to store the contents of a ".aff" file.
1254 */
1255typedef struct afffile_S
1256{
1257 char_u *af_enc; /* "SET", normalized, alloc'ed string or NULL */
1258 char_u *af_try; /* "TRY" line in "af_enc" encoding */
1259 hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
1260 hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
1261 garray_T af_rep; /* list of repentry_T entries from REP lines */
1262} afffile_T;
1263
1264typedef struct affentry_S affentry_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001265/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
1266struct affentry_S
1267{
1268 affentry_T *ae_next; /* next affix with same name/number */
1269 char_u *ae_chop; /* text to chop off basic word (can be NULL) */
1270 char_u *ae_add; /* text to add to basic word (can be NULL) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001271 char_u *ae_cond; /* condition (NULL for ".") */
1272 regprog_T *ae_prog; /* regexp program for ae_cond or NULL */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001273};
1274
1275/* Affix header from ".aff" file. Used for af_pref and af_suff. */
1276typedef struct affheader_S
1277{
1278 char_u ah_key[2]; /* key for hashtable == name of affix entry */
1279 int ah_combine; /* suffix may combine with prefix */
1280 affentry_T *ah_first; /* first affix entry */
1281} affheader_T;
1282
1283#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
1284
1285/*
1286 * Structure that is used to store the items in the word tree. This avoids
1287 * the need to keep track of each allocated thing, it's freed all at once
1288 * after ":mkspell" is done.
1289 */
1290#define SBLOCKSIZE 16000 /* size of sb_data */
1291typedef struct sblock_S sblock_T;
1292struct sblock_S
1293{
1294 sblock_T *sb_next; /* next block in list */
1295 int sb_used; /* nr of bytes already in use */
1296 char_u sb_data[1]; /* data, actually longer */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001297};
1298
1299/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001300 * A node in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001301 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001302typedef struct wordnode_S wordnode_T;
1303struct wordnode_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001304{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001305 char_u wn_hashkey[6]; /* room for the hash key */
1306 wordnode_T *wn_next; /* next node with same hash key */
1307 wordnode_T *wn_child; /* child (next byte in word) */
1308 wordnode_T *wn_sibling; /* next sibling (alternate byte in word,
1309 always sorted) */
1310 wordnode_T *wn_wnode; /* parent node that will write this node */
1311 int wn_index; /* index in written nodes (valid after first
1312 round) */
1313 char_u wn_byte; /* Byte for this node. NUL for word end */
1314 char_u wn_flags; /* when wn_byte is NUL: WF_ flags */
1315 char_u wn_region; /* when wn_byte is NUL: region mask */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001316};
1317
/* Get the wordnode_T back from a hashtable item.  The whole expansion is in
 * parentheses, like HI2AH(), so that "HI2WN(hi)->wn_next" applies the cast
 * before the member access. */
#define HI2WN(hi)   ((wordnode_T *)((hi)->hi_key))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001319
Bram Moolenaar51485f02005-06-04 21:55:20 +00001320/*
1321 * Info used while reading the spell files.
1322 */
1323typedef struct spellinfo_S
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001324{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001325 wordnode_T *si_foldroot; /* tree with case-folded words */
1326 wordnode_T *si_keeproot; /* tree with keep-case words */
1327 sblock_T *si_blocks; /* memory blocks used */
1328 int si_ascii; /* handling only ASCII words */
1329 int si_region; /* region mask */
1330 vimconv_T si_conv; /* for conversion to 'encoding' */
Bram Moolenaar50cde822005-06-05 21:54:54 +00001331 int si_memtot; /* runtime memory used */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001332} spellinfo_T;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001333
Bram Moolenaar51485f02005-06-04 21:55:20 +00001334static afffile_T *spell_read_aff __ARGS((char_u *fname, spellinfo_T *spin));
Bram Moolenaar5482f332005-04-17 20:18:43 +00001335static int has_non_ascii __ARGS((char_u *s));
Bram Moolenaar51485f02005-06-04 21:55:20 +00001336static void spell_free_aff __ARGS((afffile_T *aff));
1337static int spell_read_dic __ARGS((char_u *fname, spellinfo_T *spin, afffile_T *affile));
1338static int store_aff_word __ARGS((char_u *word, spellinfo_T *spin, char_u *afflist, hashtab_T *ht, hashtab_T *xht, int comb));
1339static int spell_read_wordfile __ARGS((char_u *fname, spellinfo_T *spin));
1340static void *getroom __ARGS((sblock_T **blp, size_t len));
1341static char_u *getroom_save __ARGS((sblock_T **blp, char_u *s));
1342static void free_blocks __ARGS((sblock_T *bl));
1343static wordnode_T *wordtree_alloc __ARGS((sblock_T **blp));
1344static int store_word __ARGS((char_u *word, spellinfo_T *spin));
1345static int tree_add_word __ARGS((char_u *word, wordnode_T *tree, int flags, int region, sblock_T **blp));
1346static void wordtree_compress __ARGS((wordnode_T *root));
1347static int node_compress __ARGS((wordnode_T *node, hashtab_T *ht, int *tot));
1348static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
1349static void write_vim_spell __ARGS((char_u *fname, spellinfo_T *spin, int regcount, char_u *regchars));
1350static int put_tree __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001351
1352/*
1353 * Read an affix ".aff" file.
1354 * Returns an afffile_T, NULL for failure.
1355 */
1356 static afffile_T *
Bram Moolenaar51485f02005-06-04 21:55:20 +00001357spell_read_aff(fname, spin)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001358 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001359 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001360{
1361 FILE *fd;
1362 afffile_T *aff;
1363 char_u rline[MAXLINELEN];
1364 char_u *line;
1365 char_u *pc = NULL;
1366 char_u *(items[6]);
1367 int itemcnt;
1368 char_u *p;
1369 int lnum = 0;
1370 affheader_T *cur_aff = NULL;
1371 int aff_todo = 0;
1372 hashtab_T *tp;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001373 char_u *low = NULL;
1374 char_u *fol = NULL;
1375 char_u *upp = NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001376
Bram Moolenaar51485f02005-06-04 21:55:20 +00001377 /*
1378 * Open the file.
1379 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001380 fd = fopen((char *)fname, "r");
1381 if (fd == NULL)
1382 {
1383 EMSG2(_(e_notopen), fname);
1384 return NULL;
1385 }
1386
1387 smsg((char_u *)_("Reading affix file %s..."), fname);
1388 out_flush();
1389
Bram Moolenaar51485f02005-06-04 21:55:20 +00001390 /*
1391 * Allocate and init the afffile_T structure.
1392 */
1393 aff = (afffile_T *)getroom(&spin->si_blocks, sizeof(afffile_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001394 if (aff == NULL)
1395 return NULL;
1396 hash_init(&aff->af_pref);
1397 hash_init(&aff->af_suff);
1398 ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20);
1399
1400 /*
1401 * Read all the lines in the file one by one.
1402 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001403 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001404 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001405 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001406 ++lnum;
1407
1408 /* Skip comment lines. */
1409 if (*rline == '#')
1410 continue;
1411
1412 /* Convert from "SET" to 'encoding' when needed. */
1413 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001414 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001415 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001416 pc = string_convert(&spin->si_conv, rline, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001417 if (pc == NULL)
1418 {
1419 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
1420 fname, lnum, rline);
1421 continue;
1422 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001423 line = pc;
1424 }
1425 else
1426 {
1427 pc = NULL;
1428 line = rline;
1429 }
1430
1431 /* Split the line up in white separated items. Put a NUL after each
1432 * item. */
1433 itemcnt = 0;
1434 for (p = line; ; )
1435 {
1436 while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */
1437 ++p;
1438 if (*p == NUL)
1439 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001440 if (itemcnt == 6) /* too many items */
1441 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001442 items[itemcnt++] = p;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001443 while (*p > ' ') /* skip until white space or CR/NL */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001444 ++p;
1445 if (*p == NUL)
1446 break;
1447 *p++ = NUL;
1448 }
1449
1450 /* Handle non-empty lines. */
1451 if (itemcnt > 0)
1452 {
1453 if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
1454 && aff->af_enc == NULL)
1455 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001456 /* Setup for conversion from "ENC" to 'encoding'. */
1457 aff->af_enc = enc_canonize(items[1]);
1458 if (aff->af_enc != NULL && !spin->si_ascii
1459 && convert_setup(&spin->si_conv, aff->af_enc,
1460 p_enc) == FAIL)
1461 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
1462 fname, aff->af_enc, p_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001463 }
Bram Moolenaar50cde822005-06-05 21:54:54 +00001464 else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
1465 {
1466 /* ignored */
1467 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001468 else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2
1469 && aff->af_try == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001470 {
1471 aff->af_try = getroom_save(&spin->si_blocks, items[1]);
1472 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001473 else if ((STRCMP(items[0], "PFX") == 0
1474 || STRCMP(items[0], "SFX") == 0)
1475 && aff_todo == 0
1476 && itemcnt == 4)
1477 {
1478 /* New affix letter. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001479 cur_aff = (affheader_T *)getroom(&spin->si_blocks,
1480 sizeof(affheader_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001481 if (cur_aff == NULL)
1482 break;
1483 cur_aff->ah_key[0] = *items[1];
1484 cur_aff->ah_key[1] = NUL;
1485 if (items[1][1] != NUL)
1486 smsg((char_u *)_("Affix name too long in %s line %d: %s"),
1487 fname, lnum, items[1]);
1488 if (*items[2] == 'Y')
1489 cur_aff->ah_combine = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001490 else if (*items[2] != 'N')
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001491 smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
1492 fname, lnum, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001493 if (*items[0] == 'P')
1494 tp = &aff->af_pref;
1495 else
1496 tp = &aff->af_suff;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001497 aff_todo = atoi((char *)items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001498 if (!HASHITEM_EMPTY(hash_find(tp, cur_aff->ah_key)))
Bram Moolenaar51485f02005-06-04 21:55:20 +00001499 {
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001500 smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
1501 fname, lnum, items[1]);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001502 aff_todo = 0;
1503 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001504 else
1505 hash_add(tp, cur_aff->ah_key);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001506 }
1507 else if ((STRCMP(items[0], "PFX") == 0
1508 || STRCMP(items[0], "SFX") == 0)
1509 && aff_todo > 0
1510 && STRCMP(cur_aff->ah_key, items[1]) == 0
1511 && itemcnt == 5)
1512 {
1513 affentry_T *aff_entry;
1514
1515 /* New item for an affix letter. */
1516 --aff_todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001517 aff_entry = (affentry_T *)getroom(&spin->si_blocks,
1518 sizeof(affentry_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001519 if (aff_entry == NULL)
1520 break;
Bram Moolenaar5482f332005-04-17 20:18:43 +00001521
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001522 if (STRCMP(items[2], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001523 aff_entry->ae_chop = getroom_save(&spin->si_blocks,
1524 items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001525 if (STRCMP(items[3], "0") != 0)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001526 aff_entry->ae_add = getroom_save(&spin->si_blocks,
1527 items[3]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001528
Bram Moolenaar51485f02005-06-04 21:55:20 +00001529 /* Don't use an affix entry with non-ASCII characters when
1530 * "spin->si_ascii" is TRUE. */
1531 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
Bram Moolenaar5482f332005-04-17 20:18:43 +00001532 || has_non_ascii(aff_entry->ae_add)))
1533 {
Bram Moolenaar5482f332005-04-17 20:18:43 +00001534 aff_entry->ae_next = cur_aff->ah_first;
1535 cur_aff->ah_first = aff_entry;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001536
1537 if (STRCMP(items[4], ".") != 0)
1538 {
1539 char_u buf[MAXLINELEN];
1540
1541 aff_entry->ae_cond = getroom_save(&spin->si_blocks,
1542 items[4]);
1543 if (*items[0] == 'P')
1544 sprintf((char *)buf, "^%s", items[4]);
1545 else
1546 sprintf((char *)buf, "%s$", items[4]);
1547 aff_entry->ae_prog = vim_regcomp(buf,
1548 RE_MAGIC + RE_STRING);
1549 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00001550 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001551 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001552 else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2)
1553 {
1554 if (fol != NULL)
1555 smsg((char_u *)_("Duplicate FOL in %s line %d"),
1556 fname, lnum);
1557 else
1558 fol = vim_strsave(items[1]);
1559 }
1560 else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2)
1561 {
1562 if (low != NULL)
1563 smsg((char_u *)_("Duplicate LOW in %s line %d"),
1564 fname, lnum);
1565 else
1566 low = vim_strsave(items[1]);
1567 }
1568 else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2)
1569 {
1570 if (upp != NULL)
1571 smsg((char_u *)_("Duplicate UPP in %s line %d"),
1572 fname, lnum);
1573 else
1574 upp = vim_strsave(items[1]);
1575 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001576 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
1577 /* Ignore REP count */;
1578 else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
1579 {
1580 repentry_T *rp;
1581
1582 /* REP item */
1583 if (ga_grow(&aff->af_rep, 1) == FAIL)
1584 break;
1585 rp = ((repentry_T *)aff->af_rep.ga_data) + aff->af_rep.ga_len;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001586 rp->re_from = getroom_save(&spin->si_blocks, items[1]);
1587 rp->re_to = getroom_save(&spin->si_blocks, items[2]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001588 ++aff->af_rep.ga_len;
1589 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001590 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001591 smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
1592 fname, lnum, items[0]);
1593 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001594 }
1595
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001596 if (fol != NULL || low != NULL || upp != NULL)
1597 {
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00001598 /* Don't write a word table for an ASCII file, so that we don't check
1599 * for conflicts with a word table that matches 'encoding'. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001600 if (!spin->si_ascii)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00001601 {
1602 if (fol == NULL || low == NULL || upp == NULL)
1603 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
1604 else
1605 set_spell_chartab(fol, low, upp);
1606 }
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001607
1608 vim_free(fol);
1609 vim_free(low);
1610 vim_free(upp);
1611 }
1612
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001613 vim_free(pc);
1614 fclose(fd);
1615 return aff;
1616}
1617
1618/*
Bram Moolenaar5482f332005-04-17 20:18:43 +00001619 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
1620 * When "s" is NULL FALSE is returned.
1621 */
1622 static int
1623has_non_ascii(s)
1624 char_u *s;
1625{
1626 char_u *p;
1627
1628 if (s != NULL)
1629 for (p = s; *p != NUL; ++p)
1630 if (*p >= 128)
1631 return TRUE;
1632 return FALSE;
1633}
1634
1635/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001636 * Free the structure filled by spell_read_aff().
1637 */
1638 static void
1639spell_free_aff(aff)
1640 afffile_T *aff;
1641{
1642 hashtab_T *ht;
1643 hashitem_T *hi;
1644 int todo;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001645 affheader_T *ah;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001646 affentry_T *ae;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001647
1648 vim_free(aff->af_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001649
Bram Moolenaar51485f02005-06-04 21:55:20 +00001650 /* All this trouble to foree the "ae_prog" items... */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001651 for (ht = &aff->af_pref; ; ht = &aff->af_suff)
1652 {
1653 todo = ht->ht_used;
1654 for (hi = ht->ht_array; todo > 0; ++hi)
1655 {
1656 if (!HASHITEM_EMPTY(hi))
1657 {
1658 --todo;
1659 ah = HI2AH(hi);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001660 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
1661 vim_free(ae->ae_prog);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001662 }
1663 }
1664 if (ht == &aff->af_suff)
1665 break;
1666 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00001667
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001668 hash_clear(&aff->af_pref);
1669 hash_clear(&aff->af_suff);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001670 ga_clear(&aff->af_rep);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001671}
1672
1673/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001674 * Read dictionary file "fname".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001675 * Returns OK or FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001676 */
1677 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00001678spell_read_dic(fname, spin, affile)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001679 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001680 spellinfo_T *spin;
1681 afffile_T *affile;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001682{
Bram Moolenaar51485f02005-06-04 21:55:20 +00001683 hashtab_T ht;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001684 char_u line[MAXLINELEN];
Bram Moolenaar51485f02005-06-04 21:55:20 +00001685 char_u *afflist;
1686 char_u *dw;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001687 char_u *pc;
1688 char_u *w;
1689 int l;
1690 hash_T hash;
1691 hashitem_T *hi;
1692 FILE *fd;
1693 int lnum = 1;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001694 int non_ascii = 0;
1695 int retval = OK;
1696 char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001697
Bram Moolenaar51485f02005-06-04 21:55:20 +00001698 /*
1699 * Open the file.
1700 */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001701 fd = fopen((char *)fname, "r");
1702 if (fd == NULL)
1703 {
1704 EMSG2(_(e_notopen), fname);
1705 return FAIL;
1706 }
1707
Bram Moolenaar51485f02005-06-04 21:55:20 +00001708 /* The hashtable is only used to detect duplicated words. */
1709 hash_init(&ht);
1710
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001711 smsg((char_u *)_("Reading dictionary file %s..."), fname);
1712 out_flush();
1713
1714 /* Read and ignore the first line: word count. */
1715 (void)vim_fgets(line, MAXLINELEN, fd);
1716 if (!isdigit(*skipwhite(line)))
1717 EMSG2(_("E760: No word count in %s"), fname);
1718
1719 /*
1720 * Read all the lines in the file one by one.
1721 * The words are converted to 'encoding' here, before being added to
1722 * the hashtable.
1723 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001724 while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001725 {
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001726 line_breakcheck();
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001727 ++lnum;
1728
Bram Moolenaar51485f02005-06-04 21:55:20 +00001729 /* Remove CR, LF and white space from the end. White space halfway
1730 * the word is kept to allow e.g., "et al.". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001731 l = STRLEN(line);
1732 while (l > 0 && line[l - 1] <= ' ')
1733 --l;
1734 if (l == 0)
1735 continue; /* empty line */
1736 line[l] = NUL;
1737
Bram Moolenaar51485f02005-06-04 21:55:20 +00001738 /* This takes time, print a message now and then. */
1739 if ((lnum & 0x3ff) == 0)
1740 {
1741 vim_snprintf((char *)message, sizeof(message),
1742 _("line %6d - %s"), lnum, line);
1743 msg_start();
1744 msg_outtrans_attr(message, 0);
1745 msg_clr_eos();
1746 msg_didout = FALSE;
1747 msg_col = 0;
1748 out_flush();
1749 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001750
Bram Moolenaar51485f02005-06-04 21:55:20 +00001751 /* Find the optional affix names. */
1752 afflist = vim_strchr(line, '/');
1753 if (afflist != NULL)
1754 *afflist++ = NUL;
1755
1756 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
1757 if (spin->si_ascii && has_non_ascii(line))
1758 {
1759 ++non_ascii;
Bram Moolenaar5482f332005-04-17 20:18:43 +00001760 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001761 }
Bram Moolenaar5482f332005-04-17 20:18:43 +00001762
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001763 /* Convert from "SET" to 'encoding' when needed. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001764 if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001765 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001766 pc = string_convert(&spin->si_conv, line, NULL);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001767 if (pc == NULL)
1768 {
1769 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
1770 fname, lnum, line);
1771 continue;
1772 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001773 w = pc;
1774 }
1775 else
1776 {
1777 pc = NULL;
1778 w = line;
1779 }
1780
Bram Moolenaar51485f02005-06-04 21:55:20 +00001781 /* Store the word in the hashtable to be able to find duplicates. */
1782 dw = (char_u *)getroom_save(&spin->si_blocks, w);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001783 if (dw == NULL)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001784 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001785 vim_free(pc);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001786 if (retval == FAIL)
1787 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001788
Bram Moolenaar51485f02005-06-04 21:55:20 +00001789 hash = hash_hash(dw);
1790 hi = hash_lookup(&ht, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001791 if (!HASHITEM_EMPTY(hi))
1792 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
Bram Moolenaar51485f02005-06-04 21:55:20 +00001793 fname, lnum, line);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001794 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00001795 hash_add_item(&ht, hi, dw, hash);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001796
Bram Moolenaar51485f02005-06-04 21:55:20 +00001797 /* Add the word to the word tree(s). */
1798 if (store_word(dw, spin) == FAIL)
1799 retval = FAIL;
1800
1801 if (afflist != NULL)
1802 {
1803 /* Find all matching suffixes and add the resulting words.
1804 * Additionally do matching prefixes that combine. */
1805 if (store_aff_word(dw, spin, afflist,
1806 &affile->af_suff, &affile->af_pref, FALSE) == FAIL)
1807 retval = FAIL;
1808
1809 /* Find all matching prefixes and add the resulting words. */
1810 if (store_aff_word(dw, spin, afflist,
1811 &affile->af_pref, NULL, FALSE) == FAIL)
1812 retval = FAIL;
1813 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001814 }
1815
Bram Moolenaar51485f02005-06-04 21:55:20 +00001816 if (spin->si_ascii && non_ascii > 0)
1817 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
1818 non_ascii);
1819 hash_clear(&ht);
1820
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001821 fclose(fd);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001822 return retval;
1823}
1824
1825/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001826 * Apply affixes to a word and store the resulting words.
1827 * "ht" is the hashtable with affentry_T that need to be applied, either
1828 * prefixes or suffixes.
1829 * "xht", when not NULL, is the prefix hashtable, to be used additionally on
1830 * the resulting words for combining affixes.
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001831 *
1832 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001833 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001834 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00001835store_aff_word(word, spin, afflist, ht, xht, comb)
1836 char_u *word; /* basic word start */
1837 spellinfo_T *spin; /* spell info */
1838 char_u *afflist; /* list of names of supported affixes */
1839 hashtab_T *ht;
1840 hashtab_T *xht;
1841 int comb; /* only use affixes that combine */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001842{
1843 int todo;
1844 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001845 affheader_T *ah;
1846 affentry_T *ae;
1847 regmatch_T regmatch;
1848 char_u newword[MAXWLEN];
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001849 int retval = OK;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001850 int i;
1851 char_u *p;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001852
Bram Moolenaar51485f02005-06-04 21:55:20 +00001853 todo = ht->ht_used;
1854 for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001855 {
1856 if (!HASHITEM_EMPTY(hi))
1857 {
1858 --todo;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001859 ah = HI2AH(hi);
Bram Moolenaar5482f332005-04-17 20:18:43 +00001860
Bram Moolenaar51485f02005-06-04 21:55:20 +00001861 /* Check that the affix combines, if required, and that the word
1862 * supports this affix. */
1863 if ((!comb || ah->ah_combine)
1864 && vim_strchr(afflist, *ah->ah_key) != NULL)
Bram Moolenaar5482f332005-04-17 20:18:43 +00001865 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001866 /* Loop over all affix entries with this name. */
1867 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001868 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001869 /* Check the condition. It's not logical to match case
1870 * here, but it is required for compatibility with
1871 * Myspell. */
1872 regmatch.regprog = ae->ae_prog;
1873 regmatch.rm_ic = FALSE;
1874 if (ae->ae_prog == NULL
1875 || vim_regexec(&regmatch, word, (colnr_T)0))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001876 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001877 /* Match. Remove the chop and add the affix. */
1878 if (xht == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001879 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001880 /* prefix: chop/add at the start of the word */
1881 if (ae->ae_add == NULL)
1882 *newword = NUL;
1883 else
1884 STRCPY(newword, ae->ae_add);
1885 p = word;
1886 if (ae->ae_chop != NULL)
1887 /* Skip chop string. */
1888 for (i = mb_charlen(ae->ae_chop); i > 0; --i)
1889 mb_ptr_adv(p);
1890 STRCAT(newword, p);
1891 }
1892 else
1893 {
1894 /* suffix: chop/add at the end of the word */
1895 STRCPY(newword, word);
1896 if (ae->ae_chop != NULL)
1897 {
1898 /* Remove chop string. */
1899 p = newword + STRLEN(newword);
1900 for (i = mb_charlen(ae->ae_chop); i > 0; --i)
1901 mb_ptr_back(newword, p);
1902 *p = NUL;
1903 }
1904 if (ae->ae_add != NULL)
1905 STRCAT(newword, ae->ae_add);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001906 }
1907
Bram Moolenaar51485f02005-06-04 21:55:20 +00001908 /* Store the modified word. */
1909 if (store_word(newword, spin) == FAIL)
1910 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001911
Bram Moolenaar51485f02005-06-04 21:55:20 +00001912 /* When added a suffix and combining is allowed also
1913 * try adding prefixes additionally. */
1914 if (xht != NULL && ah->ah_combine)
1915 if (store_aff_word(newword, spin, afflist,
1916 xht, NULL, TRUE) == FAIL)
1917 retval = FAIL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001918 }
1919 }
1920 }
1921 }
1922 }
1923
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00001924 return retval;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001925}
1926
1927/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00001928 * Read a file with a list of words.
1929 */
1930 static int
1931spell_read_wordfile(fname, spin)
1932 char_u *fname;
1933 spellinfo_T *spin;
1934{
1935 FILE *fd;
1936 long lnum = 0;
1937 char_u rline[MAXLINELEN];
1938 char_u *line;
1939 char_u *pc = NULL;
1940 int l;
1941 int retval = OK;
1942 int did_word = FALSE;
1943 int non_ascii = 0;
1944 char_u *enc;
1945
1946 /*
1947 * Open the file.
1948 */
1949 fd = fopen((char *)fname, "r");
1950 if (fd == NULL)
1951 {
1952 EMSG2(_(e_notopen), fname);
1953 return FAIL;
1954 }
1955
1956 smsg((char_u *)_("Reading word file %s..."), fname);
1957 out_flush();
1958
1959 /*
1960 * Read all the lines in the file one by one.
1961 */
1962 while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
1963 {
1964 line_breakcheck();
1965 ++lnum;
1966
1967 /* Skip comment lines. */
1968 if (*rline == '#')
1969 continue;
1970
1971 /* Remove CR, LF and white space from the end. */
1972 l = STRLEN(rline);
1973 while (l > 0 && rline[l - 1] <= ' ')
1974 --l;
1975 if (l == 0)
1976 continue; /* empty or blank line */
1977 rline[l] = NUL;
1978
1979 /* Convert from "=encoding={encoding}" to 'encoding' when needed. */
1980 vim_free(pc);
1981 if (spin->si_conv.vc_type != CONV_NONE)
1982 {
1983 pc = string_convert(&spin->si_conv, rline, NULL);
1984 if (pc == NULL)
1985 {
1986 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
1987 fname, lnum, rline);
1988 continue;
1989 }
1990 line = pc;
1991 }
1992 else
1993 {
1994 pc = NULL;
1995 line = rline;
1996 }
1997
1998 if (*line == '=')
1999 {
2000 if (STRNCMP(line + 1, "encoding=", 9) == 0)
2001 {
2002 if (spin->si_conv.vc_type != CONV_NONE)
2003 smsg((char_u *)_("Duplicate =encoding= line ignored in %s line %d: %s"),
2004 fname, lnum, line);
2005 else if (did_word)
2006 smsg((char_u *)_("=encoding= line after word ignored in %s line %d: %s"),
2007 fname, lnum, line);
2008 else
2009 {
2010 /* Setup for conversion to 'encoding'. */
2011 enc = enc_canonize(line + 10);
2012 if (enc != NULL && !spin->si_ascii
2013 && convert_setup(&spin->si_conv, enc,
2014 p_enc) == FAIL)
2015 smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
2016 fname, line + 10, p_enc);
2017 vim_free(enc);
2018 }
2019 }
2020 else
2021 smsg((char_u *)_("= line ignored in %s line %d: %s"),
2022 fname, lnum, line);
2023 continue;
2024 }
2025
2026 /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
2027 if (spin->si_ascii && has_non_ascii(line))
2028 {
2029 ++non_ascii;
2030 continue;
2031 }
2032
2033 /* Normal word: store it. */
2034 if (store_word(line, spin) == FAIL)
2035 {
2036 retval = FAIL;
2037 break;
2038 }
2039 did_word = TRUE;
2040 }
2041
2042 vim_free(pc);
2043 fclose(fd);
2044
2045 if (spin->si_ascii && non_ascii > 0)
2046 smsg((char_u *)_("Ignored %d words with non-ASCII characters"),
2047 non_ascii);
2048 return retval;
2049}
2050
2051/*
2052 * Get part of an sblock_T, "len" bytes long.
2053 * This avoids calling free() for every little struct we use.
2054 * The memory is cleared to all zeros.
2055 * Returns NULL when out of memory.
2056 */
2057 static void *
2058getroom(blp, len)
2059 sblock_T **blp;
2060 size_t len; /* length needed */
2061{
2062 char_u *p;
2063 sblock_T *bl = *blp;
2064
2065 if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
2066 {
2067 /* Allocate a block of memory. This is not freed until much later. */
2068 bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
2069 if (bl == NULL)
2070 return NULL;
2071 bl->sb_next = *blp;
2072 *blp = bl;
2073 bl->sb_used = 0;
2074 }
2075
2076 p = bl->sb_data + bl->sb_used;
2077 bl->sb_used += len;
2078
2079 return p;
2080}
2081
2082/*
2083 * Make a copy of a string into memory allocated with getroom().
2084 */
2085 static char_u *
2086getroom_save(blp, s)
2087 sblock_T **blp;
2088 char_u *s;
2089{
2090 char_u *sc;
2091
2092 sc = (char_u *)getroom(blp, STRLEN(s) + 1);
2093 if (sc != NULL)
2094 STRCPY(sc, s);
2095 return sc;
2096}
2097
2098
2099/*
2100 * Free the list of allocated sblock_T.
2101 */
2102 static void
2103free_blocks(bl)
2104 sblock_T *bl;
2105{
2106 sblock_T *next;
2107
2108 while (bl != NULL)
2109 {
2110 next = bl->sb_next;
2111 vim_free(bl);
2112 bl = next;
2113 }
2114}
2115
2116/*
2117 * Allocate the root of a word tree.
2118 */
2119 static wordnode_T *
2120wordtree_alloc(blp)
2121 sblock_T **blp;
2122{
2123 return (wordnode_T *)getroom(blp, sizeof(wordnode_T));
2124}
2125
2126/*
2127 * Store a word in the tree(s).
2128 * Always store it in the case-folded tree.
2129 * For a keep-case word also store it in the keep-case tree.
2130 */
2131 static int
2132store_word(word, spin)
2133 char_u *word;
2134 spellinfo_T *spin;
2135{
2136 int len = STRLEN(word);
2137 int ct = captype(word, word + len);
2138 char_u foldword[MAXWLEN];
2139 int res;
2140
2141 (void)spell_casefold(word, len, foldword, MAXWLEN);
2142 res = tree_add_word(foldword, spin->si_foldroot, ct, spin->si_region,
2143 &spin->si_blocks);
2144 if (res == OK && ct == WF_KEEPCAP)
2145 res = tree_add_word(word, spin->si_keeproot, ct, spin->si_region,
2146 &spin->si_blocks);
2147 return res;
2148}
2149
2150/*
2151 * Add word "word" to a word tree at "root".
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002152 * Returns FAIL when out of memory.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002153 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002154 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00002155tree_add_word(word, root, flags, region, blp)
2156 char_u *word;
2157 wordnode_T *root;
2158 int flags;
2159 int region;
2160 sblock_T **blp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002161{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002162 wordnode_T *node = root;
2163 wordnode_T *np;
2164 wordnode_T **prev = NULL;
2165 int i;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002166
Bram Moolenaar51485f02005-06-04 21:55:20 +00002167 /* Add each byte of the word to the tree, including the NUL at the end. */
2168 for (i = 0; ; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002169 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002170 /* Look for the sibling that has the same character. They are sorted
2171 * on byte value, thus stop searching when a sibling is found with a
2172 * higher byte value. For zero bytes (end of word) check that the
2173 * flags are equal, there is a separate zero byte for each flag value.
2174 */
2175 while (node != NULL && (node->wn_byte < word[i]
2176 || (node->wn_byte == 0 && node->wn_flags != flags)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002177 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002178 prev = &node->wn_sibling;
2179 node = *prev;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002180 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002181 if (node == NULL || node->wn_byte != word[i])
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002182 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002183 /* Allocate a new node. */
2184 np = (wordnode_T *)getroom(blp, sizeof(wordnode_T));
2185 if (np == NULL)
2186 return FAIL;
2187 np->wn_byte = word[i];
2188 *prev = np;
2189 np->wn_sibling = node;
2190 node = np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002191 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002192
Bram Moolenaar51485f02005-06-04 21:55:20 +00002193 if (word[i] == NUL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002194 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002195 node->wn_flags = flags;
2196 node->wn_region |= region;
2197 break;
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +00002198 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002199 prev = &node->wn_child;
2200 node = *prev;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002201 }
2202
2203 return OK;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002204}
2205
2206/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002207 * Compress a tree: find tails that are identical and can be shared.
2208 */
2209 static void
2210wordtree_compress(root)
2211 wordnode_T *root;
2212{
2213 hashtab_T ht;
2214 int n;
2215 int tot = 0;
2216
2217 if (root != NULL)
2218 {
2219 hash_init(&ht);
2220 n = node_compress(root, &ht, &tot);
2221 smsg((char_u *)_("Compressed %d of %d nodes; %d%% remaining"),
2222 n, tot, (tot - n) * 100 / tot);
2223 hash_clear(&ht);
2224 }
2225}
2226
2227/*
2228 * Compress a node, its siblings and its children, depth first.
2229 * Returns the number of compressed nodes.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002230 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002231 static int
Bram Moolenaar51485f02005-06-04 21:55:20 +00002232node_compress(node, ht, tot)
2233 wordnode_T *node;
2234 hashtab_T *ht;
2235 int *tot; /* total count of nodes before compressing,
2236 incremented while going through the tree */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002237{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002238 wordnode_T *np;
2239 wordnode_T *tp;
2240 wordnode_T *child;
2241 hash_T hash;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002242 hashitem_T *hi;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002243 int len = 0;
2244 unsigned nr, n;
2245 int compressed = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002246
Bram Moolenaar51485f02005-06-04 21:55:20 +00002247 /*
2248 * Go through the list of siblings. Compress each child and then try
2249 * finding an identical child to replace it.
2250 * Note that with "child" we mean not just the node that is pointed to,
2251 * but the whole list of siblings, of which the node is the first.
2252 */
2253 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002254 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002255 ++len;
2256 if ((child = np->wn_child) != NULL)
2257 {
2258 /* Compress the child. This fills wn_hashkey. */
2259 compressed += node_compress(child, ht, tot);
2260
2261 /* Try to find an identical child. */
2262 hash = hash_hash(child->wn_hashkey);
2263 hi = hash_lookup(ht, child->wn_hashkey, hash);
2264 tp = NULL;
2265 if (!HASHITEM_EMPTY(hi))
2266 {
2267 /* There are children with an identical hash value. Now check
2268 * if there is one that is really identical. */
2269 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_next)
2270 if (node_equal(child, tp))
2271 {
2272 /* Found one! Now use that child in place of the
2273 * current one. This means the current child is
2274 * dropped from the tree. */
2275 np->wn_child = tp;
2276 ++compressed;
2277 break;
2278 }
2279 if (tp == NULL)
2280 {
2281 /* No other child with this hash value equals the child of
2282 * the node, add it to the linked list after the first
2283 * item. */
2284 tp = HI2WN(hi);
2285 child->wn_next = tp->wn_next;
2286 tp->wn_next = child;
2287 }
2288 }
2289 else
2290 /* No other child has this hash value, add it to the
2291 * hashtable. */
2292 hash_add_item(ht, hi, child->wn_hashkey, hash);
2293 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002294 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002295 *tot += len;
2296
2297 /*
2298 * Make a hash key for the node and its siblings, so that we can quickly
2299 * find a lookalike node. This must be done after compressing the sibling
2300 * list, otherwise the hash key would become invalid by the compression.
2301 */
2302 node->wn_hashkey[0] = len;
2303 nr = 0;
2304 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002305 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002306 if (np->wn_byte == NUL)
2307 /* end node: only use wn_flags and wn_region */
2308 n = np->wn_flags + (np->wn_region << 8);
2309 else
2310 /* byte node: use the byte value and the child pointer */
2311 n = np->wn_byte + ((long_u)np->wn_child << 8);
2312 nr = nr * 101 + n;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002313 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002314
2315 /* Avoid NUL bytes, it terminates the hash key. */
2316 n = nr & 0xff;
2317 node->wn_hashkey[1] = n == 0 ? 1 : n;
2318 n = (nr >> 8) & 0xff;
2319 node->wn_hashkey[2] = n == 0 ? 1 : n;
2320 n = (nr >> 16) & 0xff;
2321 node->wn_hashkey[3] = n == 0 ? 1 : n;
2322 n = (nr >> 24) & 0xff;
2323 node->wn_hashkey[4] = n == 0 ? 1 : n;
2324 node->wn_hashkey[5] = NUL;
2325
2326 return compressed;
2327}
2328
2329/*
2330 * Return TRUE when two nodes have identical siblings and children.
2331 */
2332 static int
2333node_equal(n1, n2)
2334 wordnode_T *n1;
2335 wordnode_T *n2;
2336{
2337 wordnode_T *p1;
2338 wordnode_T *p2;
2339
2340 for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
2341 p1 = p1->wn_sibling, p2 = p2->wn_sibling)
2342 if (p1->wn_byte != p2->wn_byte
2343 || (p1->wn_byte == NUL
2344 ? (p1->wn_flags != p2->wn_flags
2345 || p1->wn_region != p2->wn_region)
2346 : (p1->wn_child != p2->wn_child)))
2347 break;
2348
2349 return p1 == NULL && p2 == NULL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002350}
2351
2352/*
2353 * Write a number to file "fd", MSB first, in "len" bytes.
2354 */
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002355 void
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002356put_bytes(fd, nr, len)
2357 FILE *fd;
2358 long_u nr;
2359 int len;
2360{
2361 int i;
2362
2363 for (i = len - 1; i >= 0; --i)
2364 putc((int)(nr >> (i * 8)), fd);
2365}
2366
2367/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002368 * Write the Vim spell file "fname".
2369 */
2370 static void
Bram Moolenaar51485f02005-06-04 21:55:20 +00002371write_vim_spell(fname, spin, regcount, regchars)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002372 char_u *fname;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002373 spellinfo_T *spin;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002374 int regcount; /* number of regions */
2375 char_u *regchars; /* region names */
2376{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002377 FILE *fd;
2378 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002379 int round;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002380 wordnode_T *tree;
2381 int nodecount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002382
Bram Moolenaar51485f02005-06-04 21:55:20 +00002383 fd = fopen((char *)fname, "w");
2384 if (fd == NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002385 {
2386 EMSG2(_(e_notopen), fname);
2387 return;
2388 }
2389
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002390 /* <HEADER>: <fileID> <regioncnt> <regionname> ...
2391 * <charflagslen> <charflags> <fcharslen> <fchars> */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002392
2393 /* <fileID> */
2394 if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
2395 EMSG(_(e_write));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002396
2397 /* write the region names if there is more than one */
2398 if (regcount > 1)
2399 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002400 putc(regcount, fd); /* <regioncnt> <regionname> ... */
2401 fwrite(regchars, (size_t)(regcount * 2), (size_t)1, fd);
2402 regionmask = (1 << regcount) - 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002403 }
2404 else
2405 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002406 putc(0, fd);
2407 regionmask = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002408 }
2409
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002410 /* Write the table with character flags and table for case folding.
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002411 * <charflagslen> <charflags> <fcharlen> <fchars>
2412 * Skip this for ASCII, the table may conflict with the one used for
2413 * 'encoding'. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002414 if (spin->si_ascii)
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002415 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002416 putc(0, fd);
2417 putc(0, fd);
2418 putc(0, fd);
Bram Moolenaar6f3058f2005-04-24 21:58:05 +00002419 }
2420 else
Bram Moolenaar51485f02005-06-04 21:55:20 +00002421 write_spell_chartab(fd);
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002422
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002423
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002424 /* <SUGGEST> : <suggestlen> <more> ...
2425 * TODO. Only write a zero length for now. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002426 put_bytes(fd, 0L, 4); /* <suggestlen> */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002427
Bram Moolenaar50cde822005-06-05 21:54:54 +00002428 spin->si_memtot = 0;
2429
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002430 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002431 * <LWORDTREE> <KWORDTREE>
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002432 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002433 for (round = 1; round <= 2; ++round)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002434 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002435 tree = (round == 1) ? spin->si_foldroot : spin->si_keeproot;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002436
Bram Moolenaar51485f02005-06-04 21:55:20 +00002437 /* Count the number of nodes. Needed to be able to allocate the
2438 * memory when reading the nodes. Also fills in the index for shared
2439 * nodes. */
2440 nodecount = put_tree(NULL, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002441
Bram Moolenaar51485f02005-06-04 21:55:20 +00002442 /* number of nodes in 4 bytes */
2443 put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
Bram Moolenaar50cde822005-06-05 21:54:54 +00002444 spin->si_memtot += nodecount + nodecount * sizeof(int);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002445
Bram Moolenaar51485f02005-06-04 21:55:20 +00002446 /* Write the nodes. */
2447 (void)put_tree(fd, tree, 0, regionmask);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002448 }
2449
Bram Moolenaar51485f02005-06-04 21:55:20 +00002450 fclose(fd);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002451}
2452
2453/*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002454 * Dump a word tree at node "node".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002455 *
Bram Moolenaar51485f02005-06-04 21:55:20 +00002456 * This first writes the list of possible bytes (siblings). Then for each
2457 * byte recursively write the children.
2458 *
2459 * NOTE: The code here must match the code in read_tree(), since assumptions
2460 * are made about the indexes (so that we don't have to write them in the
2461 * file).
2462 *
2463 * Returns the number of nodes used.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002464 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002465 static int
2466put_tree(fd, node, index, regionmask)
2467 FILE *fd; /* NULL when only counting */
2468 wordnode_T *node;
2469 int index;
2470 int regionmask;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002471{
Bram Moolenaar51485f02005-06-04 21:55:20 +00002472 int newindex = index;
2473 int siblingcount = 0;
2474 wordnode_T *np;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002475 int flags;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002476
Bram Moolenaar51485f02005-06-04 21:55:20 +00002477 /* If "node" is zero the tree is empty. */
2478 if (node == NULL)
2479 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002480
Bram Moolenaar51485f02005-06-04 21:55:20 +00002481 /* Store the index where this node is written. */
2482 node->wn_index = index;
2483
2484 /* Count the number of siblings. */
2485 for (np = node; np != NULL; np = np->wn_sibling)
2486 ++siblingcount;
2487
2488 /* Write the sibling count. */
2489 if (fd != NULL)
2490 putc(siblingcount, fd); /* <siblingcount> */
2491
2492 /* Write each sibling byte and optionally extra info. */
2493 for (np = node; np != NULL; np = np->wn_sibling)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002494 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002495 if (np->wn_byte == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002496 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002497 if (fd != NULL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002498 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002499 /* For a NUL byte (end of word) instead of the byte itself
2500 * we write the flag/region items. */
2501 flags = np->wn_flags;
2502 if (regionmask != 0 && np->wn_region != regionmask)
2503 flags |= WF_REGION;
2504 if (flags == 0)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002505 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002506 /* word without flags or region */
2507 putc(BY_NOFLAGS, fd); /* <byte> */
2508 }
2509 else
2510 {
2511 putc(BY_FLAGS, fd); /* <byte> */
2512 putc(flags, fd); /* <flags> */
2513 if (flags & WF_REGION)
2514 putc(np->wn_region, fd); /* <regionmask> */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002515 }
2516 }
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00002517 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002518 else
2519 {
2520 if (np->wn_child->wn_index != 0 && np->wn_child->wn_wnode != node)
2521 {
2522 /* The child is written elsewhere, write the reference. */
2523 if (fd != NULL)
2524 {
2525 putc(BY_INDEX, fd); /* <byte> */
2526 /* <nodeidx> */
2527 put_bytes(fd, (long_u)np->wn_child->wn_index, 3);
2528 }
2529 }
2530 else if (np->wn_child->wn_wnode == NULL)
2531 /* We will write the child below and give it an index. */
2532 np->wn_child->wn_wnode = node;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002533
Bram Moolenaar51485f02005-06-04 21:55:20 +00002534 if (fd != NULL)
2535 if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
2536 {
2537 EMSG(_(e_write));
2538 return 0;
2539 }
2540 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002541 }
Bram Moolenaar51485f02005-06-04 21:55:20 +00002542
2543 /* Space used in the array when reading: one for each sibling and one for
2544 * the count. */
2545 newindex += siblingcount + 1;
2546
2547 /* Recursively dump the children of each sibling. */
2548 for (np = node; np != NULL; np = np->wn_sibling)
2549 if (np->wn_byte != 0 && np->wn_child->wn_wnode == node)
2550 newindex = put_tree(fd, np->wn_child, newindex, regionmask);
2551
2552 return newindex;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002553}
2554
2555
2556/*
2557 * ":mkspell outfile infile ..."
2558 */
2559 void
2560ex_mkspell(eap)
2561 exarg_T *eap;
2562{
2563 int fcount;
2564 char_u **fnames;
2565 char_u fname[MAXPATHL];
2566 char_u wfname[MAXPATHL];
2567 afffile_T *(afile[8]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002568 int i;
2569 int len;
2570 char_u region_name[16];
2571 struct stat st;
Bram Moolenaar5482f332005-04-17 20:18:43 +00002572 char_u *arg = eap->arg;
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002573 int error = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002574 spellinfo_T spin;
2575
2576 vim_memset(&spin, 0, sizeof(spin));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002577
Bram Moolenaar5482f332005-04-17 20:18:43 +00002578 if (STRNCMP(arg, "-ascii", 6) == 0)
2579 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002580 spin.si_ascii = TRUE;
Bram Moolenaar5482f332005-04-17 20:18:43 +00002581 arg = skipwhite(arg + 6);
2582 }
2583
2584 /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
2585 if (get_arglist_exp(arg, &fcount, &fnames) == FAIL)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002586 return;
2587 if (fcount < 2)
2588 EMSG(_(e_invarg)); /* need at least output and input names */
2589 else if (fcount > 9)
2590 EMSG(_("E754: Only up to 8 regions supported"));
2591 else
2592 {
2593 /* Check for overwriting before doing things that may take a lot of
2594 * time. */
Bram Moolenaar9c13b352005-05-19 20:53:52 +00002595 vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
Bram Moolenaar51485f02005-06-04 21:55:20 +00002596 spin.si_ascii ? (char_u *)"ascii" : p_enc);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002597 if (!eap->forceit && mch_stat((char *)wfname, &st) >= 0)
2598 {
2599 EMSG(_(e_exists));
2600 goto theend;
2601 }
2602 if (mch_isdir(fnames[0]))
2603 {
2604 EMSG2(_(e_isadir2), fnames[0]);
2605 goto theend;
2606 }
2607
2608 /*
2609 * Init the aff and dic pointers.
2610 * Get the region names if there are more than 2 arguments.
2611 */
2612 for (i = 1; i < fcount; ++i)
2613 {
2614 afile[i - 1] = NULL;
Bram Moolenaar51485f02005-06-04 21:55:20 +00002615
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002616 if (fcount > 2)
2617 {
2618 len = STRLEN(fnames[i]);
2619 if (STRLEN(gettail(fnames[i])) < 5 || fnames[i][len - 3] != '_')
2620 {
2621 EMSG2(_("E755: Invalid region in %s"), fnames[i]);
2622 goto theend;
2623 }
2624 else
2625 {
2626 region_name[(i - 1) * 2] = TOLOWER_ASC(fnames[i][len - 2]);
2627 region_name[(i - 1) * 2 + 1] =
2628 TOLOWER_ASC(fnames[i][len - 1]);
2629 }
2630 }
2631 }
2632
Bram Moolenaar8fef2ad2005-04-23 20:42:23 +00002633 /* Clear the char type tables, don't want to use any of the currently
2634 * used spell properties. */
2635 init_spell_chartab();
2636
Bram Moolenaar51485f02005-06-04 21:55:20 +00002637 spin.si_foldroot = wordtree_alloc(&spin.si_blocks);
2638 spin.si_keeproot = wordtree_alloc(&spin.si_blocks);
2639 if (spin.si_foldroot == NULL || spin.si_keeproot == NULL)
2640 {
2641 error = TRUE;
2642 goto theend;
2643 }
2644
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002645 /*
2646 * Read all the .aff and .dic files.
2647 * Text is converted to 'encoding'.
Bram Moolenaar51485f02005-06-04 21:55:20 +00002648 * Words are stored in the case-folded and keep-case trees.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002649 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002650 for (i = 1; i < fcount && !error; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002651 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002652 spin.si_conv.vc_type = CONV_NONE;
2653 spin.si_region = 1 << (i - 1);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002654
Bram Moolenaar51485f02005-06-04 21:55:20 +00002655 vim_snprintf((char *)fname, sizeof(fname), "%s.aff", fnames[i]);
2656 if (mch_stat((char *)fname, &st) >= 0)
2657 {
2658 /* Read the .aff file. Will init "spin->si_conv" based on the
2659 * "SET" line. */
2660 afile[i - 1] = spell_read_aff(fname, &spin);
2661 if (afile[i - 1] == NULL)
2662 error = TRUE;
2663 else
2664 {
2665 /* Read the .dic file and store the words in the trees. */
2666 vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
2667 fnames[i]);
2668 if (spell_read_dic(fname, &spin, afile[i - 1]) == FAIL)
2669 error = TRUE;
2670 }
2671 }
2672 else
2673 {
2674 /* No .aff file, try reading the file as a word list. Store
2675 * the words in the trees. */
2676 if (spell_read_wordfile(fnames[i], &spin) == FAIL)
2677 error = TRUE;
2678 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002679
2680 /* Free any conversion stuff. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002681 convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002682 }
2683
Bram Moolenaar51485f02005-06-04 21:55:20 +00002684 if (!error)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002685 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00002686 /*
2687 * Remove the dummy NUL from the start of the tree root.
2688 */
2689 spin.si_foldroot = spin.si_foldroot->wn_sibling;
2690 spin.si_keeproot = spin.si_keeproot->wn_sibling;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002691
2692 /*
Bram Moolenaar51485f02005-06-04 21:55:20 +00002693 * Combine tails in the tree.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002694 */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002695 MSG(_("Compressing word tree..."));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002696 out_flush();
Bram Moolenaar51485f02005-06-04 21:55:20 +00002697 wordtree_compress(spin.si_foldroot);
2698 wordtree_compress(spin.si_keeproot);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002699 }
2700
Bram Moolenaar51485f02005-06-04 21:55:20 +00002701 if (!error)
2702 {
2703 /*
2704 * Write the info in the spell file.
2705 */
2706 smsg((char_u *)_("Writing spell file %s..."), wfname);
2707 out_flush();
2708 write_vim_spell(wfname, &spin, fcount - 1, region_name);
2709 MSG(_("Done!"));
Bram Moolenaar50cde822005-06-05 21:54:54 +00002710
2711 smsg((char_u *)_("Estimated runtime memory use: %d bytes"),
2712 spin.si_memtot);
Bram Moolenaar51485f02005-06-04 21:55:20 +00002713 out_flush();
2714 }
2715
2716 /* Free the allocated memory. */
2717 free_blocks(spin.si_blocks);
2718
2719 /* Free the .aff file structures. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002720 for (i = 1; i < fcount; ++i)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002721 if (afile[i - 1] != NULL)
2722 spell_free_aff(afile[i - 1]);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002723 }
2724
2725theend:
2726 FreeWild(fcount, fnames);
2727}
2728
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002729#endif /* FEAT_MBYTE */
2730
Bram Moolenaar51485f02005-06-04 21:55:20 +00002731
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002732#endif /* FEAT_SYN_HL */