blob: f72325b699a38dba72b84f5776b32c2cc98c6f0a [file] [log] [blame]
Bram Moolenaare19defe2005-03-21 08:23:33 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * spell.c: code for spell checking
Bram Moolenaarfc735152005-03-22 22:54:12 +000012 *
13 * Terminology:
14 * "dword" is a dictionary word, made out of letters and digits.
15 * "nword" is a word with a character that's not a letter or digit.
16 * "word" is either a "dword" or an "nword".
Bram Moolenaare19defe2005-03-21 08:23:33 +000017 */
18
19#if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
20# include <io.h> /* for lseek(), must be before vim.h */
21#endif
22
23#include "vim.h"
24
25#if defined(FEAT_SYN_HL) || defined(PROTO)
26
27#ifdef HAVE_FCNTL_H
28# include <fcntl.h>
29#endif
30
Bram Moolenaarfc735152005-03-22 22:54:12 +000031#define MAXWLEN 100 /* assume max. word len is this many bytes */
32
Bram Moolenaare19defe2005-03-21 08:23:33 +000033/*
34 * Structure that is used to store the text from the language file. This
35 * avoids the need to allocate each individual word and copying it. It's
36 * allocated in big chunks for speed.
37 */
38#define SBLOCKSIZE 4096 /* default size of sb_data */
39typedef struct sblock_S sblock_T;
40struct sblock_S
41{
42 sblock_T *sb_next; /* next block in list */
43 char_u sb_data[1]; /* data, actually longer */
44};
45
Bram Moolenaarfc735152005-03-22 22:54:12 +000046/* Structure to store words and additions. Used twice : once for case-folded
47 * and once for keep-case words. */
48typedef struct winfo_S
49{
50 hashtab_T wi_ht; /* hashtable with all words, both dword_T and
51 nword_T (check flags for DW_NWORD) */
52 garray_T wi_add; /* table with pointers to additions in a
53 dword_T */
54 int wi_addlen; /* longest addition length */
55} winfo_T;
56
Bram Moolenaare19defe2005-03-21 08:23:33 +000057/*
58 * Structure used to store words and other info for one language.
59 */
60typedef struct slang_S slang_T;
Bram Moolenaare19defe2005-03-21 08:23:33 +000061struct slang_S
62{
63 slang_T *sl_next; /* next language */
64 char_u sl_name[2]; /* language name "en", "nl", etc. */
Bram Moolenaarfc735152005-03-22 22:54:12 +000065 winfo_T sl_fwords; /* case-folded words and additions */
66 winfo_T sl_kwords; /* keep-case words and additions */
67 char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
Bram Moolenaare19defe2005-03-21 08:23:33 +000068 sblock_T *sl_block; /* list with allocated memory blocks */
69};
70
71static slang_T *first_lang = NULL;
72
Bram Moolenaarfc735152005-03-22 22:54:12 +000073/* Entry for dword in "sl_ht". Also used for part of an nword, starting with
74 * the first non-word character. And used for additions in wi_add. */
75typedef struct dword_S
76{
77 char_u dw_region; /* one bit per region where it's valid */
78 char_u dw_flags; /* WF_ flags */
79 char_u dw_word[1]; /* actually longer, NUL terminated */
80} dword_T;
81
82#define REGION_ALL 0xff
83
/* Get the dword_T pointer from a hashitem whose key is dw_word.  The key is
 * preceded by the two one-byte items dw_region and dw_flags.
 * The argument and the result are parenthesized so that the macro can be
 * used with any expression and inside a larger expression. */
#define HI2DWORD(hi)    ((dword_T *)((hi)->hi_key - 2))
85
86/* Entry for a nword in "sl_ht". Note that the last three items must be
87 * identical to dword_T, so that they can be in the same hashtable. */
88typedef struct nword_S
89{
90 garray_T nw_ga; /* table with pointers to dword_T for part
91 starting with non-word character */
92 int nw_maxlen; /* longest nword length (after the dword) */
93 char_u nw_region; /* one bit per region where it's valid */
94 char_u nw_flags; /* WF_ flags */
95 char_u nw_word[1]; /* actually longer, NUL terminated */
96} nword_T;
97
98/* Get nword_T pointer from hashitem that uses nw_word */
99static nword_T dumnw;
100#define HI2NWORD(hi) ((nword_T *)((hi)->hi_key - (dumnw.nw_word - (char_u *)&dumnw)))
101
102#define DW_CAP 0x01 /* word must start with capital */
103#define DW_RARE 0x02 /* rare word */
104#define DW_NWORD 0x04 /* this is an nword_T */
105#define DW_DWORD 0x08 /* (also) use as dword without nword */
106
Bram Moolenaare19defe2005-03-21 08:23:33 +0000107/*
108 * Structure used in "b_langp", filled from 'spelllang'.
109 */
110typedef struct langp_S
111{
112 slang_T *lp_slang; /* info for this language (NULL for last one) */
113 int lp_region; /* bitmask for region or REGION_ALL */
114} langp_T;
115
116#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
/* Entry "i" of a growarray that holds dword_T pointers.  The index and the
 * whole expansion are parenthesized so that any expression can be passed
 * and the result can be used inside a larger expression. */
#define DWORD_ENTRY(gap, i)     (*(((dword_T **)(gap)->ga_data) + (i)))
Bram Moolenaare19defe2005-03-21 08:23:33 +0000118
119#define SP_OK 0
120#define SP_BAD 1
121#define SP_RARE 2
122#define SP_LOCAL 3
123
Bram Moolenaarfc735152005-03-22 22:54:12 +0000124static char *e_invchar2 = N_("E753: Invalid character in \"%s\"");
125
Bram Moolenaare19defe2005-03-21 08:23:33 +0000126static slang_T *spell_load_lang __ARGS((char_u *lang));
127static void spell_load_file __ARGS((char_u *fname));
128static int find_region __ARGS((char_u *rp, char_u *region));
129
130/*
131 * Main spell-checking function.
132 * "ptr" points to the start of a word.
133 * "*attrp" is set to the attributes for a badly spelled word. For a non-word
134 * or when it's OK it remains unchanged.
135 * This must only be called when 'spelllang' is not empty.
136 * Returns the length of the word in bytes, also when it's OK, so that the
137 * caller can skip over the word.
138 */
139 int
140spell_check(wp, ptr, attrp)
141 win_T *wp; /* current window */
142 char_u *ptr;
143 int *attrp;
144{
Bram Moolenaarfc735152005-03-22 22:54:12 +0000145 char_u *e; /* end of word */
146 char_u *ne; /* new end of word */
147 char_u *me; /* max. end of match */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000148 langp_T *lp;
149 int result;
150 int len = 0;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000151 hashitem_T *hi;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000152 int round;
153 char_u kword[MAXWLEN + 1]; /* word copy */
154 char_u fword[MAXWLEN + 1]; /* word with case folded */
155 char_u match[MAXWLEN + 1]; /* fword with additional chars */
156 char_u kwordclen[MAXWLEN + 1]; /* len of orig chars after kword[] */
157 char_u fwordclen[MAXWLEN + 1]; /* len of chars after fword[] */
158 char_u *clen;
159 int cidx = 0; /* char index in xwordclen[] */
160 hash_T fhash; /* hash for fword */
161 hash_T khash; /* hash for kword */
162 int match_len = 0; /* length of match[] */
163 int fmatch_len = 0; /* length of nword match in chars */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000164 garray_T *gap;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000165 int l, t;
166 char_u *p, *tp;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000167 int n;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000168 dword_T *dw;
169 dword_T *tdw;
170 winfo_T *wi;
171 nword_T *nw;
172 int w_isupper;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000173
174 /* Find the end of the word. We already know that *ptr is a word char. */
175 e = ptr;
176 do
177 {
178 mb_ptr_adv(e);
179 ++len;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000180 } while (*e != NUL && spell_iswordc(e));
181
182 /* A word starting with a number is always OK. */
183 if (*ptr >= '0' && *ptr <= '9')
184 return (int)(e - ptr);
185
186#ifdef FEAT_MBYTE
187 w_isupper = MB_ISUPPER(mb_ptr2char(ptr));
188#else
189 w_isupper = MB_ISUPPER(*ptr);
190#endif
191
192 /* Make a copy of the word so that it can be NUL terminated.
193 * Compute hash value. */
194 mch_memmove(kword, ptr, e - ptr);
195 kword[e - ptr] = NUL;
196 khash = hash_hash(kword);
197
198 /* Make case-folded copy of the Word. Compute its hash value. */
199 (void)str_foldcase(ptr, e - ptr, fword, MAXWLEN + 1);
200 fhash = hash_hash(fword);
201
202 /* Further case-folded characters to check for an nword match go in
203 * match[]. */
204 me = e;
205
206 /* "ne" is the end for the longest match */
207 ne = e;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000208
209 /* The word is bad unless we find it in the dictionary. */
210 result = SP_BAD;
211
Bram Moolenaare19defe2005-03-21 08:23:33 +0000212 /*
213 * Loop over the languages specified in 'spelllang'.
Bram Moolenaarfc735152005-03-22 22:54:12 +0000214 * We check them all, because a matching nword may be longer than an
215 * already found dword or nword.
Bram Moolenaare19defe2005-03-21 08:23:33 +0000216 */
Bram Moolenaarfc735152005-03-22 22:54:12 +0000217 for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL; ++lp)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000218 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000219 /*
220 * Check for a matching word in the hashtable.
221 * Check both the keep-case word and the fold-case word.
222 */
223 for (round = 0; round <= 1; ++round)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000224 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000225 if (round == 0)
226 {
227 wi = &lp->lp_slang->sl_kwords;
228 hi = hash_lookup(&wi->wi_ht, kword, khash);
229 }
230 else
231 {
232 wi = &lp->lp_slang->sl_fwords;
233 hi = hash_lookup(&wi->wi_ht, fword, fhash);
234 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000235 if (!HASHITEM_EMPTY(hi))
236 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000237 /*
238 * If this is an nword entry, check for match with remainder.
239 */
240 dw = HI2DWORD(hi);
241 if (dw->dw_flags & DW_NWORD)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000242 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000243 /* If the word is not defined as a dword we must find an
244 * nword. */
245 if ((dw->dw_flags & DW_DWORD) == 0)
246 dw = NULL;
247
248 /* Fold more characters when needed for the nword. Need
249 * to do one extra to check for a non-word character after
250 * the nword. Also keep the byte-size of each character,
251 * both before and after folding case. */
252 nw = HI2NWORD(hi);
253 while ((round == 0
254 ? me - e <= nw->nw_maxlen
255 : match_len <= nw->nw_maxlen)
256 && *me != NUL)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000257 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000258#ifdef FEAT_MBYTE
259 l = mb_ptr2len_check(me);
260#else
261 l = 1;
262#endif
263 (void)str_foldcase(me, l, match + match_len,
264 MAXWLEN - match_len + 1);
265 me += l;
266 kwordclen[cidx] = l;
267 fwordclen[cidx] = STRLEN(match + match_len);
268 match_len += fwordclen[cidx];
269 ++cidx;
270 }
271
272 if (round == 0)
273 {
274 clen = kwordclen;
275 tp = e;
276 }
277 else
278 {
279 clen = fwordclen;
280 tp = match;
281 }
282
283 /* Match with each item. The longest match wins:
284 * "you've" is longer than "you". */
285 gap = &nw->nw_ga;
286 for (t = 0; t < gap->ga_len; ++t)
287 {
288 /* Skip entries with wrong case for first char.
289 * Continue if it's a rare word without a captial. */
290 tdw = DWORD_ENTRY(gap, t);
291 if ((tdw->dw_flags & (DW_CAP | DW_RARE)) == DW_CAP
292 && !w_isupper)
293 continue;
294
295 p = tdw->dw_word;
296 l = 0;
297 for (n = 0; p[n] != 0; n += clen[l++])
298 if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
299 break;
300
301 /* Use a match if it's longer than previous matches
302 * and the next character is not a word character. */
303 if (p[n] == 0 && l > fmatch_len && (tp[n] == 0
304 || !spell_iswordc(tp + n)))
305 {
306 dw = tdw;
307 fmatch_len = l;
308 if (round == 0)
309 ne = tp + n;
310 else
311 {
312 /* Need to use the length of the original
313 * chars, not the fold-case ones. */
314 ne = e;
315 for (l = 0; l < fmatch_len; ++l)
316 ne += kwordclen[l];
317 }
318 if ((lp->lp_region & tdw->dw_region) == 0)
319 result = SP_LOCAL;
320 else if ((tdw->dw_flags & DW_CAP) && !w_isupper)
321 result = SP_RARE;
322 else
323 result = SP_OK;
324 }
325 }
326
327 }
328
329 if (dw != NULL)
330 {
331 if (dw->dw_flags & DW_CAP)
332 {
333 /* Need to check first letter is uppercase. If it is,
334 * check region. If it isn't it may be a rare word.
335 * */
336 if (w_isupper)
337 {
338 if ((dw->dw_region & lp->lp_region) == 0)
339 result = SP_LOCAL;
340 else
341 result = SP_OK;
342 }
343 else if (dw->dw_flags & DW_RARE)
344 result = SP_RARE;
345 }
346 else
347 {
348 if ((dw->dw_region & lp->lp_region) == 0)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000349 result = SP_LOCAL;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000350 else if (dw->dw_flags & DW_RARE)
351 result = SP_RARE;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000352 else
353 result = SP_OK;
354 }
Bram Moolenaarfc735152005-03-22 22:54:12 +0000355 }
356 }
357 }
358
359 /*
360 * Check for an addition.
361 * Only after a dword, not after an nword.
362 * Check both the keep-case word and the fold-case word.
363 */
364 if (fmatch_len == 0)
365 for (round = 0; round <= 1; ++round)
366 {
367 if (round == 0)
368 wi = &lp->lp_slang->sl_kwords;
369 else
370 wi = &lp->lp_slang->sl_fwords;
371 gap = &wi->wi_add;
372 if (gap->ga_len == 0) /* no additions, skip quickly */
373 continue;
374
375 /* Fold characters when needed for the addition. Need to do one
376 * extra to check for a word character after the addition. */
377 while ((round == 0
378 ? me - e <= wi->wi_addlen
379 : match_len <= wi->wi_addlen)
380 && *me != NUL)
381 {
382#ifdef FEAT_MBYTE
383 l = mb_ptr2len_check(me);
384#else
385 l = 1;
386#endif
387 (void)str_foldcase(me, l, match + match_len,
388 MAXWLEN - match_len + 1);
389 me += l;
390 kwordclen[cidx] = l;
391 fwordclen[cidx] = STRLEN(match + match_len);
392 match_len += fwordclen[cidx];
393 ++cidx;
394 }
395
396 if (round == 0)
397 {
398 clen = kwordclen;
399 tp = e;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000400 }
401 else
402 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000403 clen = fwordclen;
404 tp = match;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000405 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000406
Bram Moolenaarfc735152005-03-22 22:54:12 +0000407 /* Addition lookup. Uses a linear search, there should be
408 * very few. If there is a match adjust "ne" to the end.
409 * This doesn't change whether a word was good or bad, only
410 * the length. */
411 for (t = 0; t < gap->ga_len; ++t)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000412 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000413 tdw = DWORD_ENTRY(gap, t);
414 p = tdw->dw_word;
415 l = 0;
416 for (n = 0; p[n] != 0; n += clen[l++])
417 if (vim_memcmp(p + n, tp + n, clen[l]) != 0)
418 break;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000419
Bram Moolenaarfc735152005-03-22 22:54:12 +0000420 /* Use a match if it's longer than previous matches
421 * and the next character is not a word character. */
422 if (p[n] == 0 && l > fmatch_len
423 && (tp[n] == 0 || !spell_iswordc(tp + n)))
424 {
425 fmatch_len = l;
426 if (round == 0)
427 ne = tp + n;
428 else
429 {
430 /* Need to use the length of the original
431 * chars, not the fold-case ones. */
432 ne = e;
433 for (l = 0; l < fmatch_len; ++l)
434 ne += kwordclen[l];
435 }
436 }
437 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000438 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000439 }
440
441 if (result != SP_OK)
442 {
443 if (result == SP_BAD)
444 *attrp = highlight_attr[HLF_SPB];
445 else if (result == SP_RARE)
446 *attrp = highlight_attr[HLF_SPR];
447 else
448 *attrp = highlight_attr[HLF_SPL];
449 }
450
Bram Moolenaarfc735152005-03-22 22:54:12 +0000451 return (int)(ne - ptr);
Bram Moolenaare19defe2005-03-21 08:23:33 +0000452}
453
454static slang_T *load_lp; /* passed from spell_load_lang() to
455 spell_load_file() */
456
457/*
458 * Load language "lang[2]".
459 */
460 static slang_T *
461spell_load_lang(lang)
462 char_u *lang;
463{
464 slang_T *lp;
465 char_u fname_enc[80];
466 char_u fname_ascii[20];
467 char_u *p;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000468 int r;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000469
470 lp = (slang_T *)alloc(sizeof(slang_T));
471 if (lp != NULL)
472 {
473 lp->sl_name[0] = lang[0];
474 lp->sl_name[1] = lang[1];
Bram Moolenaarfc735152005-03-22 22:54:12 +0000475 hash_init(&lp->sl_fwords.wi_ht);
476 ga_init2(&lp->sl_fwords.wi_add, sizeof(dword_T *), 4);
477 lp->sl_fwords.wi_addlen = 0;
478 hash_init(&lp->sl_kwords.wi_ht);
479 ga_init2(&lp->sl_kwords.wi_add, sizeof(dword_T *), 4);
480 lp->sl_kwords.wi_addlen = 0;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000481 lp->sl_regions[0] = NUL;
482 lp->sl_block = NULL;
483
484 /* Find all spell files for "lang" in 'runtimepath' and load them.
485 * Use 'encoding', except that we use "latin1" for "latin9". */
486#ifdef FEAT_MBYTE
487 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
488 p = p_enc;
489 else
490#endif
491 p = (char_u *)"latin1";
492 load_lp = lp;
493 sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
Bram Moolenaarfc735152005-03-22 22:54:12 +0000494 r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
495 if (r == FAIL)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000496 {
497 /* Try again to find an ASCII spell file. */
498 sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
Bram Moolenaarfc735152005-03-22 22:54:12 +0000499 r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
500 }
501
502 if (r == FAIL)
503 {
504 vim_free(lp);
505 lp = NULL;
506 smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
Bram Moolenaare19defe2005-03-21 08:23:33 +0000507 fname_enc + 6);
Bram Moolenaare19defe2005-03-21 08:23:33 +0000508 }
509 else
510 {
511 lp->sl_next = first_lang;
512 first_lang = lp;
513 }
514 }
515
516 return lp;
517}
518
519/*
520 * Load one spell file into "load_lp".
521 * Invoked through do_in_runtimepath().
522 */
523 static void
524spell_load_file(fname)
525 char_u *fname;
526{
527 int fd;
528 size_t len;
Bram Moolenaara88d9682005-03-25 21:45:43 +0000529 int l;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000530 char_u *p = NULL, *np;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000531 sblock_T *bl = NULL;
532 int bl_used = 0;
533 size_t rest = 0;
534 char_u *rbuf; /* read buffer */
535 char_u *rbuf_end; /* past last valid char in "rbuf" */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000536 hash_T hash;
537 hashitem_T *hi;
538 int c;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000539 int cc;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000540 int region = REGION_ALL;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000541 int wlen;
542 winfo_T *wi;
543 dword_T *dw, *edw;
544 nword_T *nw = NULL;
545 int flags;
546 char_u *save_sourcing_name = sourcing_name;
547 linenr_T save_sourcing_lnum = sourcing_lnum;
548
549 rbuf = alloc((unsigned)(SBLOCKSIZE + MAXWLEN + 1));
550 if (rbuf == NULL)
551 return;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000552
553 fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
554 if (fd < 0)
555 {
556 EMSG2(_(e_notopen), fname);
Bram Moolenaarfc735152005-03-22 22:54:12 +0000557 goto theend;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000558 }
559
Bram Moolenaarfc735152005-03-22 22:54:12 +0000560 sourcing_name = fname;
561 sourcing_lnum = 0;
562
Bram Moolenaare19defe2005-03-21 08:23:33 +0000563 /* Get the length of the whole file. */
564 len = lseek(fd, (off_t)0, SEEK_END);
565 lseek(fd, (off_t)0, SEEK_SET);
566
Bram Moolenaarfc735152005-03-22 22:54:12 +0000567 /*
568 * Read the file one block at a time.
Bram Moolenaare19defe2005-03-21 08:23:33 +0000569 * "rest" is the length of an incomplete line at the previous block.
Bram Moolenaarfc735152005-03-22 22:54:12 +0000570 * "p" points to the remainder.
571 */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000572 while (len > 0)
573 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000574 /* Read a block from the file. Prepend the remainder of the previous
575 * block, if any. */
576 if (rest > 0)
577 {
578 if (rest > MAXWLEN) /* truncate long line (should be comment) */
579 rest = MAXWLEN;
580 mch_memmove(rbuf, p, rest);
581 --sourcing_lnum;
582 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000583 if (len > SBLOCKSIZE)
584 l = SBLOCKSIZE;
585 else
586 l = len;
587 len -= l;
Bram Moolenaarfc735152005-03-22 22:54:12 +0000588 if (read(fd, rbuf + rest, l) != l)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000589 {
590 EMSG2(_(e_notread), fname);
591 break;
592 }
Bram Moolenaarfc735152005-03-22 22:54:12 +0000593 rbuf_end = rbuf + l + rest;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000594 rest = 0;
595
596 /* Deal with each line that was read until we finish the block. */
Bram Moolenaarfc735152005-03-22 22:54:12 +0000597 for (p = rbuf; p < rbuf_end; p = np)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000598 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000599 ++sourcing_lnum;
600
601 /* "np" points to the first char after the line (CR, NL or white
602 * space). */
603 for (np = p; np < rbuf_end && *np >= ' '; mb_ptr_adv(np))
604 ;
605 if (np >= rbuf_end)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000606 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000607 /* Incomplete line or end of file. */
Bram Moolenaare19defe2005-03-21 08:23:33 +0000608 rest = np - p;
609 if (len == 0)
Bram Moolenaarfc735152005-03-22 22:54:12 +0000610 EMSG(_("E751: Truncated spell file"));
Bram Moolenaare19defe2005-03-21 08:23:33 +0000611 break;
612 }
613 *np = NUL; /* terminate the line with a NUL */
614
Bram Moolenaarfc735152005-03-22 22:54:12 +0000615 if (*p == '-')
Bram Moolenaare19defe2005-03-21 08:23:33 +0000616 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000617 /*
618 * Region marker: ---, -xx, -xx-yy, etc.
619 */
620 ++p;
621 if (*p == '-')
Bram Moolenaare19defe2005-03-21 08:23:33 +0000622 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000623 if (p[1] != '-' || p[2] != NUL)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000624 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000625 EMSG2(_(e_invchar2), p - 1);
626 len = 0;
627 break;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000628 }
Bram Moolenaarfc735152005-03-22 22:54:12 +0000629 region = REGION_ALL;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000630 }
631 else
632 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000633 char_u *rp = load_lp->sl_regions;
634 int r;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000635
Bram Moolenaarfc735152005-03-22 22:54:12 +0000636 /* Start of a region. The region may be repeated:
637 * "-ca-uk". Fill "region" with the bit mask for the
638 * ones we find. */
639 region = 0;
640 for (;;)
641 {
642 r = find_region(rp, p);
643 if (r == REGION_ALL)
Bram Moolenaare19defe2005-03-21 08:23:33 +0000644 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000645 /* new region, add it to sl_regions[] */
646 r = STRLEN(rp);
647 if (r >= 16)
648 {
649 EMSG2(_("E752: Too many regions: %s"), p);
650 len = 0;
651 break;
652 }
653 else
654 {
655 rp[r] = p[0];
656 rp[r + 1] = p[1];
657 rp[r + 2] = NUL;
658 r = 1 << (r / 2);
659 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000660 }
Bram Moolenaarfc735152005-03-22 22:54:12 +0000661 else
662 r = 1 << r;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000663
Bram Moolenaarfc735152005-03-22 22:54:12 +0000664 region |= r;
665 if (p[2] != '-')
666 {
667 if (p[2] > ' ')
668 {
669 EMSG2(_(e_invchar2), p - 1);
670 len = 0;
671 }
672 break;
673 }
674 p += 3;
675 }
676 }
677 }
678 else if (*p != '#' && *p != NUL)
679 {
680 /*
681 * Not an empty line or comment.
682 */
683 if (*p == '!')
684 {
685 wi = &load_lp->sl_kwords; /* keep case */
686 ++p;
687 }
688 else
689 wi = &load_lp->sl_fwords; /* fold case */
690
691 flags = 0;
692 c = *p;
693 if (c == '>') /* rare word */
694 {
695 flags = DW_RARE;
696 ++p;
697 }
698 else if (*p == '+') /* addition */
699 ++p;
700
701 if (c != '+' && !spell_iswordc(p))
702 {
703 EMSG2(_(e_invchar2), p);
704 len = 0;
705 break;
706 }
707
708 /* Make sure there is room for the word. Folding case may
709 * double the size. */
710 wlen = np - p;
711 if (bl == NULL || bl_used + sizeof(dword_T) + wlen
712#ifdef FEAT_MBYTE
713 * (has_mbyte ? 2 : 1)
714#endif
715 >= SBLOCKSIZE)
716 {
717 /* Allocate a block of memory to store the dword_T in.
718 * This is not freed until spell_reload() is called. */
719 bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T)
720 + SBLOCKSIZE));
721 if (bl == NULL)
722 {
723 len = 0;
724 break;
725 }
726 bl->sb_next = load_lp->sl_block;
727 load_lp->sl_block = bl;
728 bl_used = 0;
729 }
730 dw = (dword_T *)(bl->sb_data + bl_used);
731
732 /* For fold-case words fold the case and check for start
733 * with uppercase letter. */
734 if (wi == &load_lp->sl_fwords)
735 {
736#ifdef FEAT_MBYTE
737 if (MB_ISUPPER(mb_ptr2char(p)))
738#else
739 if (MB_ISUPPER(*p))
740#endif
741 flags |= DW_CAP;
742
743 /* Fold case. */
744 (void)str_foldcase(p, np - p, dw->dw_word, wlen
745#ifdef FEAT_MBYTE
746 * (has_mbyte ? 2 : 1)
747#endif
748 + 1);
749#ifdef FEAT_MBYTE
750 /* case folding may change length of word */
751 wlen = STRLEN(dw->dw_word);
752#endif
753 }
754 else
755 {
756 /* Keep case: copy the word as-is. */
757 mch_memmove(dw->dw_word, p, wlen + 1);
758 }
759
760 if (c == '+')
761 {
762 garray_T *gap = &wi->wi_add;
763
764 /* Addition. TODO: search for matching entry? */
765 if (wi->wi_addlen < wlen)
766 wi->wi_addlen = wlen;
767 if (ga_grow(gap, 1) == FAIL)
768 {
769 len = 0;
770 break;
771 }
772 *(((dword_T **)gap->ga_data) + gap->ga_len) = dw;
773 ++gap->ga_len;
774 dw->dw_region = region;
775 dw->dw_flags = flags;
776 bl_used += sizeof(dword_T) + wlen;
777 }
778 else
779 {
780 /*
781 * Check for a non-word character. If found it's
782 * going to be an nword.
783 * For an nword we split in two: the leading dword and
784 * the remainder. The dword goes in the hashtable
785 * with an nword_T, the remainder is put in the
786 * dword_T (starting with the first non-word
787 * character).
788 */
789 cc = NUL;
790 for (p = dw->dw_word; *p != NUL; mb_ptr_adv(p))
791 if (!spell_iswordc(p))
792 {
793 cc = *p;
794 *p = NUL;
795 break;
796 }
797
798 /* check if we already have this dword */
799 hash = hash_hash(dw->dw_word);
800 hi = hash_lookup(&wi->wi_ht, dw->dw_word, hash);
Bram Moolenaare19defe2005-03-21 08:23:33 +0000801 if (!HASHITEM_EMPTY(hi))
802 {
Bram Moolenaarfc735152005-03-22 22:54:12 +0000803 /* Existing entry. */
804 edw = HI2DWORD(hi);
805 if ((edw->dw_flags & (DW_CAP | DW_RARE))
806 == (dw->dw_flags & (DW_CAP | DW_RARE)))
Bram Moolenaare19defe2005-03-21 08:23:33 +0000807 {
808 if (p_verbose > 0)
809 smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
Bram Moolenaarfc735152005-03-22 22:54:12 +0000810 dw->dw_word, fname);
811 }
812 }
813
814 if (cc != NUL) /* nword */
815 {
816 if (HASHITEM_EMPTY(hi)
817 || (edw->dw_flags & DW_NWORD) == 0)
818 {
819 sblock_T *sb;
820
821 /* Need to allocate a new nword_T. Put it in an
822 * sblock_T, so that we can free it later. */
823 sb = (sblock_T *)alloc(
824 (unsigned)(sizeof(sblock_T)
825 + sizeof(nword_T) + wlen));
826 if (sb == NULL)
827 {
828 len = 0;
829 break;
830 }
831 sb->sb_next = load_lp->sl_block;
832 load_lp->sl_block = sb;
833 nw = (nword_T *)sb->sb_data;
834
835 ga_init2(&nw->nw_ga, sizeof(dword_T *), 4);
836 nw->nw_maxlen = 0;
837 STRCPY(nw->nw_word, dw->dw_word);
838 if (!HASHITEM_EMPTY(hi))
839 {
840 /* Note: the nw_region and nw_flags is for
841 * the dword that matches with the start
842 * of this nword, not for the nword
843 * itself! */
844 nw->nw_region = edw->dw_region;
845 nw->nw_flags = edw->dw_flags | DW_NWORD;
846
847 /* Remove the dword item so that we can
848 * add it as an nword. */
849 hash_remove(&wi->wi_ht, hi);
850 hi = hash_lookup(&wi->wi_ht,
851 nw->nw_word, hash);
852 }
853 else
854 {
855 nw->nw_region = 0;
856 nw->nw_flags = DW_NWORD;
857 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000858 }
859 else
Bram Moolenaarfc735152005-03-22 22:54:12 +0000860 nw = HI2NWORD(hi);
Bram Moolenaare19defe2005-03-21 08:23:33 +0000861 }
Bram Moolenaarfc735152005-03-22 22:54:12 +0000862
863 if (HASHITEM_EMPTY(hi))
864 {
865 /* Add new dword or nword entry. */
866 hash_add_item(&wi->wi_ht, hi, cc == NUL
867 ? dw->dw_word : nw->nw_word, hash);
868 if (cc == NUL)
869 {
870 /* New dword: init the values and count the
871 * used space. */
872 dw->dw_flags = DW_DWORD | flags;
873 dw->dw_region = region;
874 bl_used += sizeof(dword_T) + wlen;
875 }
876 }
877 else if (cc == NUL)
878 {
879 /* existing dword: add the region and flags */
880 dw = edw;
881 dw->dw_region |= region;
882 dw->dw_flags |= DW_DWORD | flags;
883 }
884
885 if (cc != NUL)
886 {
887 /* Use the dword for the non-word character and
888 * following characters. */
889 dw->dw_region = region;
890 dw->dw_flags = flags;
891 STRCPY(dw->dw_word + 1, p + 1);
892 dw->dw_word[0] = cc;
893 l = wlen - (p - dw->dw_word);
894 bl_used += sizeof(dword_T) + l;
895 if (nw->nw_maxlen < l)
896 nw->nw_maxlen = l;
897
898 /* Add the dword to the growarray in the nword. */
899 if (ga_grow(&nw->nw_ga, 1) == FAIL)
900 {
901 len = 0;
902 break;
903 }
904 *((dword_T **)nw->nw_ga.ga_data + nw->nw_ga.ga_len)
905 = dw;
906 ++nw->nw_ga.ga_len;
907 }
Bram Moolenaare19defe2005-03-21 08:23:33 +0000908 }
909 }
910
Bram Moolenaarfc735152005-03-22 22:54:12 +0000911 /* Skip over CR and NL characters and trailing white space. */
912 while (np < rbuf_end && *np <= ' ')
Bram Moolenaare19defe2005-03-21 08:23:33 +0000913 ++np;
Bram Moolenaare19defe2005-03-21 08:23:33 +0000914 }
915 }
916
917 close(fd);
Bram Moolenaarfc735152005-03-22 22:54:12 +0000918theend:
919 sourcing_name = save_sourcing_name;
920 sourcing_lnum = save_sourcing_lnum;
921 vim_free(rbuf);
Bram Moolenaare19defe2005-03-21 08:23:33 +0000922}
923
924/*
925 * Parse 'spelllang' and set buf->b_langp accordingly.
926 * Returns an error message or NULL.
927 */
928 char_u *
929did_set_spelllang(buf)
930 buf_T *buf;
931{
932 garray_T ga;
933 char_u *lang;
934 char_u *e;
935 char_u *region;
936 int region_mask;
937 slang_T *lp;
938 int c;
939
940 ga_init2(&ga, sizeof(langp_T), 2);
941
942 /* loop over comma separated languages. */
943 for (lang = buf->b_p_spl; *lang != NUL; lang = e)
944 {
945 e = vim_strchr(lang, ',');
946 if (e == NULL)
947 e = lang + STRLEN(lang);
948 if (e > lang + 2)
949 {
950 if (lang[2] != '_' || e - lang != 5)
951 {
952 ga_clear(&ga);
953 return e_invarg;
954 }
955 region = lang + 3;
956 }
957 else
958 region = NULL;
959
960 for (lp = first_lang; lp != NULL; lp = lp->sl_next)
961 if (STRNICMP(lp->sl_name, lang, 2) == 0)
962 break;
963
964 if (lp == NULL)
965 /* Not found, load the language. */
966 lp = spell_load_lang(lang);
967
968 if (lp != NULL)
969 {
970 if (region == NULL)
971 region_mask = REGION_ALL;
972 else
973 {
974 /* find region in sl_regions */
975 c = find_region(lp->sl_regions, region);
976 if (c == REGION_ALL)
977 {
978 c = lang[5];
979 lang[5] = NUL;
980 smsg((char_u *)_("Warning: region %s not supported"), lang);
981 lang[5] = c;
982 region_mask = REGION_ALL;
983 }
984 else
985 region_mask = 1 << c;
986 }
987
988 if (ga_grow(&ga, 1) == FAIL)
989 {
990 ga_clear(&ga);
991 return e_outofmem;
992 }
993 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
994 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
995 ++ga.ga_len;
996 }
997
998 if (*e == ',')
999 ++e;
1000 }
1001
1002 /* Add a NULL entry to mark the end of the list. */
1003 if (ga_grow(&ga, 1) == FAIL)
1004 {
1005 ga_clear(&ga);
1006 return e_outofmem;
1007 }
1008 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
1009 ++ga.ga_len;
1010
1011 /* Everything is fine, store the new b_langp value. */
1012 ga_clear(&buf->b_langp);
1013 buf->b_langp = ga;
1014
1015 return NULL;
1016}
1017
1018/*
1019 * Find the region "region[2]" in "rp" (points to "sl_regions").
1020 * Each region is simply stored as the two characters of it's name.
1021 * Returns the index if found, REGION_ALL if not found.
1022 */
1023 static int
1024find_region(rp, region)
1025 char_u *rp;
1026 char_u *region;
1027{
1028 int i;
1029
1030 for (i = 0; ; i += 2)
1031 {
1032 if (rp[i] == NUL)
1033 return REGION_ALL;
1034 if (rp[i] == region[0] && rp[i + 1] == region[1])
1035 break;
1036 }
1037 return i / 2;
1038}
1039
1040# if defined(FEAT_MBYTE) || defined(PROTO)
1041/*
1042 * Clear all spelling tables and reload them.
1043 * Used after 'encoding' is set.
1044 */
1045 void
1046spell_reload()
1047{
1048 buf_T *buf;
1049 slang_T *lp;
1050 sblock_T *sp;
1051
Bram Moolenaarfc735152005-03-22 22:54:12 +00001052 /* Initialize the table for spell_iswordc(). */
1053 init_spell_chartab();
1054
Bram Moolenaare19defe2005-03-21 08:23:33 +00001055 /* Unload all allocated memory. */
1056 while (first_lang != NULL)
1057 {
1058 lp = first_lang;
1059 first_lang = lp->sl_next;
1060
Bram Moolenaarfc735152005-03-22 22:54:12 +00001061 hash_clear(&lp->sl_fwords.wi_ht);
1062 ga_clear(&lp->sl_fwords.wi_add);
1063 hash_clear(&lp->sl_kwords.wi_ht);
1064 ga_clear(&lp->sl_kwords.wi_add);
Bram Moolenaare19defe2005-03-21 08:23:33 +00001065 while (lp->sl_block != NULL)
1066 {
1067 sp = lp->sl_block;
1068 lp->sl_block = sp->sb_next;
1069 vim_free(sp);
1070 }
1071 }
1072
1073 /* Go through all buffers and handle 'spelllang'. */
1074 for (buf = firstbuf; buf != NULL; buf = buf->b_next)
1075 {
1076 ga_clear(&buf->b_langp);
1077 if (*buf->b_p_spl != NUL)
1078 did_set_spelllang(buf);
1079 }
1080}
1081# endif
1082
1083#endif /* FEAT_SYN_HL */