src/spell.c - android_external_vim - Gitiles

 /* vi:set ts=8 sts=4 sw=4:
  *
  * VIM - Vi IMproved	by Bram Moolenaar
  *
  * Do ":help uganda"  in Vim to read copying and usage conditions.
  * Do ":help credits" in Vim to see a list of people who contributed.
  * See README.txt for an overview of the Vim source code.
  */

 /*
  * spell.c: code for spell checking
  */

 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
 # include <io.h>	/* for lseek(), must be before vim.h */
 #endif

 #include "vim.h"

 #if defined(FEAT_SYN_HL) || defined(PROTO)

 #ifdef HAVE_FCNTL_H
 # include <fcntl.h>
 #endif

 /*
  * Storage block for the raw text of a spell file.
  *
  * spell_load_file() reads the file into these blocks; the hashtable and the
  * match/addition tables then point straight into "sb_data".  This avoids the
  * need to allocate each individual word and copying it.  It's allocated in
  * big chunks for speed.
  *
  * Blocks are chained through "sb_next", with the head of the chain stored in
  * slang_T.sl_block.  They are only released by spell_reload().
  *
  * "sb_data" is declared with a single byte but is allocated longer: the
  * loader asks for sizeof(sblock_T) - 1 plus the number of data bytes.
  */
 #define  SBLOCKSIZE 4096	/* max. bytes read from the file per block */
 typedef struct sblock_S sblock_T;
 struct sblock_S
 {
     sblock_T	*sb_next;	/* next block in list */
     char_u	sb_data[1];	/* data, actually longer */
 };

 /*
  * Structure used to store words and other info for one language.
  * One is allocated by spell_load_lang() for each language that gets loaded;
  * loaded languages are kept in a singly linked list headed by "first_lang".
  */
 typedef struct slang_S slang_T;

 struct slang_S
 {
     slang_T	*sl_next;	/* next language */
     char_u	sl_name[2];	/* language name "en", "nl", etc.; exactly two
 				   characters, NOT NUL terminated */
     hashtab_T	sl_ht;		/* hashtable with all words; the byte just
 				   before each key holds the word's flags */
     garray_T	sl_match;	/* table with pointers to matches, kept
 				   sorted for the binary search in
 				   spell_check() */
     garray_T	sl_add;		/* table with pointers to additions */
     char_u	sl_regions[13];	/* table with up to 6 region names: two
 				   characters each plus a terminating NUL;
 				   the position of a name gives its bit in
 				   the region mask */
     sblock_T	*sl_block;	/* list with allocated memory blocks */
 };

 /* Head of the list of loaded languages. */
 static slang_T *first_lang = NULL;

 /*
  * Structure used in "b_langp", filled from 'spelllang'.
  * did_set_spelllang() stores one entry per usable language in the buffer's
  * growarray, followed by an entry with "lp_slang" set to NULL that marks the
  * end of the list.
  */
 typedef struct langp_S
 {
     slang_T	*lp_slang;	/* info for this language (NULL for last one) */
     int		lp_region;	/* bitmask for region or REGION_ALL */
 } langp_T;

 /*
  * LANGP_ENTRY(ga, i): pointer to entry "i" of the langp_T growarray "ga"
  * (the growarray itself, not a pointer to it).
  * MATCH_ENTRY(gap, i): the char_u pointer stored at index "i" of the
  * growarray that "gap" points to.  Usable as an lvalue.
  * Arguments and the expansions are fully parenthesized, so that any
  * expression can be used for the index.
  */
 #define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
 #define MATCH_ENTRY(gap, i)	(*(((char_u **)(gap)->ga_data) + (i)))

 /*
  * The byte before a word in the hashtable indicates the type of word.
  * Also used for the byte just before a match.
  * The top two bits are used to indicate rare and case-sensitive words.
  * The lower bits are used to indicate the region in which the word is valid:
  * one bit per region, in the order of the names in "sl_regions".
  * Words valid in all regions use REGION_ALL.
  * REGION_ALL is also what find_region() returns for "not found"; that works
  * because a real region index is always much smaller.
  */
 #define REGION_MASK	0x3f
 #define REGION_ALL	0x3f
 #define CASE_MASK	0x40
 #define RARE_MASK	0x80

 /*
  * Results of looking up a word in spell_check():
  * SP_OK:    word is good
  * SP_BAD:   word was not found
  * SP_RARE:  word is marked as rare
  * SP_LOCAL: word was found, but not for the selected region
  */
 #define SP_OK		0
 #define SP_BAD		1
 #define SP_RARE		2
 #define SP_LOCAL	3

 /* Prototypes for the functions that are local to this file. */
 static slang_T *spell_load_lang __ARGS((char_u *lang));
 static void spell_load_file __ARGS((char_u *fname));
 static int find_region __ARGS((char_u *rp, char_u *region));

 /*
  * Main spell-checking function.
  * "ptr" points to the start of a word.
  * "*attrp" is set to the attributes for a badly spelled word.  For a non-word
  * or when it's OK it remains unchanged.
  * This must only be called when 'spelllang' is not empty.
  * Returns the length of the word in bytes, also when it's OK, so that the
  * caller can skip over the word.
  */
     int
 spell_check(wp, ptr, attrp)
     win_T	*wp;		/* current window */
     char_u	*ptr;
     int		*attrp;
 {
     char_u	*e;
     langp_T	*lp;
     int		result;
     int		len = 0;
     hash_T	hash;
     hashitem_T	*hi;
     int		c;
 #define MAXWLEN 80	/* assume max. word len is 80 */
     char_u	word[MAXWLEN + 1];
     garray_T	*gap;
     int		l, h, t;
     char_u	*p;
     int		n;

     /* Find the end of the word.  We already know that *ptr is a word char. */
     e = ptr;
     do
     {
 	mb_ptr_adv(e);
 	++len;
     } while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));

     /* The word is bad unless we find it in the dictionary. */
     result = SP_BAD;

     /* Words are always stored with folded case. */
     (void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
     hash = hash_hash(word);

     /*
      * Loop over the languages specified in 'spelllang'.
      * We check them all, because a match may find a longer word.
      */
     for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
 								     ++lp)
     {
 	/* Check words when it wasn't recognized as a good word yet. */
 	if (result != SP_OK)
 	{
 	    /* Word lookup.  Using a hash table is fast. */
 	    hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
 	    if (!HASHITEM_EMPTY(hi))
 	    {
 		/* The character before the key indicates the type of word. */
 		c = hi->hi_key[-1];
 		if ((c & CASE_MASK) != 0)
 		{
 		    /* Need to check first letter is uppercase.  If it is,
 		     * check region.  If it isn't it may be a rare word. */
 		    if (
 #ifdef FEAT_MBYTE
 			    MB_ISUPPER(mb_ptr2char(ptr))
 #else
 			    MB_ISUPPER(*ptr)
 #endif
 			    )
 		    {
 			if ((c & lp->lp_region) == 0)
 			    result = SP_LOCAL;
 			else
 			    result = SP_OK;
 		    }
 		    else if (c & RARE_MASK)
 			result = SP_RARE;
 		}
 		else
 		{
 		    if ((c & lp->lp_region) == 0)
 			result = SP_LOCAL;
 		    else if (c & RARE_MASK)
 			result = SP_RARE;
 		    else
 			result = SP_OK;
 		}
 	    }
 	}

 	/* Match lookup.  Uses a binary search.  If there is a match adjust
 	 * "e" to the end.  This is also done when a word matched, because
 	 * "you've" is longer than "you". */
 	gap = &lp->lp_slang->sl_match;
 	l = 0;			/* low index */
 	h = gap->ga_len - 1;	/* high index */
 	/* keep searching, the match must be between "l" and "h" (inclusive) */
 	while (h >= l)
 	{
 	    t = (h + l) / 2;
 	    p = MATCH_ENTRY(gap, t) + 1;
 	    for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
 		;
 	    if (p[n] == 0)
 	    {
 		if ((ptr[n] == 0 || !vim_iswordc_buf(ptr + n, wp->w_buffer)))
 		{
 		    /* match! */
 		    e = ptr + n;
 		    if (result != SP_OK)
 		    {
 			if ((lp->lp_region & p[-1]) == 0)
 			    result = SP_LOCAL;
 			else
 			    result = SP_OK;
 		    }
 		    break;
 		}
 		/* match is too short, next item is new low index */
 		l = t + 1;
 	    }
 	    else if (p[n] < ptr[n])
 		/* match is before word, next item is new low index */
 		l = t + 1;
 	    else
 		/* match is after word, previous item is new high index */
 		h = t - 1;
 	}

 	/* Addition lookup.  Uses a linear search, there should be very few.
 	 * If there is a match adjust "e" to the end.  This doesn't change
 	 * whether a word was good or bad, only the length. */
 	gap = &lp->lp_slang->sl_add;
 	for (t = 0; t < gap->ga_len; ++t)
 	{
 	    p = MATCH_ENTRY(gap, t) + 1;
 	    for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
 		;
 	    if (p[n] == 0
 		      && (e[n] == 0 || !vim_iswordc_buf(e + n, wp->w_buffer)))
 	    {
 		/* match */
 		e += n;
 		break;
 	    }
 	}
     }

     if (result != SP_OK)
     {
 	if (result == SP_BAD)
 	    *attrp = highlight_attr[HLF_SPB];
 	else if (result == SP_RARE)
 	    *attrp = highlight_attr[HLF_SPR];
 	else
 	    *attrp = highlight_attr[HLF_SPL];
     }

     return (int)(e - ptr);
 }

 static slang_T	    *load_lp;	/* passed from spell_load_lang() to
 				   spell_load_file(); only valid while
 				   spell_load_lang() is searching
 				   'runtimepath' */

 /*
  * Load language "lang[2]": only the first two characters of "lang" are used.
  * All files "spell/xx.{encoding}.spl" in 'runtimepath' are loaded, where
  * {encoding} is 'encoding', except that "latin1" is used for "iso-8859-15"
  * (and when 'encoding' is too long for the file name buffer, or without the
  * multi-byte feature).  When no such file is found "spell/xx.spl" is tried.
  * When a file was found the language is added to the list of loaded
  * languages, no matter which of the two names was used.
  * Returns the new language, or NULL when out of memory or when no spell file
  * was found (a warning is given in the second case).
  */
     static slang_T *
 spell_load_lang(lang)
     char_u	*lang;
 {
     slang_T	*lp;
     char_u	fname_enc[80];
     char_u	fname_ascii[20];
     char_u	*p;
     int		r;

     lp = (slang_T *)alloc(sizeof(slang_T));
     if (lp == NULL)
 	return NULL;

     /* alloc() is not assumed to clear the memory, set every field. */
     lp->sl_next = NULL;
     lp->sl_name[0] = lang[0];
     lp->sl_name[1] = lang[1];
     hash_init(&lp->sl_ht);
     ga_init2(&lp->sl_match, sizeof(char_u *), 20);
     ga_init2(&lp->sl_add, sizeof(char_u *), 4);
     lp->sl_regions[0] = NUL;
     lp->sl_block = NULL;

     /* Find all spell files for "lang" in 'runtimepath' and load them.
      * Use 'encoding', except that we use "latin1" for "latin9".
      * The length check makes sure the name fits in "fname_enc". */
 #ifdef FEAT_MBYTE
     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
 	p = p_enc;
     else
 #endif
 	p = (char_u *)"latin1";
     load_lp = lp;
     sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
     r = do_in_runtimepath(fname_enc, TRUE, spell_load_file);
     if (r == FAIL)
     {
 	/* Try again to find an ASCII spell file. */
 	sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
 	r = do_in_runtimepath(fname_ascii, TRUE, spell_load_file);
     }

     /* Loading is done, don't keep a pointer that may become invalid. */
     load_lp = NULL;

     if (r == FAIL)
     {
 	/* Nothing was loaded.  Skip "spell/" in the message. */
 	vim_free(lp);
 	smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
 							       fname_enc + 6);
 	return NULL;
     }

     /* Remember the language, also when the ASCII file was used.  Otherwise
      * it would be loaded again for every use and never be freed. */
     lp->sl_next = first_lang;
     first_lang = lp;

     return lp;
 }

 /*
  * Add the match ('=') or addition ('+') line "p" to the table "gap", keeping
  * the table sorted on the text of the items.
  * "p" points to the type character, the text follows.  The type character is
  * overwritten with "region", so that the byte before the text holds the
  * region mask.
  * An item with the same text as an existing item is not added; a warning is
  * given for it when 'verbose' is set.
  */
     static void
 spell_add_match(gap, p, region, fname)
     garray_T	*gap;
     char_u	*p;
     int		region;
     char_u	*fname;
 {
     int		n;
     int		c = 1;

     if (ga_grow(gap, 1) != OK)
 	return;

     /* Find the first item that is not before the new one.  Must stop at an
      * equal item, otherwise comparing with the item after it hides the
      * duplicate. */
     for (n = 0; n < gap->ga_len; ++n)
     {
 	c = STRCMP(p + 1, MATCH_ENTRY(gap, n) + 1);
 	if (c <= 0)
 	    break;
     }

     if (n < gap->ga_len && c == 0)
     {
 	if (p_verbose > 0)
 	    smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
 								p + 1, fname);
 	return;
     }

     /* Make room at index "n" and put the new item there. */
     mch_memmove((char_u **)gap->ga_data + n + 1,
 	    (char_u **)gap->ga_data + n,
 	    (gap->ga_len - n) * sizeof(char_u *));
     *(((char_u **)gap->ga_data) + n) = p;
     *p = region;
     ++gap->ga_len;
 }

 /*
  * Handle a region line.  "p" points just after the leading '-'.
  * "--" ends a region: REGION_ALL is returned.
  * Otherwise one or more two-character region names follow, separated by
  * '-': "-ca-uk".  Names that were not seen before are added to the
  * "sl_regions" of the language being loaded.
  * Returns the bit mask for the regions.  When there are too many regions or
  * a name is not two characters long an error is given and the mask includes
  * all regions.
  */
     static int
 spell_parse_regions(p, fname)
     char_u	*p;
     char_u	*fname;
 {
     char_u	*rp = load_lp->sl_regions;
     int		region = 0;
     int		r;

     if (*p == '-')
 	/* end of a region */
 	return REGION_ALL;

     for (;;)
     {
 	/* A name must have two characters.  Check this before using p[1]
 	 * and p[2], they may be past the end of the line. */
 	if (p[0] == NUL || p[1] == NUL)
 	{
 	    EMSG2(_("E753: Invalid character in \"%s\""), p - 1);
 	    region |= REGION_ALL;
 	    break;
 	}

 	/* start of a region */
 	r = find_region(rp, p);
 	if (r == REGION_ALL)
 	{
 	    /* new region, add it; there is room for 6 names (12 chars) */
 	    r = (int)STRLEN(rp);
 	    if (r >= 12)
 	    {
 		EMSG2(_("E752: Too many regions in %s"), fname);
 		r = REGION_ALL;
 	    }
 	    else
 	    {
 		rp[r] = p[0];
 		rp[r + 1] = p[1];
 		rp[r + 2] = NUL;
 		r = 1 << (r / 2);
 	    }
 	}
 	else
 	    r = 1 << r;

 	region |= r;
 	if (p[2] != '-')
 	{
 	    if (p[2] != NUL)
 		EMSG2(_("E753: Invalid character in \"%s\""), p - 1);
 	    break;
 	}
 	p += 3;
     }

     return region;
 }

 /*
  * Add the word in the line from "p" to "np" to the hashtable of the language
  * being loaded.
  * "p" points to the type character: '>' for a rare word, a space for a
  * normal word (an error is given for anything else and the word is added as
  * a normal word).  "np" points to the NUL at the end of the line.
  * The type character is replaced by the flags byte: "region" plus RARE_MASK
  * for a rare word and CASE_MASK when the word starts with an upper case
  * letter.  The word is stored with folded case.
  * When the word already exists with other case/rare flags these flags are
  * added to the existing word, otherwise it is a duplicate and a warning is
  * given when 'verbose' is set.
  */
     static void
 spell_add_word(p, np, region, fname)
     char_u	*p;
     char_u	*np;
     int		region;
     char_u	*fname;
 {
     int		c = *p;
     int		n;
     char_u	word[MAXWLEN + 1];
     hash_T	hash;
     hashitem_T	*hi;
     sblock_T	*s;

     if (c == '>')
 	c = region | RARE_MASK;
     else
     {
 	if (c != ' ')
 	    EMSG2(_("E753: Invalid character in \"%s\""), p);
 	c = region;
     }
 #ifdef FEAT_MBYTE
     if (MB_ISUPPER(mb_ptr2char(p + 1)))
 #else
     if (MB_ISUPPER(p[1]))
 #endif
 	c |= CASE_MASK;
     *p++ = c;

     /* NOTE(review): presumably str_foldcase() truncates a word that doesn't
      * fit in "word" -- confirm. */
     (void)str_foldcase(p, (int)(np - p), word, MAXWLEN + 1);
     n = (int)STRLEN(word);
     if (n > np - p)
     {
 	/* Folding case made word longer!  We need to allocate
 	 * memory for it. */
 	s = (sblock_T *)alloc((unsigned)sizeof(sblock_T) + n + 1);
 	if (s == NULL)
 	    /* Skip the word, it doesn't fit in the place where it was
 	     * read and must not overwrite what follows. */
 	    return;
 	s->sb_next = load_lp->sl_block;
 	load_lp->sl_block = s;
 	s->sb_data[0] = p[-1];
 	p = s->sb_data + 1;
     }
     mch_memmove(p, word, n + 1);

     hash = hash_hash(p);
     hi = hash_lookup(&load_lp->sl_ht, p, hash);
     if (HASHITEM_EMPTY(hi))
     {
 	hash_add_item(&load_lp->sl_ht, hi, p, hash);
 	return;
     }

     /* The word already exists, check the flags it has. */
     c = hi->hi_key[-1];
     if ((c & (CASE_MASK | RARE_MASK)) == (p[-1] & (CASE_MASK | RARE_MASK)))
     {
 	if (p_verbose > 0)
 	    smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
 								    p, fname);
     }
     else
 	hi->hi_key[-1] |= (p[-1] & (CASE_MASK | RARE_MASK));
 }

 /*
  * Load one spell file into "load_lp".
  * Invoked through do_in_runtimepath().
  *
  * The file is read in blocks, which are kept in the "sl_block" list of the
  * language; the tables point into these blocks.  A line ends in one or more
  * characters below a space (CR, NL).  The first character of a line tells
  * what kind of line it is:
  *	#	comment, ignored (empty lines are ignored as well)
  *	=	match item
  *	+	addition item
  *	-	region line, applies to the items and words that follow
  *	>	rare word
  *	space	normal word
  * Errors are reported with a message; what was read before the error is
  * kept.
  */
     static void
 spell_load_file(fname)
     char_u	*fname;
 {
     int		fd;
     off_t	flen;
     size_t	len;
     size_t	l;
     size_t	rest = 0;
     char_u	*p = NULL, *np;
     sblock_T	*bl;
     int		c;
     int		region = REGION_ALL;

     fd = mch_open((char *)fname, O_RDONLY | O_EXTRA, 0);
     if (fd < 0)
     {
 	EMSG2(_(e_notopen), fname);
 	return;
     }

     /* Get the length of the whole file.  Don't use the result when seeking
      * failed, it would be taken for a huge length. */
     flen = lseek(fd, (off_t)0, SEEK_END);
     if (flen < 0 || lseek(fd, (off_t)0, SEEK_SET) != 0)
     {
 	EMSG2(_(e_notread), fname);
 	close(fd);
 	return;
     }
     len = (size_t)flen;

     /* Loop, reading the file one block at a time.
      * "rest" is the length of an incomplete line at the previous block.
      * "p" points to the remainder. */
     while (len > 0)
     {
 	/* Allocate a block of memory to store the info in.  This is not freed
 	 * until spell_reload() is called. */
 	if (len > SBLOCKSIZE)
 	    l = SBLOCKSIZE;
 	else
 	    l = len;
 	len -= l;
 	bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
 	if (bl == NULL)
 	    break;
 	bl->sb_next = load_lp->sl_block;
 	load_lp->sl_block = bl;

 	/* Read a block from the file.  Prepend the remainder of the previous
 	 * block. */
 	if (rest > 0)
 	    mch_memmove(bl->sb_data, p, rest);
 	if ((size_t)read(fd, bl->sb_data + rest, l) != l)
 	{
 	    EMSG2(_(e_notread), fname);
 	    break;
 	}
 	l += rest;
 	rest = 0;

 	/* Deal with each line that was read until we finish the block.
 	 * "l" is the number of bytes that were not handled yet. */
 	for (p = bl->sb_data; l > 0; p = np)
 	{
 	    /* "np" points to the char after the line (CR or NL). */
 	    for (np = p; l > 0 && *np >= ' '; ++np)
 		--l;
 	    if (l == 0)
 	    {
 		/* Incomplete line (or end of file). */
 		rest = np - p;
 		if (len == 0)
 		    EMSG2(_("E751: Truncated spell file: %s"), fname);
 		break;
 	    }
 	    *np = NUL;	    /* terminate the line with a NUL */

 	    /* Skip comment and empty lines. */
 	    c = *p;
 	    if (c != '#' && np > p)
 	    {
 		if (c == '=')
 		    spell_add_match(&load_lp->sl_match, p, region, fname);
 		else if (c == '+')
 		    spell_add_match(&load_lp->sl_add, p, region, fname);
 		else if (c == '-')
 		    region = spell_parse_regions(p + 1, fname);
 		else
 		    spell_add_word(p, np, region, fname);
 	    }

 	    /* Skip over the end of the line and following empty lines. */
 	    while (l > 0 && *np < ' ')
 	    {
 		++np;
 		--l;
 	    }
 	}
     }

     close(fd);
 }

 /*
  * Parse 'spelllang' and set buf->b_langp accordingly.
  * Returns an error message or NULL.
  */
     char_u *
 did_set_spelllang(buf)
     buf_T	*buf;
 {
     garray_T	ga;
     char_u	*lang;
     char_u	*e;
     char_u	*region;
     int		region_mask;
     slang_T	*lp;
     int		c;

     ga_init2(&ga, sizeof(langp_T), 2);

     /* loop over comma separated languages. */
     for (lang = buf->b_p_spl; *lang != NUL; lang = e)
     {
 	e = vim_strchr(lang, ',');
 	if (e == NULL)
 	    e = lang + STRLEN(lang);
 	if (e > lang + 2)
 	{
 	    if (lang[2] != '_' || e - lang != 5)
 	    {
 		ga_clear(&ga);
 		return e_invarg;
 	    }
 	    region = lang + 3;
 	}
 	else
 	    region = NULL;

 	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
 	    if (STRNICMP(lp->sl_name, lang, 2) == 0)
 		break;

 	if (lp == NULL)
 	    /* Not found, load the language. */
 	    lp = spell_load_lang(lang);

 	if (lp != NULL)
 	{
 	    if (region == NULL)
 		region_mask = REGION_ALL;
 	    else
 	    {
 		/* find region in sl_regions */
 		c = find_region(lp->sl_regions, region);
 		if (c == REGION_ALL)
 		{
 		    c = lang[5];
 		    lang[5] = NUL;
 		    smsg((char_u *)_("Warning: region %s not supported"), lang);
 		    lang[5] = c;
 		    region_mask = REGION_ALL;
 		}
 		else
 		    region_mask = 1 << c;
 	    }

 	    if (ga_grow(&ga, 1) == FAIL)
 	    {
 		ga_clear(&ga);
 		return e_outofmem;
 	    }
 	    LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
 	    LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
 	    ++ga.ga_len;
 	}

 	if (*e == ',')
 	    ++e;
     }

     /* Add a NULL entry to mark the end of the list. */
     if (ga_grow(&ga, 1) == FAIL)
     {
 	ga_clear(&ga);
 	return e_outofmem;
     }
     LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
     ++ga.ga_len;

     /* Everything is fine, store the new b_langp value. */
     ga_clear(&buf->b_langp);
     buf->b_langp = ga;

     return NULL;
 }

 /*
  * Find the region "region[2]" in "rp" (points to "sl_regions").
  * "rp" is a NUL terminated sequence of region names, each stored as just the
  * two characters of its name.  Only the first two characters of "region" are
  * used.
  * Returns the index of the name (zero for the first one) if found,
  * REGION_ALL if not found.
  */
     static int
 find_region(rp, region)
     char_u	*rp;
     char_u	*region;
 {
     char_u	*s;

     /* Step through the names until the NUL that ends the table. */
     for (s = rp; *s != NUL; s += 2)
 	if (s[0] == region[0] && s[1] == region[1])
 	    return (int)(s - rp) / 2;

     return REGION_ALL;
 }

 # if defined(FEAT_MBYTE) || defined(PROTO)
 /*
  * Clear all spelling tables and reload them.
  * Used after 'encoding' is set.
  * All loaded languages are freed.  Then the language list of every buffer is
  * cleared and, when 'spelllang' is set for the buffer, built again, which
  * loads the spell files that are needed.
  */
     void
 spell_reload()
 {
     buf_T	*buf;
     slang_T	*lp;
     sblock_T	*sp;

     /* Unload all allocated memory. */
     while (first_lang != NULL)
     {
 	lp = first_lang;
 	first_lang = lp->sl_next;

 	/* The words themselves are in the blocks, the tables only point to
 	 * them. */
 	hash_clear(&lp->sl_ht);
 	ga_clear(&lp->sl_match);
 	ga_clear(&lp->sl_add);
 	while (lp->sl_block != NULL)
 	{
 	    sp = lp->sl_block;
 	    lp->sl_block = sp->sb_next;
 	    vim_free(sp);
 	}

 	/* Also free the language itself, it was allocated in
 	 * spell_load_lang(). */
 	vim_free(lp);
     }

     /* Go through all buffers and handle 'spelllang'.  The old list points
      * to the languages that were just freed, always clear it. */
     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
     {
 	ga_clear(&buf->b_langp);
 	if (*buf->b_p_spl != NUL)
 	    did_set_spelllang(buf);
     }
 }
 # endif

 #endif  /* FEAT_SYN_HL */
	/* vi:set ts=8 sts=4 sw=4:
	*
	* VIM - Vi IMproved by Bram Moolenaar
	*
	* Do ":help uganda" in Vim to read copying and usage conditions.
	* Do ":help credits" in Vim to see a list of people who contributed.
	* See README.txt for an overview of the Vim source code.
	*/

	/*
	* spell.c: code for spell checking
	*/

	#if defined(MSDOS) \|\| defined(WIN16) \|\| defined(WIN32) \|\| defined(_WIN64)
	# include <io.h> /* for lseek(), must be before vim.h */
	#endif

	#include "vim.h"

	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)

	#ifdef HAVE_FCNTL_H
	# include <fcntl.h>
	#endif

	/*
	* Structure that is used to store the text from the language file. This
	* avoids the need to allocate each individual word and copying it. It's
	* allocated in big chunks for speed.
	*/
	#define SBLOCKSIZE 4096 /* default size of sb_data */
	typedef struct sblock_S sblock_T;
	struct sblock_S
	{
	sblock_T sb_next; / next block in list */
	char_u sb_data[1]; /* data, actually longer */
	};

	/*
	* Structure used to store words and other info for one language.
	*/
	typedef struct slang_S slang_T;

	struct slang_S
	{
	slang_T sl_next; / next language */
	char_u sl_name[2]; /* language name "en", "nl", etc. */
	hashtab_T sl_ht; /* hashtable with all words */
	garray_T sl_match; /* table with pointers to matches */
	garray_T sl_add; /* table with pointers to additions */
	char_u sl_regions[13]; /* table with up to 6 region names */
	sblock_T sl_block; / list with allocated memory blocks */
	};

	static slang_T *first_lang = NULL;

	/*
	* Structure used in "b_langp", filled from 'spelllang'.
	*/
	typedef struct langp_S
	{
	slang_T lp_slang; / info for this language (NULL for last one) */
	int lp_region; /* bitmask for region or REGION_ALL */
	} langp_T;

	#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
	#define MATCH_ENTRY(gap, i) (((char_u *)(gap)->ga_data) + i)

	/*
	* The byte before a word in the hashtable indicates the type of word.
	* Also used for the byte just before a match.
	* The top two bits are used to indicate rare and case-sensitive words.
	* The lower bits are used to indicate the region in which the word is valid.
	* Words valid in all regions use REGION_ALL.
	*/
	#define REGION_MASK 0x3f
	#define REGION_ALL 0x3f
	#define CASE_MASK 0x40
	#define RARE_MASK 0x80

	#define SP_OK 0
	#define SP_BAD 1
	#define SP_RARE 2
	#define SP_LOCAL 3

	static slang_T spell_load_lang __ARGS((char_u lang));
	static void spell_load_file __ARGS((char_u *fname));
	static int find_region __ARGS((char_u rp, char_u region));

	/*
	* Main spell-checking function.
	* "ptr" points to the start of a word.
	* "*attrp" is set to the attributes for a badly spelled word. For a non-word
	* or when it's OK it remains unchanged.
	* This must only be called when 'spelllang' is not empty.
	* Returns the length of the word in bytes, also when it's OK, so that the
	* caller can skip over the word.
	*/
	int
	spell_check(wp, ptr, attrp)
	win_T wp; / current window */
	char_u *ptr;
	int *attrp;
	{
	char_u *e;
	langp_T *lp;
	int result;
	int len = 0;
	hash_T hash;
	hashitem_T *hi;
	int c;
	#define MAXWLEN 80 /* assume max. word len is 80 */
	char_u word[MAXWLEN + 1];
	garray_T *gap;
	int l, h, t;
	char_u *p;
	int n;

	/* Find the end of the word. We already know that ptr is a word char. /
	e = ptr;
	do
	{
	mb_ptr_adv(e);
	++len;
	} while (*e != NUL && vim_iswordc_buf(e, wp->w_buffer));

	/* The word is bad unless we find it in the dictionary. */
	result = SP_BAD;

	/* Words are always stored with folded case. */
	(void)str_foldcase(ptr, e - ptr, word, MAXWLEN + 1);
	hash = hash_hash(word);

	/*
	* Loop over the languages specified in 'spelllang'.
	* We check them all, because a match may find a longer word.
	*/
	for (lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); lp->lp_slang != NULL;
	++lp)
	{
	/* Check words when it wasn't recognized as a good word yet. */
	if (result != SP_OK)
	{
	/* Word lookup. Using a hash table is fast. */
	hi = hash_lookup(&lp->lp_slang->sl_ht, word, hash);
	if (!HASHITEM_EMPTY(hi))
	{
	/* The character before the key indicates the type of word. */
	c = hi->hi_key[-1];
	if ((c & CASE_MASK) != 0)
	{
	/* Need to check first letter is uppercase. If it is,
	* check region. If it isn't it may be a rare word. */
	if (
	#ifdef FEAT_MBYTE
	MB_ISUPPER(mb_ptr2char(ptr))
	#else
	MB_ISUPPER(*ptr)
	#endif
	)
	{
	if ((c & lp->lp_region) == 0)
	result = SP_LOCAL;
	else
	result = SP_OK;
	}
	else if (c & RARE_MASK)
	result = SP_RARE;
	}
	else
	{
	if ((c & lp->lp_region) == 0)
	result = SP_LOCAL;
	else if (c & RARE_MASK)
	result = SP_RARE;
	else
	result = SP_OK;
	}
	}
	}

	/* Match lookup. Uses a binary search. If there is a match adjust
	* "e" to the end. This is also done when a word matched, because
	* "you've" is longer than "you". */
	gap = &lp->lp_slang->sl_match;
	l = 0; /* low index */
	h = gap->ga_len - 1; /* high index */
	/* keep searching, the match must be between "l" and "h" (inclusive) */
	while (h >= l)
	{
	t = (h + l) / 2;
	p = MATCH_ENTRY(gap, t) + 1;
	for (n = 0; p[n] != 0 && p[n] == ptr[n]; ++n)
	;
	if (p[n] == 0)
	{
	if ((ptr[n] == 0 \|\| !vim_iswordc_buf(ptr + n, wp->w_buffer)))
	{
	/* match! */
	e = ptr + n;
	if (result != SP_OK)
	{
	if ((lp->lp_region & p[-1]) == 0)
	result = SP_LOCAL;
	else
	result = SP_OK;
	}
	break;
	}
	/* match is too short, next item is new low index */
	l = t + 1;
	}
	else if (p[n] < ptr[n])
	/* match is before word, next item is new low index */
	l = t + 1;
	else
	/* match is after word, previous item is new high index */
	h = t - 1;
	}

	/* Addition lookup. Uses a linear search, there should be very few.
	* If there is a match adjust "e" to the end. This doesn't change
	* whether a word was good or bad, only the length. */
	gap = &lp->lp_slang->sl_add;
	for (t = 0; t < gap->ga_len; ++t)
	{
	p = MATCH_ENTRY(gap, t) + 1;
	for (n = 0; p[n] != 0 && p[n] == e[n]; ++n)
	;
	if (p[n] == 0
	&& (e[n] == 0 \|\| !vim_iswordc_buf(e + n, wp->w_buffer)))
	{
	/* match */
	e += n;
	break;
	}
	}
	}

	if (result != SP_OK)
	{
	if (result == SP_BAD)
	*attrp = highlight_attr[HLF_SPB];
	else if (result == SP_RARE)
	*attrp = highlight_attr[HLF_SPR];
	else
	*attrp = highlight_attr[HLF_SPL];
	}

	return (int)(e - ptr);
	}

	static slang_T load_lp; / passed from spell_load_lang() to
	spell_load_file() */

	/*
	* Load language "lang[2]".
	*/
	static slang_T *
	spell_load_lang(lang)
	char_u *lang;
	{
	slang_T *lp;
	char_u fname_enc[80];
	char_u fname_ascii[20];
	char_u *p;

	lp = (slang_T *)alloc(sizeof(slang_T));
	if (lp != NULL)
	{
	lp->sl_name[0] = lang[0];
	lp->sl_name[1] = lang[1];
	hash_init(&lp->sl_ht);
	ga_init2(&lp->sl_match, sizeof(char_u *), 20);
	ga_init2(&lp->sl_add, sizeof(char_u *), 4);
	lp->sl_regions[0] = NUL;
	lp->sl_block = NULL;

	/* Find all spell files for "lang" in 'runtimepath' and load them.
	* Use 'encoding', except that we use "latin1" for "latin9". */
	#ifdef FEAT_MBYTE
	if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
	p = p_enc;
	else
	#endif
	p = (char_u *)"latin1";
	load_lp = lp;
	sprintf((char *)fname_enc, "spell/%c%c.%s.spl", lang[0], lang[1], p);
	if (do_in_runtimepath(fname_enc, TRUE, spell_load_file) == FAIL)
	{
	/* Try again to find an ASCII spell file. */
	sprintf((char *)fname_ascii, "spell/%c%c.spl", lang[0], lang[1]);
	if (do_in_runtimepath(fname_ascii, TRUE, spell_load_file) == FAIL)
	{
	vim_free(lp);
	lp = NULL;
	smsg((char_u *)_("Warning: Cannot find dictionary \"%s\""),
	fname_enc + 6);
	}
	}
	else
	{
	lp->sl_next = first_lang;
	first_lang = lp;
	}
	}

	return lp;
	}

	/*
	* Load one spell file into "load_lp".
	* Invoked through do_in_runtimepath().
	*/
	static void
	spell_load_file(fname)
	char_u *fname;
	{
	int fd;
	size_t len;
	size_t l;
	size_t rest = 0;
	char_u p = NULL, np;
	sblock_T *bl;
	hash_T hash;
	hashitem_T *hi;
	int c;
	int region = REGION_ALL;
	char_u word[MAXWLEN + 1];
	int n;

	fd = mch_open((char *)fname, O_RDONLY \| O_EXTRA, 0);
	if (fd < 0)
	{
	EMSG2(_(e_notopen), fname);
	return;
	}

	/* Get the length of the whole file. */
	len = lseek(fd, (off_t)0, SEEK_END);
	lseek(fd, (off_t)0, SEEK_SET);

	/* Loop, reading the file one block at a time.
	* "rest" is the length of an incomplete line at the previous block.
	* "p" points to the remainder. */
	while (len > 0)
	{
	/* Allocate a block of memory to store the info in. This is not freed
	* until spell_reload() is called. */
	if (len > SBLOCKSIZE)
	l = SBLOCKSIZE;
	else
	l = len;
	len -= l;
	bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) - 1 + l + rest));
	if (bl == NULL)
	break;
	bl->sb_next = load_lp->sl_block;
	load_lp->sl_block = bl;

	/* Read a block from the file. Prepend the remainder of the previous
	* block. */
	if (rest > 0)
	mch_memmove(bl->sb_data, p, rest);
	if (read(fd, bl->sb_data + rest, l) != l)
	{
	EMSG2(_(e_notread), fname);
	break;
	}
	l += rest;
	rest = 0;

	/* Deal with each line that was read until we finish the block. */
	for (p = bl->sb_data; l > 0; p = np)
	{
	/* "np" points to the char after the line (CR or NL). */
	for (np = p; l > 0 && *np >= ' '; ++np)
	--l;
	if (l == 0)
	{
	/* Incomplete line (or end of file). */
	rest = np - p;
	if (len == 0)
	EMSG2(_("E751: Truncated spell file: %s"), fname);
	break;
	}
	np = NUL; / terminate the line with a NUL */

	/* Skip comment and empty lines. */
	c = *p;
	if (c != '#' && np > p)
	{
	if (c == '=' \|\| c == '+')
	{
	garray_T *gap;

	/* Match or Add item. */
	if (c == '=')
	gap = &load_lp->sl_match;
	else
	gap = &load_lp->sl_add;

	if (ga_grow(gap, 1) == OK)
	{
	for (n = 0; n < gap->ga_len; ++n)
	if ((c = STRCMP(p + 1,
	MATCH_ENTRY(gap, n) + 1)) < 0)
	break;
	if (c == 0)
	{
	if (p_verbose > 0)
	smsg((char_u *)_("Warning: duplicate match \"%s\" in %s"),
	p + 1, fname);
	}
	else
	{
	mch_memmove((char_u **)gap->ga_data + n + 1,
	(char_u **)gap->ga_data + n,
	(gap->ga_len - n) * sizeof(char_u *));
	(((char_u *)gap->ga_data) + n) = p;
	*p = region;
	++gap->ga_len;
	}
	}
	}
	else if (c == '-')
	{
	/* region item */
	++p;
	if (*p == '-')
	/* end of a region */
	region = REGION_ALL;
	else
	{
	char_u *rp = load_lp->sl_regions;
	int r;

	/* The region may be repeated: "-ca-uk". Fill
	* "region" with the bit mask for the ones we find. */
	region = 0;
	for (;;)
	{
	/* start of a region */
	r = find_region(rp, p);
	if (r == REGION_ALL)
	{
	/* new region, add it */
	r = STRLEN(rp);
	if (r >= 12)
	{
	EMSG2(_("E752: Too many regions in %s"),
	fname);
	r = REGION_ALL;
	}
	else
	{
	rp[r] = p[0];
	rp[r + 1] = p[1];
	rp[r + 2] = NUL;
	r = 1 << (r / 2);
	}
	}
	else
	r = 1 << r;

	region \|= r;
	if (p[2] != '-')
	{
	if (p[2] != NUL)
	EMSG2(_("E753: Invalid character in \"%s\""),
	p - 1);
	break;
	}
	p += 3;
	}
	}
	}
	else
	{
	/* add the word */
	if (c == '>')
	c = region \| RARE_MASK;
	else
	{
	if (c != ' ')
	EMSG2(_("E753: Invalid character in \"%s\""), p);
	c = region;
	}
	#ifdef FEAT_MBYTE
	if (MB_ISUPPER(mb_ptr2char(p + 1)))
	#else
	if (MB_ISUPPER(p[1]))
	#endif
	c \|= CASE_MASK;
	*p++ = c;
	(void)str_foldcase(p, np - p, word, MAXWLEN + 1);
	n = STRLEN(word);
	if (n > np - p)
	{
	sblock_T *s;

	/* Folding case made word longer! We need to allocate
	* memory for it. */
	s = (sblock_T *)alloc((unsigned)sizeof(sblock_T)
	+ n + 1);
	if (s != NULL)
	{
	s->sb_next = load_lp->sl_block;
	load_lp->sl_block = s;
	s->sb_data[0] = p[-1];
	p = s->sb_data + 1;
	}
	}
	mch_memmove(p, word, n + 1);

	hash = hash_hash(p);
	hi = hash_lookup(&load_lp->sl_ht, p, hash);
	if (!HASHITEM_EMPTY(hi))
	{
	c = hi->hi_key[-1];
	if ((c & (CASE_MASK \| RARE_MASK))
	== (p[-1] & (CASE_MASK \| RARE_MASK)))
	{
	if (p_verbose > 0)
	smsg((char_u *)_("Warning: duplicate word \"%s\" in %s"),
	p, fname);
	}
	else
	hi->hi_key[-1] \|= (p[-1] & (CASE_MASK \| RARE_MASK));
	}
	else
	hash_add_item(&load_lp->sl_ht, hi, p, hash);
	}
	}

	while (l > 0 && *np < ' ')
	{
	++np;
	--l;
	}
	}
	}

	close(fd);
	}

	/*
	* Parse 'spelllang' and set buf->b_langp accordingly.
	* Returns an error message or NULL.
	*/
	char_u *
	did_set_spelllang(buf)
	buf_T *buf;
	{
	garray_T ga;
	char_u *lang;
	char_u *e;
	char_u *region;
	int region_mask;
	slang_T *lp;
	int c;

	ga_init2(&ga, sizeof(langp_T), 2);

	/* loop over comma separated languages. */
	for (lang = buf->b_p_spl; *lang != NUL; lang = e)
	{
	e = vim_strchr(lang, ',');
	if (e == NULL)
	e = lang + STRLEN(lang);
	if (e > lang + 2)
	{
	if (lang[2] != '_' \|\| e - lang != 5)
	{
	ga_clear(&ga);
	return e_invarg;
	}
	region = lang + 3;
	}
	else
	region = NULL;

	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
	if (STRNICMP(lp->sl_name, lang, 2) == 0)
	break;

	if (lp == NULL)
	/* Not found, load the language. */
	lp = spell_load_lang(lang);

	if (lp != NULL)
	{
	if (region == NULL)
	region_mask = REGION_ALL;
	else
	{
	/* find region in sl_regions */
	c = find_region(lp->sl_regions, region);
	if (c == REGION_ALL)
	{
	c = lang[5];
	lang[5] = NUL;
	smsg((char_u *)_("Warning: region %s not supported"), lang);
	lang[5] = c;
	region_mask = REGION_ALL;
	}
	else
	region_mask = 1 << c;
	}

	if (ga_grow(&ga, 1) == FAIL)
	{
	ga_clear(&ga);
	return e_outofmem;
	}
	LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
	LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
	++ga.ga_len;
	}

	if (*e == ',')
	++e;
	}

	/* Add a NULL entry to mark the end of the list. */
	if (ga_grow(&ga, 1) == FAIL)
	{
	ga_clear(&ga);
	return e_outofmem;
	}
	LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
	++ga.ga_len;

	/* Everything is fine, store the new b_langp value. */
	ga_clear(&buf->b_langp);
	buf->b_langp = ga;

	return NULL;
	}

	/*
	* Find the region "region[2]" in "rp" (points to "sl_regions").
	* Each region is simply stored as the two characters of it's name.
	* Returns the index if found, REGION_ALL if not found.
	*/
	static int
	find_region(rp, region)
	char_u *rp;
	char_u *region;
	{
	int i;

	for (i = 0; ; i += 2)
	{
	if (rp[i] == NUL)
	return REGION_ALL;
	if (rp[i] == region[0] && rp[i + 1] == region[1])
	break;
	}
	return i / 2;
	}

	# if defined(FEAT_MBYTE) \|\| defined(PROTO)
	/*
	* Clear all spelling tables and reload them.
	* Used after 'encoding' is set.
	*/
	void
	spell_reload()
	{
	buf_T *buf;
	slang_T *lp;
	sblock_T *sp;

	/* Unload all allocated memory. */
	while (first_lang != NULL)
	{
	lp = first_lang;
	first_lang = lp->sl_next;

	hash_clear(&lp->sl_ht);
	ga_clear(&lp->sl_match);
	ga_clear(&lp->sl_add);
	while (lp->sl_block != NULL)
	{
	sp = lp->sl_block;
	lp->sl_block = sp->sb_next;
	vim_free(sp);
	}
	}

	/* Go through all buffers and handle 'spelllang'. */
	for (buf = firstbuf; buf != NULL; buf = buf->b_next)
	{
	ga_clear(&buf->b_langp);
	if (*buf->b_p_spl != NUL)
	did_set_spelllang(buf);
	}
	}
	# endif

	#endif /* FEAT_SYN_HL */