Blame - src/spell.c - android_external_vim

blob: 9f1f00c0071fdbde6f4536673b5d698258b37770 [file] [log] [blame]

Bram Moolenaar	e19defe	2005-03-21 08:23:33 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* spell.c: code for spell checking
Bram Moolenaar	fc73515	2005-03-22 22:54:12 +0000	[diff] [blame]	12	*
				13	* Terminology:
				14	* "dword" is a dictionary word, made out of letters and digits.
				15	* "nword" is a word with a character that's not a letter or digit.
				16	* "word" is either a "dword" or an "nword".
Bram Moolenaar	e19defe	2005-03-21 08:23:33 +0000	[diff] [blame]	17	*/
				18
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	19	/*
				20	* Why doesn't Vim use aspell/ispell/myspell/etc.?
				21	* See ":help develop-spell".
				22	*/
				23
Bram Moolenaar	e19defe	2005-03-21 08:23:33 +0000	[diff] [blame]	24	#if defined(MSDOS) \|\| defined(WIN16) \|\| defined(WIN32) \|\| defined(_WIN64)
				25	# include <io.h> /* for lseek(), must be before vim.h */
				26	#endif
				27
				28	#include "vim.h"
				29
				30	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				31
				32	#ifdef HAVE_FCNTL_H
				33	# include <fcntl.h>
				34	#endif
				35
Bram Moolenaar	fc73515	2005-03-22 22:54:12 +0000	[diff] [blame]	36	#define MAXWLEN 100 /* assume max. word len is this many bytes */
				37
Bram Moolenaar	e19defe	2005-03-21 08:23:33 +0000	[diff] [blame]	38	/*
				39	* Structure that is used to store the text from the language file. This
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	40	* avoids the need to allocate space for each individual word. It's allocated
				41	* in big chunks for speed.
				42	*/
				43	#define SBLOCKSIZE 4096 /* default size of sb_data */
				44	typedef struct sblock_S sblock_T;
				45	struct sblock_S
				46	{
				47	sblock_T sb_next; / next block in list */
				48	char_u sb_data[1]; /* data, actually longer */
				49	};
				50
				51	/* Info from "REP" entries in ".aff" file used in af_rep. */
				52	typedef struct repentry_S
				53	{
				54	char_u *re_from;
				55	char_u *re_to;
				56	} repentry_T;
				57
				58	/*
				59	* Structure to store affix info.
				60	*/
				61	typedef struct affitem_S affitem_T;
				62	struct affitem_S
				63	{
				64	affitem_T ai_next; / next affix with same ai_add[] or NULL */
				65	short_u ai_nr; /* affix number */
				66	char_u ai_combine; /* prefix combines with suffix */
				67	char_u ai_choplen; /* length of ai_chop in bytes */
				68	char_u ai_addlen; /* length of ai_add in bytes */
				69	char_u ai_chop; / text chopped off basic word (can be NULL) */
				70	char_u ai_add[1]; /* text added to basic word (actually longer) */
				71	};
				72
				73	/* Get affitem_T pointer from hashitem that uses ai_add */
				74	static affitem_T dumai;
				75	#define HI2AI(hi) ((affitem_T )((hi)->hi_key - (dumai.ai_add - (char_u )&dumai)))
				76
				77	/*
				78	* Structure used to store words and other info for one language.
				79	*/
				80	typedef struct slang_S slang_T;
				81	struct slang_S
				82	{
				83	slang_T sl_next; / next language */
				84	char_u sl_name; / language name "en", "en.rare", "nl", etc. */
				85	hashtab_T sl_words; /* main word table, fword_T */
				86	int sl_prefcnt; /* number of prefix NRs */
				87	garray_T sl_preftab; /* list of hashtables to lookup prefixes */
				88	affitem_T sl_prefzero; / list of prefixes with zero add length */
				89	int sl_suffcnt; /* number of suffix NRs */
				90	garray_T sl_sufftab; /* list of hashtables to lookup suffixes */
				91	affitem_T sl_suffzero; / list of suffixes with zero add length */
				92	char_u sl_try; / "TRY" from .aff file */
				93	garray_T sl_rep; /* list of repentry_T entries from REP lines */
				94	char_u sl_regions[17]; /* table with up to 8 region names plus NUL */
				95	sblock_T sl_block; / list with allocated memory blocks */
				96	int sl_error; /* error while loading */
				97	};
				98
				99	static slang_T *first_lang = NULL;
				100
				101	/*
				102	* Structure to store an addition to a basic word.
				103	*/
				104	typedef struct addword_S addword_T;
				105	struct addword_S
				106	{
				107	addword_T aw_next; / next addition */
				108	char_u aw_flags; /* ADD_ flags */
				109	char_u aw_leadlen; /* length of lead in bytes */
				110	char_u aw_wordlen; /* length of aw_word in bytes */
				111	char_u aw_region; /* region for word with this addition */
				112	char_u aw_word[1]; /* text, actually longer: case-folded addition
				113	plus, with ADD_KEEPCAP: keep-case addition */
				114	};
				115
				116	/*
				117	* Structure to store a basic word.
				118	*/
				119	typedef struct fword_S fword_T;
				120	struct fword_S
				121	{
				122	fword_T fw_next; / same basic word with different caps */
				123	char_u fw_region; /* region bits */
				124	char_u fw_prefixcnt; /* number of prefix numbers */
				125	char_u fw_suffixcnt; /* number of suffix numbers */
				126	short_u fw_flags; /* BWF_ flags */
				127	void fw_prefix; / table with prefix numbers */
				128	void fw_suffix; / table with suffix numbers */
				129	addword_T fw_adds; / first addword_T entry */
				130	char_u fw_word[1]; /* actually longer: case folded word, or
				131	keep-case word when (flags & BWF_KEEPCAP) */
				132	};
				133
				134	/* Get fword_T pointer from hashitem that uses fw_word */
				135	static fword_T dumfw;
				136	#define HI2FWORD(hi) ((fword_T )((hi)->hi_key - (dumfw.fw_word - (char_u )&dumfw)))
				137
				138	#define REGION_ALL 0xff
				139
				140
				141	/*
				142	* Structure used in "b_langp", filled from 'spelllang'.
				143	*/
				144	typedef struct langp_S
				145	{
				146	slang_T lp_slang; / info for this language (NULL for last one) */
				147	int lp_region; /* bitmask for region or REGION_ALL */
				148	} langp_T;
				149
				150	#define LANGP_ENTRY(ga, i) (((langp_T *)(ga).ga_data) + (i))
				151
				152	#define SP_OK 0
				153	#define SP_BAD 1
				154	#define SP_RARE 2
				155	#define SP_LOCAL 3
				156
				157	/* flags used for basic words in the spell file */
				158	#define BWF_VALID 0x01 /* word is valid without additions */
				159	#define BWF_REGION 0x02 /* region byte follows */
				160	#define BWF_ONECAP 0x04 /* first letter must be capital */
				161	#define BWF_SUFFIX 0x08 /* has suffix NR list */
				162	#define BWF_SECOND 0x10 /* second flags byte follows */
				163
				164	#define BWF_ADDS 0x0100 /* there are additions */
				165	#define BWF_PREFIX 0x0200 /* has prefix NR list */
				166	#define BWF_ALLCAP 0x0400 /* all letters must be capital (not used
				167	for single-letter words) */
				168	#define BWF_KEEPCAP 0x0800 /* Keep case as-is */
				169
				170	/* flags used for addition in the spell file */
				171	#define ADD_REGION 0x02 /* region byte follows */
				172	#define ADD_ONECAP 0x04 /* first letter must be capital */
				173	#define ADD_ALLCAP 0x40 /* all letters must be capital (not used
				174	for single-letter words) */
				175	#define ADD_KEEPCAP 0x80 /* fixed case */
				176
				177	/* Translate ADD_ flags to BWF_ flags.
				178	* (Needed to keep ADD_ flags in one byte.) */
				179	#define ADD2BWF(x) (((x) & 0x0f) \| (((x) & 0xf0) << 4))
				180
				181	#define VIMSPELLMAGIC "VIMspell01" /* string at start of Vim spell file */
				182	#define VIMSPELLMAGICL 10
				183
				184	/*
				185	* Structure to store info for word matching.
				186	*/
				187	typedef struct matchinf_S
				188	{
				189	langp_T mi_lp; / info for language and region */
				190	slang_T mi_slang; / info for the language */
				191	char_u mi_line; / start of line containing word */
				192	char_u mi_word; / start of word being checked */
				193	char_u mi_end; / first non-word char after mi_word */
				194	char_u mi_wend; / end of matching word (is "mi_end"
				195	* or further) */
				196	char_u mi_cword; / word to check, can be "mi_fword" */
				197	char_u mi_fword[MAXWLEN + 1]; /* "mi_word" to "mi_end" case-folded */
				198	int mi_faddlen; /* length of valid bytes in "mi_fadd" */
				199	char_u mi_faddp; / next char to be added to "mi_fadd" */
				200	char_u mi_fadd[MAXWLEN + 1]; /* "mi_end" and further case-folded */
				201	int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
				202	int mi_capflags; /* BWF_ONECAP BWF_ALLCAP BWF_KEEPCAP */
				203	} matchinf_T;
				204
				205	static int word_match __ARGS((matchinf_T *mip));
				206	static int check_adds __ARGS((matchinf_T mip, fword_T fw, int req_pref, int req_suf));
				207	static int supports_afffix __ARGS((int cnt, void *afffix, int afffixcnt, int nr));
				208	static int prefix_match __ARGS((matchinf_T *mip));
				209	static int suffix_match __ARGS((matchinf_T *mip));
				210	static int match_caps __ARGS((int flags, char_u caseword, matchinf_T mip, char_u cword, char_u end));
				211	static slang_T slang_alloc __ARGS((char_u lang));
				212	static void slang_free __ARGS((slang_T *lp));
				213	static slang_T spell_load_lang __ARGS((char_u lang));
				214	static void spell_load_file __ARGS((char_u fname, void cookie));
				215	static int spell_load_affixes __ARGS((FILE fd, slang_T lp, int bl_usedp, int affm, void *affp));
				216	static void getroom __ARGS((slang_T lp, int *bl_used, int len));
				217	static int find_region __ARGS((char_u rp, char_u region));
				218	static int captype __ARGS((char_u word, char_u end));
				219
				220	/*
				221	* Main spell-checking function.
				222	* "ptr" points to the start of a word.
				223	* "*attrp" is set to the attributes for a badly spelled word. For a non-word
				224	* or when it's OK it remains unchanged.
				225	* This must only be called when 'spelllang' is not empty.
				226	* Returns the length of the word in bytes, also when it's OK, so that the
				227	* caller can skip over the word.
				228	*/
				229	int
				230	spell_check(wp, line, ptr, attrp)
				231	win_T wp; / current window */
				232	char_u line; / start of line where "ptr" points into */
				233	char_u *ptr;
				234	int *attrp;
				235	{
				236	matchinf_T mi; /* Most things are put in "mi" so that it can
				237	be passed to functions quickly. */
				238
				239	/* Find the end of the word. We already know that ptr is a word char. /
				240	mi.mi_word = ptr;
				241	mi.mi_end = ptr;
				242	do
				243	{
				244	mb_ptr_adv(mi.mi_end);
				245	} while (*mi.mi_end != NUL && spell_iswordc(mi.mi_end));
				246
				247	/* A word starting with a number is always OK. */
				248	if (ptr >= '0' && ptr <= '9')
				249	return (int)(mi.mi_end - ptr);
				250
				251	/* Make case-folded copy of the Word. Compute its hash value. */
				252	(void)str_foldcase(ptr, mi.mi_end - ptr, mi.mi_fword, MAXWLEN + 1);
				253	mi.mi_cword = mi.mi_fword;
				254
				255	/* The word is bad unless we find it in the dictionary. */
				256	mi.mi_result = SP_BAD;
				257	mi.mi_wend = mi.mi_end;
				258	mi.mi_faddp = mi.mi_end;
				259	mi.mi_faddlen = 0;
				260	mi.mi_capflags = captype(ptr, mi.mi_end);
				261	mi.mi_line = line;
				262
				263	/*
				264	* Loop over the languages specified in 'spelllang'.
				265	* We check them all, because a matching word may have additions that are
				266	* longer than an already found matching word.
				267	*/
				268	for (mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
				269	mi.mi_lp->lp_slang != NULL; ++mi.mi_lp)
				270	{
				271	/*
				272	* Check for a matching word.
				273	* If not found or wrong region try removing prefixes (and then
				274	* suffixes).
				275	* If still not found or wrong region try removing suffixes.
				276	*/
				277	mi.mi_slang = mi.mi_lp->lp_slang;
				278	if (!word_match(&mi) \|\| mi.mi_result != SP_OK)
				279	if (!prefix_match(&mi) \|\| mi.mi_result != SP_OK)
				280	suffix_match(&mi);
				281	}
				282
				283	if (mi.mi_result != SP_OK)
				284	{
				285	if (mi.mi_result == SP_BAD)
				286	*attrp = highlight_attr[HLF_SPB];
				287	else if (mi.mi_result == SP_RARE)
				288	*attrp = highlight_attr[HLF_SPR];
				289	else
				290	*attrp = highlight_attr[HLF_SPL];
				291	}
				292
				293	return (int)(mi.mi_wend - ptr);
				294	}
				295
				296	/*
				297	* Check if the word "mip->mi_cword" matches.
				298	*/
				299	static int
				300	word_match(mip)
				301	matchinf_T *mip;
				302	{
				303	hash_T fhash = hash_hash(mip->mi_cword);
				304	hashitem_T *hi;
				305	fword_T *fw;
				306	int valid = FALSE;
				307
				308	hi = hash_lookup(&mip->mi_slang->sl_words, mip->mi_cword, fhash);
				309	if (HASHITEM_EMPTY(hi))
				310	return FALSE;
				311
				312	/*
				313	* Find a basic word for which the case of word "cword" is correct.
				314	* If it is, check additions and use the longest one.
				315	*/
				316	for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next)
				317	if (match_caps(fw->fw_flags, fw->fw_word, mip,
				318	mip->mi_word, mip->mi_end))
				319	valid \|= check_adds(mip, fw, -1, -1);
				320
				321	return valid;
				322	}
				323
				324	/*
				325	* Check a matching basic word for additions.
				326	* Return TRUE if we have a valid match.
				327	*/
				328	static int
				329	check_adds(mip, fw, req_pref, req_suf)
				330	matchinf_T *mip;
				331	fword_T *fw;
				332	int req_pref; /* required prefix nr, -1 if none */
				333	int req_suf; /* required suffix nr, -1 if none */
				334	{
				335	int valid = FALSE;
				336	addword_T *aw;
				337	char_u *p;
				338	int addlen;
				339	int fl;
				340
				341	/* A word may be valid without additions. */
				342	if ((fw->fw_flags & BWF_VALID)
				343	&& (req_pref < 0 \|\| supports_afffix(mip->mi_slang->sl_prefcnt,
				344	fw->fw_prefix, fw->fw_prefixcnt, req_pref))
				345	&& (req_suf < 0 \|\| supports_afffix(mip->mi_slang->sl_suffcnt,
				346	fw->fw_suffix, fw->fw_suffixcnt, req_suf)))
				347	{
				348	valid = TRUE;
				349	if (mip->mi_result != SP_OK)
				350	{
				351	if ((fw->fw_region & mip->mi_lp->lp_region) == 0)
				352	mip->mi_result = SP_LOCAL;
				353	else
				354	mip->mi_result = SP_OK;
				355	}
				356	}
				357
				358	/*
				359	* Check additions, both before and after the word.
				360	* This may make the word longer, thus we also need to check
				361	* when we already found a matching word.
				362	*/
				363	for (aw = fw->fw_adds; aw != NULL; aw = aw->aw_next)
				364	{
				365	if (aw->aw_leadlen > 0)
				366	{
				367	/* There is a leader, verify that it matches. */
				368	if (aw->aw_leadlen > mip->mi_word - mip->mi_line
				369	\|\| STRNCMP(mip->mi_word - aw->aw_leadlen,
				370	aw->aw_word, aw->aw_leadlen) != 0)
				371	continue;
				372	if (mip->mi_word - aw->aw_leadlen > mip->mi_line)
				373	{
				374	/* There must not be a word character just before the
				375	* leader. */
				376	p = mip->mi_word - aw->aw_leadlen;
				377	mb_ptr_back(mip->mi_line, p);
				378	if (spell_iswordc(p))
				379	continue;
				380	}
				381	/* Leader matches. Addition is rest of "aw_word". */
				382	p = aw->aw_word + aw->aw_leadlen;
				383	}
				384	else
				385	/* No leader, use whole of "aw_word" for addition. */
				386	p = aw->aw_word;
				387
				388	addlen = aw->aw_wordlen - aw->aw_leadlen;
				389	if (addlen > 0)
				390	{
				391	/* Check for matching addition and no word character after it.
				392	* First make sure we have enough case-folded chars to compare
				393	* with. */
				394	while (mip->mi_faddlen <= addlen)
				395	{
				396	if (*mip->mi_faddp == NUL)
				397	{
				398	mip->mi_fadd[mip->mi_faddlen] = NUL;
				399	break;
				400	}
				401	#ifdef FEAT_MBYTE
				402	fl = (*mb_ptr2len_check)(mip->mi_faddp);
				403	#else
				404	fl = 1;
				405	#endif
				406	(void)str_foldcase(mip->mi_faddp, fl,
				407	mip->mi_fadd + mip->mi_faddlen,
				408	MAXWLEN - mip->mi_faddlen);
				409	mip->mi_faddp += fl;
				410	mip->mi_faddlen += STRLEN(mip->mi_fadd + mip->mi_faddlen);
				411	}
				412
				413	if (STRNCMP(mip->mi_fadd, p, addlen) != 0
				414	\|\| (mip->mi_fadd[addlen] != NUL
				415	&& spell_iswordc(mip->mi_fadd + addlen)))
				416	continue;
				417
				418	/* Compute the length in the original word, before case folding. */
				419	#ifdef FEAT_MBYTE
				420	if (has_mbyte)
				421	{
				422	int l;
				423
				424	p = mip->mi_end;
				425	for (l = 0; l < addlen;
				426	l += (*mb_ptr2len_check)(mip->mi_fadd + l))
				427	mb_ptr_adv(p);
				428	addlen = p - mip->mi_end;
				429	}
				430	#endif
				431
				432	/* Check case of the addition. */
				433	if (!match_caps(ADD2BWF(aw->aw_flags),
				434	aw->aw_word + aw->aw_wordlen + 1, mip,
				435	mip->mi_end, mip->mi_end + addlen))
				436	continue;
				437	}
				438
				439	/* Match! Use the new length if it's longer. */
				440	if (mip->mi_wend < mip->mi_end + addlen)
				441	mip->mi_wend = mip->mi_end + addlen;
				442
				443	valid = TRUE;
				444	if (mip->mi_result != SP_OK)
				445	{
				446	if ((aw->aw_region & mip->mi_lp->lp_region) == 0)
				447	mip->mi_result = SP_LOCAL;
				448	else
				449	mip->mi_result = SP_OK;
				450	}
				451	}
				452
				453	return valid;
				454	}
				455
				456	/*
				457	* Return TRUE if word "fw" supports afffix "nr".
				458	*/
				459	static int
				460	supports_afffix(cnt, afffix, afffixcnt, nr)
				461	int cnt;
				462	void *afffix;
				463	int afffixcnt;
				464	int nr;
				465	{
				466	char_u *pc;
				467	short_u *ps;
				468	int i;
				469
				470	if (cnt <= 256)
				471	{
				472	/* char_u affix numbers */
				473	pc = afffix;
				474	for (i = afffixcnt; --i >= 0; )
				475	if (*pc++ == nr)
				476	return TRUE;
				477	}
				478	else
				479	{
				480	/* short_u affix numbers */
				481	ps = afffix;
				482	for (i = afffixcnt; --i >= 0; )
				483	if (*ps++ == nr)
				484	return TRUE;
				485	}
				486	return FALSE;
				487	}
				488
				489	/*
				490	* Try finding a match for "mip->mi_cword" by removing prefixes.
				491	*/
				492	static int
				493	prefix_match(mip)
				494	matchinf_T *mip;
				495	{
				496	int len = 0;
				497	int charlen = 0;
				498	int cc;
				499	affitem_T *ai;
				500	char_u pword[MAXWLEN + 1];
				501	fword_T *fw;
				502	hashtab_T *ht;
				503	hashitem_T *hi;
				504	int i;
				505	int found_valid = FALSE;
				506	int cstart_charlen = 0;
				507	char_u *cstart = mip->mi_word;
				508	int capflags_save = mip->mi_capflags;
				509	char_u *p;
				510
				511	/*
				512	* Check for prefixes with different character lengths.
				513	* Start with zero length (only chop off).
				514	*/
				515	for (charlen = 0; charlen <= mip->mi_slang->sl_preftab.ga_len; ++charlen)
				516	{
				517	if (charlen > 0)
				518	{
				519	#ifdef FEAT_MBYTE
				520	if (has_mbyte)
				521	len += mb_ptr2len_check(mip->mi_cword + len);
				522	else
				523	#endif
				524	len += 1;
				525	}
				526	if (mip->mi_cword[len] == NUL) /* end of word, no prefix possible */
				527	break;
				528
				529	if (charlen == 0)
				530	ai = mip->mi_slang->sl_prefzero;
				531	else
				532	{
				533	/* Get pointer to hashtab for prefix of this many chars. */
				534	ht = ((hashtab_T *)mip->mi_slang->sl_preftab.ga_data) + charlen - 1;
				535	if (ht->ht_used == 0)
				536	continue;
				537
				538	cc = mip->mi_cword[len];
				539	mip->mi_cword[len] = NUL;
				540	hi = hash_find(ht, mip->mi_cword);
				541	mip->mi_cword[len] = cc;
				542
				543	if (HASHITEM_EMPTY(hi))
				544	ai = NULL;
				545	else
				546	ai = HI2AI(hi);
				547	}
				548
				549	/* Loop over all matching prefixes. */
				550	for ( ; ai != NULL; ai = ai->ai_next)
				551	{
				552	/* Create the basic word by removing the prefix and adding the
				553	* chop string. */
				554	mch_memmove(pword, ai->ai_chop, ai->ai_choplen);
				555	STRCPY(pword + ai->ai_choplen, mip->mi_cword + ai->ai_addlen);
				556
				557	/* Adjust the word start for case checks, we only check the
				558	* part after the prefix. */
				559	while (cstart_charlen < charlen)
				560	{
				561	mb_ptr_adv(cstart);
				562	++cstart_charlen;
				563	}
				564
				565	/* Removing the prefix may change the caps, e.g. for
				566	* "deAlf" removing "de" makes it ONECAP. */
				567	mip->mi_capflags = captype(cstart, mip->mi_end);
				568
				569	/* Find the basic word. */
				570	hi = hash_find(&mip->mi_slang->sl_words, pword);
				571	if (!HASHITEM_EMPTY(hi))
				572	{
				573	/* Check if the word supports this prefix. */
				574	for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next)
				575	if (match_caps(fw->fw_flags, fw->fw_word, mip,
				576	cstart, mip->mi_end))
				577	found_valid \|= check_adds(mip, fw, ai->ai_nr, -1);
				578
				579	if (found_valid && mip->mi_result == SP_OK)
				580	{
				581	/* Found a valid word, no need to try other suffixes. */
				582	mip->mi_capflags = capflags_save;
				583	return TRUE;
				584	}
				585	}
				586
				587	/* No matching basic word without prefix. When combining is
				588	* allowed try with suffixes. */
				589	if (ai->ai_combine)
				590	{
				591	/* Pass the word with prefix removed to suffix_match(). */
				592	mip->mi_cword = pword;
				593	p = mip->mi_word;
				594	mip->mi_word = cstart;
				595	i = suffix_match(mip);
				596	mip->mi_cword = mip->mi_fword;
				597	mip->mi_word = p;
				598	if (i)
				599	{
				600	mip->mi_capflags = capflags_save;
				601	return TRUE;
				602	}
				603	}
				604	}
				605	}
				606
				607	mip->mi_capflags = capflags_save;
				608	return FALSE;
				609	}
				610
				611	/*
				612	* Try finding a match for "mip->mi_cword" by removing suffixes.
				613	*/
				614	static int
				615	suffix_match(mip)
				616	matchinf_T *mip;
				617	{
				618	char_u *sufp;
				619	int charlen;
				620	affitem_T *ai;
				621	char_u pword[MAXWLEN + 1];
				622	fword_T *fw;
				623	hashtab_T *ht;
				624	hashitem_T *hi;
				625	int tlen;
				626	int cend_charlen = 0;
				627	char_u *cend = mip->mi_end;
				628	int found_valid = FALSE;
				629	int capflags_save = mip->mi_capflags;
				630
				631	/*
				632	* Try suffixes of different length, starting with an empty suffix (chop
				633	* only, thus adds something).
				634	* Stop checking if there are no suffixes with so many characters.
				635	*/
				636	sufp = mip->mi_cword + STRLEN(mip->mi_cword);
				637	for (charlen = 0; charlen <= mip->mi_slang->sl_sufftab.ga_len; ++charlen)
				638	{
				639	/* Move the pointer to the possible suffix back one character, unless
				640	* doing the first round (empty suffix). */
				641	if (charlen > 0)
				642	{
				643	mb_ptr_back(mip->mi_cword, sufp);
				644	if (sufp <= mip->mi_cword) /* start of word, no suffix possible */
				645	break;
				646	}
				647
				648	if (charlen == 0)
				649	ai = mip->mi_slang->sl_suffzero;
				650	else
				651	{
				652	/* Get pointer to hashtab for suffix of this many chars. */
				653	ht = ((hashtab_T *)mip->mi_slang->sl_sufftab.ga_data) + charlen - 1;
				654	if (ht->ht_used == 0)
				655	continue;
				656
				657	hi = hash_find(ht, sufp);
				658	if (HASHITEM_EMPTY(hi))
				659	ai = NULL;
				660	else
				661	ai = HI2AI(hi);
				662	}
				663
				664	if (ai != NULL)
				665	{
				666	/* Found a list of matching suffixes. Now check that there is one
				667	* we can use. */
				668	tlen = sufp - mip->mi_cword; /* length of word without suffix */
				669	mch_memmove(pword, mip->mi_cword, tlen);
				670
				671	for ( ; ai != NULL; ai = ai->ai_next)
				672	{
				673	/* Found a matching suffix. Create the basic word by removing
				674	* the suffix and adding the chop string. */
				675	if (ai->ai_choplen == 0)
				676	pword[tlen] = NUL;
				677	else
				678	mch_memmove(pword + tlen, ai->ai_chop, ai->ai_choplen + 1);
				679
				680	/* Find the basic word. */
				681	hi = hash_find(&mip->mi_slang->sl_words, pword);
				682	if (!HASHITEM_EMPTY(hi))
				683	{
				684	/* Adjust the end for case checks, we only check the part
				685	* before the suffix. */
				686	while (cend_charlen < charlen)
				687	{
				688	mb_ptr_back(mip->mi_word, cend);
				689	++cend_charlen;
				690	}
				691
				692	/* Removing the suffix may change the caps, e.g. for
				693	* "UFOs" removing 's' makes it ALLCAP. */
				694	mip->mi_capflags = captype(mip->mi_word, cend);
				695
				696	/* Check if the word supports this suffix. */
				697	for (fw = HI2FWORD(hi); fw != NULL; fw = fw->fw_next)
				698	if (match_caps(fw->fw_flags, fw->fw_word, mip,
				699	mip->mi_word, cend))
				700	found_valid \|= check_adds(mip, fw, -1, ai->ai_nr);
				701
				702	if (found_valid && mip->mi_result == SP_OK)
				703	{
				704	/* Found a valid word, no need to try other suffixes. */
				705	mip->mi_capflags = capflags_save;
				706	return TRUE;
				707	}
				708	}
				709	}
				710	}
				711	}
				712
				713	mip->mi_capflags = capflags_save;
				714	return FALSE;
				715	}
				716
				717	/*
				718	* Return TRUE if case of "cword" meets the requirements of case flags
				719	* "flags".
				720	*/
				721	static int
				722	match_caps(flags, caseword, mip, cword, end)
				723	int flags; /* flags required by basic word or addition */
				724	char_u caseword; / word with case as required */
				725	matchinf_T *mip;
				726	char_u cword; / word to compare against "caseword" */
				727	char_u end; / end of "cword" */
				728	{
				729	char_u *p;
				730	int c;
				731	int len;
				732	int capflags = mip->mi_capflags; /* flags of checked word */
				733	int past_second;
				734
				735	if ((capflags & BWF_KEEPCAP) == 0 && end > mip->mi_end)
				736	{
				737	/* If "end" is past "mip->mi_end" we need to check the characters
				738	* after the basic word. */
				739	#ifdef FEAT_MBYTE
				740	past_second = (mip->mi_word + (*mb_ptr2len_check)(mip->mi_word)
				741	< mip->mi_end);
				742	#else
				743	past_second = mip->mi_word + 1 < mip->mi_end;
				744	#endif
				745	for (p = mip->mi_end; p < end; )
				746	{
				747	if (!spell_iswordc(p))
				748	mb_ptr_adv(p);
				749	else
				750	{
				751	#ifdef FEAT_MBYTE
				752	if (has_mbyte)
				753	c = mb_ptr2char_adv(&p);
				754	else
				755	#endif
				756	c = *p++;
				757	if (MB_ISUPPER(c))
				758	{
				759	if (capflags == 0 \|\| (capflags & BWF_ONECAP))
				760	{
				761	capflags = BWF_KEEPCAP; /* lU or UlU */
				762	break;
				763	}
				764	}
				765	else
				766	{
				767	if (capflags & BWF_ALLCAP)
				768	{
				769	if (past_second)
				770	{
				771	capflags = BWF_KEEPCAP; /* UUl */
				772	break;
				773	}
				774	capflags = BWF_ONECAP; /* Uu */
				775	}
				776	}
				777	past_second = TRUE;
				778	}
				779	}
				780	}
				781
				782	if (capflags == BWF_ALLCAP)
				783	return TRUE; /* All caps is always OK. */
				784
				785	if (flags & BWF_KEEPCAP)
				786	{
				787	len = STRLEN(caseword);
				788	return (len == end - cword && STRNCMP(caseword, cword, len) == 0);
				789	}
				790
				791	if (flags & BWF_ALLCAP)
				792	return FALSE; /* need ALLCAP, already checked above */
				793
				794	if (flags & BWF_ONECAP)
				795	return capflags == BWF_ONECAP;
				796
				797	return capflags != BWF_KEEPCAP; /* no case check, only KEEPCAP is bad */
				798	}
				799
				800	/*
				801	* Move to next spell error.
				802	* Return OK if found, FAIL otherwise.
				803	*/
				804	int
				805	spell_move_to(dir, allwords)
				806	int dir; /* FORWARD or BACKWARD */
				807	int allwords; /* TRUE for "[s" and "]s" */
				808	{
				809	pos_T pos;
				810	char_u *line;
				811	char_u *p;
				812	int wc;
				813	int nwc;
				814	int attr = 0;
				815	int len;
				816
				817	if (!curwin->w_p_spell \|\| *curwin->w_buffer->b_p_spl == NUL)
				818	{
				819	EMSG(_("E756: Spell checking not enabled"));
				820	return FAIL;
				821	}
				822
				823	/* TODO: moving backwards */
				824
				825	/* Start looking for bad word at the start of the line, because we can't
				826	* start halfway a word and know where it ends. */
				827	pos = curwin->w_cursor;
				828	pos.col = 0;
				829	wc = FALSE;
				830
				831	while (!got_int)
				832	{
				833	line = ml_get(pos.lnum);
				834	p = line + pos.col;
				835	while (*p != NUL)
				836	{
				837	nwc = spell_iswordc(p);
				838	if (!wc && nwc)
				839	{
				840	/* start of word */
				841	/* TODO: check for bad word attr */
				842	len = spell_check(curwin, line, p, &attr);
				843	if (attr != 0)
				844	{
				845	if (curwin->w_cursor.lnum < pos.lnum
				846	\|\| (curwin->w_cursor.lnum == pos.lnum
				847	&& curwin->w_cursor.col < (colnr_T)(p - line)))
				848	{
				849	curwin->w_cursor.lnum = pos.lnum;
				850	curwin->w_cursor.col = p - line;
				851	return OK;
				852	}
				853	attr = 0; /* bad word is before or at cursor */
				854	}
				855	p += len;
				856	if (*p == NUL)
				857	break;
				858	nwc = FALSE;
				859	}
				860
				861	/* advance to next character */
				862	mb_ptr_adv(p);
				863	wc = nwc;
				864	}
				865
				866	/* Advance to next line. */
				867	if (pos.lnum == curbuf->b_ml.ml_line_count)
				868	return FAIL;
				869	++pos.lnum;
				870	pos.col = 0;
				871	wc = FALSE;
				872
				873	line_breakcheck();
				874	}
				875
				876	return FAIL; /* interrupted */
				877	}
				878
				879	/*
				880	* Load word list for "lang" from a Vim spell file.
				881	* "lang" must be the language without the region: "en" or "en-rare".
				882	*/
				883	static slang_T *
				884	spell_load_lang(lang)
				885	char_u *lang;
				886	{
				887	slang_T *lp;
				888	char_u fname_enc[80];
				889	char_u *p;
				890	int r;
				891
				892	lp = slang_alloc(lang);
				893	if (lp != NULL)
				894	{
				895	/* Find all spell files for "lang" in 'runtimepath' and load them.
				896	* Use 'encoding', except that we use "latin1" for "latin9". */
				897	#ifdef FEAT_MBYTE
				898	if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
				899	p = p_enc;
				900	else
				901	#endif
				902	p = (char_u *)"latin1";
				903	sprintf((char *)fname_enc, "spell/%s.%s.spl", lang, p);
				904
				905	r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	906	if (r == FAIL && !lp->sl_error)
				907	{
				908	/* Try loading the ASCII version. */
				909	sprintf((char *)fname_enc, "spell/%s.ascii.spl", lang);
				910
				911	r = do_in_runtimepath(fname_enc, TRUE, spell_load_file, lp);
				912	}
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	913	if (r == FAIL \|\| lp->sl_error)
				914	{
				915	slang_free(lp);
				916	lp = NULL;
				917	if (r == FAIL)
				918	smsg((char_u *)_("Warning: Cannot find word list \"%s\""),
				919	fname_enc + 6);
				920	}
				921	else
				922	{
				923	lp->sl_next = first_lang;
				924	first_lang = lp;
				925	}
				926	}
				927
				928	return lp;
				929	}
				930
				931	/*
				932	* Allocate a new slang_T.
				933	* Caller must fill "sl_next".
				934	*/
				935	static slang_T *
				936	slang_alloc(lang)
				937	char_u *lang;
				938	{
				939	slang_T *lp;
				940
				941	lp = (slang_T *)alloc(sizeof(slang_T));
				942	if (lp != NULL)
				943	{
				944	lp->sl_name = vim_strsave(lang);
				945	hash_init(&lp->sl_words);
				946	ga_init2(&lp->sl_preftab, sizeof(hashtab_T), 4);
				947	ga_init2(&lp->sl_sufftab, sizeof(hashtab_T), 4);
				948	lp->sl_prefzero = NULL;
				949	lp->sl_suffzero = NULL;
				950	lp->sl_try = NULL;
				951	ga_init2(&lp->sl_rep, sizeof(repentry_T), 4);
				952	lp->sl_regions[0] = NUL;
				953	lp->sl_block = NULL;
				954	lp->sl_error = FALSE;
				955	}
				956	return lp;
				957	}
				958
				959	/*
				960	* Free the contents of an slang_T and the structure itself.
				961	*/
				962	static void
				963	slang_free(lp)
				964	slang_T *lp;
				965	{
				966	sblock_T *sp;
				967	int i;
				968
				969	vim_free(lp->sl_name);
				970	hash_clear(&lp->sl_words);
				971	for (i = 0; i < lp->sl_preftab.ga_len; ++i)
				972	hash_clear(((hashtab_T *)lp->sl_preftab.ga_data) + i);
				973	ga_clear(&lp->sl_preftab);
				974	for (i = 0; i < lp->sl_sufftab.ga_len; ++i)
				975	hash_clear(((hashtab_T *)lp->sl_sufftab.ga_data) + i);
				976	ga_clear(&lp->sl_sufftab);
				977	ga_clear(&lp->sl_rep);
				978	vim_free(lp->sl_try);
				979	while (lp->sl_block != NULL)
				980	{
				981	sp = lp->sl_block;
				982	lp->sl_block = sp->sb_next;
				983	vim_free(sp);
				984	}
				985	vim_free(lp);
				986	}
				987
				988	/*
				989	* Load one spell file into an slang_T.
				990	* Invoked through do_in_runtimepath().
				991	*/
				992	static void
				993	spell_load_file(fname, cookie)
				994	char_u *fname;
				995	void cookie; / points to the slang_T to be filled */
				996	{
				997	slang_T *lp = cookie;
				998	FILE *fd;
				999	char_u buf[MAXWLEN + 1];
				1000	char_u cbuf[MAXWLEN + 1];
				1001	char_u fbuf[MAXWLEN + 1];
				1002	char_u *p;
				1003	int itm;
				1004	int i;
				1005	int affcount;
				1006	int affnr;
				1007	int affflags;
				1008	int affitemcnt;
				1009	int bl_used = SBLOCKSIZE;
				1010	int widx;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1011	int prefm = 0; /* 1 if <= 256 prefixes, sizeof(short_u) otherw. */
				1012	int suffm = 0; /* 1 if <= 256 suffixes, sizeof(short_u) otherw. */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1013	int wlen;
				1014	int flags;
				1015	affitem_T ai, ai2, **aip;
				1016	int round;
				1017	char_u *save_sourcing_name = sourcing_name;
				1018	linenr_T save_sourcing_lnum = sourcing_lnum;
				1019	int cnt;
				1020	int choplen;
				1021	int addlen;
				1022	int leadlen;
				1023	int wordcount;
				1024	fword_T fw, fw2;
				1025	garray_T *gap;
				1026	hashtab_T *ht;
				1027	hashitem_T *hi;
				1028	hash_T hash;
				1029	int adds;
				1030	addword_T *aw;
				1031	int flen;
				1032
				1033	fd = fopen((char *)fname, "r");
				1034	if (fd == NULL)
				1035	{
				1036	EMSG2(_(e_notopen), fname);
				1037	goto errorend;
				1038	}
				1039
				1040	/* Set sourcing_name, so that error messages mention the file name. */
				1041	sourcing_name = fname;
				1042	sourcing_lnum = 0;
				1043
				1044	/* <HEADER>: <fileID> <regioncnt> <regionname> ... */
				1045	for (i = 0; i < VIMSPELLMAGICL; ++i)
				1046	buf[i] = getc(fd); /* <fileID> */
				1047	if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
				1048	{
				1049	EMSG(_("E757: Wrong file ID in spell file"));
				1050	goto errorend;
				1051	}
				1052
				1053	cnt = getc(fd); /* <regioncnt> */
				1054	if (cnt == EOF)
				1055	{
				1056	truncerr:
				1057	EMSG(_("E758: Truncated spell file"));
				1058	goto errorend;
				1059	}
				1060	if (cnt > 8)
				1061	{
				1062	formerr:
				1063	EMSG(_("E759: Format error in spell file"));
				1064	goto errorend;
				1065	}
				1066	for (i = 0; i < cnt; ++i)
				1067	{
				1068	lp->sl_regions[i * 2] = getc(fd); /* <regionname> */
				1069	lp->sl_regions[i * 2 + 1] = getc(fd);
				1070	}
				1071	lp->sl_regions[cnt * 2] = NUL;
				1072
				1073	/* round 1: <PREFIXLIST>: <affcount> <afftotcnt> <affix> ...
				1074	* round 2: <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ... */
				1075	for (round = 1; round <= 2; ++round)
				1076	{
				1077	affcount = (getc(fd) << 8) + getc(fd); /* <affcount> */
				1078	if (affcount < 0)
				1079	goto truncerr;
				1080	if (round == 1)
				1081	{
				1082	gap = &lp->sl_preftab;
				1083	aip = &lp->sl_prefzero;
				1084	lp->sl_prefcnt = affcount;
				1085	prefm = affcount > 256 ? sizeof(short_u) : 1;
				1086	}
				1087	else
				1088	{
				1089	gap = &lp->sl_sufftab;
				1090	aip = &lp->sl_suffzero;
				1091	lp->sl_suffcnt = affcount;
				1092	suffm = affcount > 256 ? sizeof(short_u) : 1;
				1093	}
				1094
				1095	i = (getc(fd) << 8) + getc(fd); /* <afftotcnt> */
				1096	/* afftotcnt is not used */
				1097
				1098	/*
				1099	* For each affix NR there can be several affixes.
				1100	*/
				1101	for (affnr = 0; affnr < affcount; ++affnr)
				1102	{
				1103	/* <affix>: <affflags> <affitemcnt> <affitem> ... */
				1104	affflags = getc(fd); /* <affflags> */
				1105	if (affflags == EOF)
				1106	goto truncerr;
				1107	affitemcnt = (getc(fd) << 8) + getc(fd); /* <affitemcnt> */
				1108	if (affitemcnt < 0)
				1109	goto truncerr;
				1110	for (itm = 0; itm < affitemcnt; ++itm)
				1111	{
				1112	/* <affitem>: <affchoplen> <affchop> <affaddlen> <affadd> */
				1113	choplen = getc(fd); /* <affchoplen> */
				1114	if (choplen == EOF)
				1115	goto truncerr;
				1116	if (choplen >= MAXWLEN)
				1117	goto formerr;
				1118	for (i = 0; i < choplen; ++i) /* <affchop> */
				1119	buf[i] = getc(fd);
				1120	buf[i] = NUL;
				1121	addlen = getc(fd); /* <affaddlen> */
				1122	if (addlen == EOF)
				1123	goto truncerr;
				1124	/* Get room to store the affitem_T, chop and add strings. */
				1125	p = (char_u *)getroom(lp, &bl_used,
				1126	sizeof(affitem_T) + choplen + addlen + 1);
				1127	if (p == NULL)
				1128	goto errorend;
				1129
				1130	ai = (affitem_T *)p;
				1131	ai->ai_nr = affnr;
				1132	ai->ai_combine = affflags;
				1133	ai->ai_choplen = choplen;
				1134	ai->ai_addlen = addlen;
				1135
				1136	p += sizeof(affitem_T) + addlen;
				1137	ai->ai_chop = p;
				1138	STRCPY(p, buf);
				1139
				1140	p = ai->ai_add;
				1141	for (i = 0; i < addlen; ++i) /* <affadd> */
				1142	p[i] = getc(fd);
				1143	p[i] = NUL;
				1144
				1145	/*
				1146	* Add the affix to a hashtable. Which one depends on the
				1147	* length of the added string in characters.
				1148	*/
				1149	#ifdef FEAT_MBYTE
				1150	/* Change "addlen" from length in bytes to length in chars. */
				1151	if (has_mbyte)
				1152	addlen = mb_charlen(p);
				1153	#endif
				1154	if (addlen == 0)
				1155	{
				1156	/* Link in list of zero length affixes. */
				1157	ai->ai_next = *aip;
				1158	*aip = ai;
				1159	}
				1160	else
				1161	{
				1162	if (gap->ga_len < addlen)
				1163	{
				1164	/* Longer affix, need more hashtables. */
				1165	if (ga_grow(gap, addlen - gap->ga_len) == FAIL)
				1166	goto errorend;
				1167
				1168	/* Re-allocating ga_data means that an ht_array
				1169	* pointing to ht_smallarray becomes invalid. We can
				1170	* recognize this: ht_mask is at its init value. */
				1171	for (i = 0; i < gap->ga_len; ++i)
				1172	{
				1173	ht = ((hashtab_T *)gap->ga_data) + i;
				1174	if (ht->ht_mask == HT_INIT_SIZE - 1)
				1175	ht->ht_array = ht->ht_smallarray;
				1176	}
				1177
				1178	/* Init the newly used hashtable(s). */
				1179	while (gap->ga_len < addlen)
				1180	{
				1181	hash_init(((hashtab_T *)gap->ga_data)
				1182	+ gap->ga_len);
				1183	++gap->ga_len;
				1184	}
				1185	}
				1186	ht = ((hashtab_T *)gap->ga_data) + addlen - 1;
				1187	hash = hash_hash(p);
				1188	hi = hash_lookup(ht, p, hash);
				1189	if (HASHITEM_EMPTY(hi))
				1190	{
				1191	/* First affix with this "ai_add", add to hashtable. */
				1192	hash_add_item(ht, hi, p, hash);
				1193	ai->ai_next = NULL;
				1194	}
				1195	else
				1196	{
				1197	/* There already is an affix with this "ai_add", link
				1198	* in the list. */
				1199	ai2 = HI2AI(hi);
				1200	ai->ai_next = ai2->ai_next;
				1201	ai2->ai_next = ai;
				1202	}
				1203	}
				1204	}
				1205	}
				1206	}
				1207
				1208	/* <SUGGEST> : <suggestlen> <more> ... */
				1209	/* TODO, just skip this for now */
				1210	i = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8) + getc(fd);
				1211	while (i-- > 0)
				1212	if (getc(fd) == EOF) /* <suggestlen> */
				1213	goto truncerr;
				1214
				1215	/* <WORDLIST>: <wordcount> <worditem> ... / / <wordcount> */
				1216	wordcount = (getc(fd) << 24) + (getc(fd) << 16) + (getc(fd) << 8)
				1217	+ getc(fd);
				1218	if (wordcount < 0)
				1219	goto truncerr;
				1220
				1221	/* Init hashtable for this number of words, so that it doesn't need to
				1222	* reallocate the table halfway. */
				1223	hash_lock_size(&lp->sl_words, wordcount);
				1224
				1225	for (widx = 0; ; ++widx)
				1226	{
				1227	/* <worditem>: <nr> <string> <flags> [<flags2>]
				1228	* [<caselen> <caseword>]
				1229	* [<affixcnt> <affixNR> ...] (prefixes)
				1230	* [<affixcnt> <affixNR> ...] (suffixes)
				1231	* [<region>]
				1232	* [<addcnt> <add> ...]
				1233	*/
				1234	/* Use <nr> bytes from the previous word. */
				1235	wlen = getc(fd); /* <nr> */
				1236	if (wlen == EOF)
				1237	{
				1238	if (widx >= wordcount) /* normal way to end the file */
				1239	break;
				1240	goto truncerr;
				1241	}
				1242
				1243	/* Read further word bytes until one below 0x20, that must be the
				1244	* flags. Keep this fast! */
				1245	for (;;)
				1246	{
				1247	if ((buf[wlen] = getc(fd)) < 0x20) /* <string> */
				1248	break;
				1249	if (++wlen == MAXWLEN)
				1250	goto formerr;
				1251	}
				1252	flags = buf[wlen]; /* <flags> */
				1253	buf[wlen] = NUL;
				1254
				1255	/* Get more flags if they're there. */
				1256	if (flags & BWF_SECOND)
				1257	flags += getc(fd) << 8; /* <flags2> */
				1258
				1259	if (flags & BWF_KEEPCAP)
				1260	{
				1261	/* Read <caselen> and <caseword> first, its length may differ from
				1262	* the case-folded word. Note: this should only happen after the
				1263	* basic word! */
				1264	wlen = getc(fd);
				1265	if (wlen == EOF)
				1266	goto truncerr;
				1267	for (i = 0; i < wlen; ++i)
				1268	cbuf[i] = getc(fd);
				1269	cbuf[i] = NUL;
				1270	}
				1271
				1272	/* Find room to store the word in a fword_T. */
				1273	fw = (fword_T *)getroom(lp, &bl_used, (int)sizeof(fword_T) + wlen);
				1274	if (fw == NULL)
				1275	goto errorend;
				1276	mch_memmove(fw->fw_word, (flags & BWF_KEEPCAP) ? cbuf : buf, wlen + 1);
				1277	fw->fw_flags = flags;
				1278
				1279	hash = hash_hash(buf);
				1280	hi = hash_lookup(&lp->sl_words, buf, hash);
				1281	if (HASHITEM_EMPTY(hi))
				1282	{
				1283	if (hash_add_item(&lp->sl_words, hi, fw->fw_word, hash) == FAIL)
				1284	goto errorend;
				1285	fw->fw_next = NULL;
				1286	}
				1287	else
				1288	{
				1289	/* Already have this basic word in the hashtable, this one will
				1290	* have different case flags. */
				1291	fw2 = HI2FWORD(hi);
				1292	fw->fw_next = fw2->fw_next;
				1293	fw2->fw_next = fw;
				1294	--widx; /* don't count this one */
				1295	}
				1296
				1297	/* Optional prefixes and suffixes. */
				1298	if (flags & BWF_PREFIX)
				1299	fw->fw_prefixcnt = spell_load_affixes(fd, lp, &bl_used,
				1300	prefm, &fw->fw_prefix);
				1301	else
				1302	fw->fw_prefixcnt = 0;
				1303	if (flags & BWF_SUFFIX)
				1304	fw->fw_suffixcnt = spell_load_affixes(fd, lp, &bl_used,
				1305	suffm, &fw->fw_suffix);
				1306	else
				1307	fw->fw_suffixcnt = 0;
				1308
				1309	if (flags & BWF_REGION)
				1310	fw->fw_region = getc(fd); /* <region> */
				1311	else
				1312	fw->fw_region = REGION_ALL;
				1313
				1314	fw->fw_adds = NULL;
				1315	if (flags & BWF_ADDS)
				1316	{
				1317	adds = (getc(fd) << 8) + getc(fd); /* <addcnt> */
				1318
				1319	while (--adds >= 0)
				1320	{
				1321	/* <add>: <addflags> <addlen> [<leadlen> <addstring>]
				1322	* [<region>] */
				1323	flags = getc(fd); /* <addflags> */
				1324	addlen = getc(fd); /* <addlen> */
				1325	if (addlen == EOF)
				1326	goto truncerr;
				1327	if (addlen >= MAXWLEN)
				1328	goto formerr;
				1329
				1330	if (addlen > 0)
				1331	{
				1332	leadlen = getc(fd); /* <leadlen> */
				1333	for (i = 0; i < addlen; ++i) /* <addstring> */
				1334	cbuf[i] = getc(fd);
				1335	cbuf[i] = NUL;
				1336	}
				1337	else
				1338	leadlen = 0;
				1339
				1340	if (flags & ADD_KEEPCAP)
				1341	{
				1342	/* <addstring> is in original case, need to get
				1343	* case-folded word too. */
				1344	(void)str_foldcase(cbuf, addlen, fbuf, MAXWLEN);
				1345	flen = addlen - leadlen + 1;
				1346	addlen = STRLEN(fbuf);
				1347	}
				1348	else
				1349	flen = 0;
				1350
				1351	aw = (addword_T *)getroom(lp, &bl_used,
				1352	sizeof(addword_T) + addlen + flen);
				1353	if (aw == NULL)
				1354	goto errorend;
				1355	aw->aw_next = fw->fw_adds;
				1356	fw->fw_adds = aw;
				1357	aw->aw_leadlen = leadlen;
				1358
				1359	if (flags & ADD_KEEPCAP)
				1360	{
				1361	/* Put the addition in original case after the case-folded
				1362	* string. */
				1363	STRCPY(aw->aw_word, fbuf);
				1364	STRCPY(aw->aw_word + addlen + 1, cbuf + leadlen);
				1365	}
				1366	else
				1367	STRCPY(aw->aw_word, cbuf);
				1368
				1369	aw->aw_flags = flags;
				1370	aw->aw_wordlen = addlen;
				1371
				1372	if (flags & ADD_REGION)
				1373	aw->aw_region = getc(fd); /* <region> */
				1374	else
				1375	aw->aw_region = REGION_ALL;
				1376	}
				1377	}
				1378	}
				1379	goto end_OK;
				1380
				1381	errorend:
				1382	lp->sl_error = TRUE;
				1383	end_OK:
				1384	if (fd != NULL)
				1385	fclose(fd);
				1386	hash_unlock(&lp->sl_words);
				1387	sourcing_name = save_sourcing_name;
				1388	sourcing_lnum = save_sourcing_lnum;
				1389	}
				1390
				1391	/*
				1392	* Read a list of affixes from the spell file.
				1393	*/
				1394	static int
				1395	spell_load_affixes(fd, lp, bl_usedp, affm, affp)
				1396	FILE *fd;
				1397	slang_T *lp;
				1398	int *bl_usedp;
				1399	int affm;
				1400	void **affp;
				1401	{
				1402	int cnt;
				1403	int i, n;
				1404	char_u *p;
				1405
				1406	cnt = getc(fd); /* <affixcnt> */
				1407	if (cnt == EOF)
				1408	return 0;
				1409
				1410	/* Get room to store the affixNR list, either as char_u (1
				1411	* byte) or short_u (2 bytes). */
				1412	p = (char_u )getroom(lp, bl_usedp, cnt affm);
				1413	if (p == NULL)
				1414	return 0;
				1415	*affp = p;
				1416	for (n = 0; n < cnt; ++n)
				1417	{
				1418	i = getc(fd); /* <affixNR> */
				1419	if (affm > 1)
				1420	{
				1421	i = (i << 8) + getc(fd);
				1422	(short_u )p = i;
				1423	p += sizeof(short_u);
				1424	}
				1425	else
				1426	{
				1427	(char_u )p = i;
				1428	++p;
				1429	}
				1430	}
				1431	return cnt;
				1432	}
				1433
				1434	/*
				1435	* Get part of an sblock_T, at least "len" bytes long.
				1436	* Returns NULL when out of memory.
				1437	*/
				1438	static void *
				1439	getroom(lp, bl_used, len)
				1440	slang_T lp; / lp->sl_block is current block or NULL */
				1441	int bl_used; / used up from current block */
				1442	int len; /* length needed */
				1443	{
				1444	char_u *p;
				1445	sblock_T *bl = lp->sl_block;
				1446
				1447	if (bl == NULL \|\| *bl_used + len > SBLOCKSIZE)
				1448	{
				1449	/* Allocate a block of memory. This is not freed until spell_reload()
				1450	* is called. */
				1451	bl = (sblock_T *)alloc((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
				1452	if (bl == NULL)
				1453	return NULL;
				1454	bl->sb_next = lp->sl_block;
				1455	lp->sl_block = bl;
				1456	*bl_used = 0;
				1457	}
				1458
				1459	p = bl->sb_data + *bl_used;
				1460	*bl_used += len;
				1461
				1462	return p;
				1463	}
				1464
				1465	/*
				1466	* Parse 'spelllang' and set buf->b_langp accordingly.
				1467	* Returns an error message or NULL.
				1468	*/
				1469	char_u *
				1470	did_set_spelllang(buf)
				1471	buf_T *buf;
				1472	{
				1473	garray_T ga;
				1474	char_u *lang;
				1475	char_u *e;
				1476	char_u *region;
				1477	int region_mask;
				1478	slang_T *lp;
				1479	int c;
				1480	char_u lbuf[MAXWLEN + 1];
				1481
				1482	ga_init2(&ga, sizeof(langp_T), 2);
				1483
				1484	/* loop over comma separated languages. */
				1485	for (lang = buf->b_p_spl; *lang != NUL; lang = e)
				1486	{
				1487	e = vim_strchr(lang, ',');
				1488	if (e == NULL)
				1489	e = lang + STRLEN(lang);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1490	region = NULL;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1491	if (e > lang + 2)
				1492	{
				1493	if (e - lang >= MAXWLEN)
				1494	{
				1495	ga_clear(&ga);
				1496	return e_invarg;
				1497	}
				1498	if (lang[2] == '_')
				1499	region = lang + 3;
				1500	}
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1501
				1502	for (lp = first_lang; lp != NULL; lp = lp->sl_next)
				1503	if (STRNICMP(lp->sl_name, lang, 2) == 0)
				1504	break;
				1505
				1506	if (lp == NULL)
				1507	{
				1508	/* Not found, load the language. */
				1509	STRNCPY(lbuf, lang, e - lang);
				1510	lbuf[e - lang] = NUL;
				1511	if (region != NULL)
				1512	mch_memmove(lbuf + 2, lbuf + 5, e - lang - 4);
				1513	lp = spell_load_lang(lbuf);
				1514	}
				1515
				1516	if (lp != NULL)
				1517	{
				1518	if (region == NULL)
				1519	region_mask = REGION_ALL;
				1520	else
				1521	{
				1522	/* find region in sl_regions */
				1523	c = find_region(lp->sl_regions, region);
				1524	if (c == REGION_ALL)
				1525	{
				1526	c = *e;
				1527	*e = NUL;
				1528	smsg((char_u *)_("Warning: region %s not supported"), lang);
				1529	*e = c;
				1530	region_mask = REGION_ALL;
				1531	}
				1532	else
				1533	region_mask = 1 << c;
				1534	}
				1535
				1536	if (ga_grow(&ga, 1) == FAIL)
				1537	{
				1538	ga_clear(&ga);
				1539	return e_outofmem;
				1540	}
				1541	LANGP_ENTRY(ga, ga.ga_len)->lp_slang = lp;
				1542	LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
				1543	++ga.ga_len;
				1544	}
				1545
				1546	if (*e == ',')
				1547	++e;
				1548	}
				1549
				1550	/* Add a NULL entry to mark the end of the list. */
				1551	if (ga_grow(&ga, 1) == FAIL)
				1552	{
				1553	ga_clear(&ga);
				1554	return e_outofmem;
				1555	}
				1556	LANGP_ENTRY(ga, ga.ga_len)->lp_slang = NULL;
				1557	++ga.ga_len;
				1558
				1559	/* Everything is fine, store the new b_langp value. */
				1560	ga_clear(&buf->b_langp);
				1561	buf->b_langp = ga;
				1562
				1563	return NULL;
				1564	}
				1565
				1566	/*
				1567	* Find the region "region[2]" in "rp" (points to "sl_regions").
				1568	* Each region is simply stored as the two characters of it's name.
				1569	* Returns the index if found, REGION_ALL if not found.
				1570	*/
				1571	static int
				1572	find_region(rp, region)
				1573	char_u *rp;
				1574	char_u *region;
				1575	{
				1576	int i;
				1577
				1578	for (i = 0; ; i += 2)
				1579	{
				1580	if (rp[i] == NUL)
				1581	return REGION_ALL;
				1582	if (rp[i] == region[0] && rp[i + 1] == region[1])
				1583	break;
				1584	}
				1585	return i / 2;
				1586	}
				1587
				1588	/*
				1589	* Return type of word:
				1590	* w word 0
				1591	* Word BWF_ONECAP
				1592	* W WORD BWF_ALLCAP
				1593	* WoRd wOrd BWF_KEEPCAP
				1594	*/
				1595	static int
				1596	captype(word, end)
				1597	char_u *word;
				1598	char_u *end;
				1599	{
				1600	char_u *p;
				1601	int c;
				1602	int firstcap;
				1603	int allcap;
				1604	int past_second = FALSE; /* past second word char */
				1605
				1606	/* find first letter */
				1607	for (p = word; !spell_iswordc(p); mb_ptr_adv(p))
				1608	if (p >= end)
				1609	return 0; /* only non-word characters, illegal word */
				1610	#ifdef FEAT_MBYTE
				1611	c = mb_ptr2char_adv(&p);
				1612	#else
				1613	c = *p++;
				1614	#endif
				1615	firstcap = allcap = MB_ISUPPER(c);
				1616
				1617	/*
				1618	* Need to check all letters to find a word with mixed upper/lower.
				1619	* But a word with an upper char only at start is a ONECAP.
				1620	*/
				1621	for ( ; p < end; mb_ptr_adv(p))
				1622	if (spell_iswordc(p))
				1623	{
				1624	#ifdef FEAT_MBYTE
				1625	c = mb_ptr2char(p);
				1626	#else
				1627	c = *p;
				1628	#endif
				1629	if (!MB_ISUPPER(c))
				1630	{
				1631	/* UUl -> KEEPCAP */
				1632	if (past_second && allcap)
				1633	return BWF_KEEPCAP;
				1634	allcap = FALSE;
				1635	}
				1636	else if (!allcap)
				1637	/* UlU -> KEEPCAP */
				1638	return BWF_KEEPCAP;
				1639	past_second = TRUE;
				1640	}
				1641
				1642	if (allcap)
				1643	return BWF_ALLCAP;
				1644	if (firstcap)
				1645	return BWF_ONECAP;
				1646	return 0;
				1647	}
				1648
				1649	# if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1650	/*
				1651	* Clear all spelling tables and reload them.
				1652	* Used after 'encoding' is set.
				1653	*/
				1654	void
				1655	spell_reload()
				1656	{
				1657	buf_T *buf;
				1658	slang_T *lp;
				1659
				1660	/* Initialize the table for spell_iswordc(). */
				1661	init_spell_chartab();
				1662
				1663	/* Unload all allocated memory. */
				1664	while (first_lang != NULL)
				1665	{
				1666	lp = first_lang;
				1667	first_lang = lp->sl_next;
				1668	slang_free(lp);
				1669	}
				1670
				1671	/* Go through all buffers and handle 'spelllang'. */
				1672	for (buf = firstbuf; buf != NULL; buf = buf->b_next)
				1673	{
				1674	ga_clear(&buf->b_langp);
				1675	if (*buf->b_p_spl != NUL)
				1676	did_set_spelllang(buf);
				1677	}
				1678	}
				1679	# endif
				1680
				1681	/*
				1682	* Recognizing words uses a two-step mechanism:
				1683	* 1. Locate a basic word, made out of word characters only and separated by
				1684	* non-word characters.
				1685	* 2. When a basic word is found, check if (possibly required) additions
				1686	* before and after the word are present.
				1687	*
				1688	* Both mechanisms use affixes (prefixes and suffixes) to reduce the number of
				1689	* words. When no matching word was found in the hashtable the start of the
				1690	* word is checked for matching prefixes and the end of the word for matching
				1691	* suffixes. All matching affixes are removed and then the resulting word is
				1692	* searched for. If found it is checked if it supports the used affix.
				1693	*/
				1694
				1695
				1696	#if defined(FEAT_MBYTE) \|\| defined(PROTO)
				1697	/*
				1698	* Functions for ":mkspell".
				1699	* Only possible with the multi-byte feature.
				1700	*/
				1701
				1702	#define MAXLINELEN 300 /* Maximum length in bytes of a line in a .aff
				1703	and .dic file. */
				1704	/*
				1705	* Main structure to store the contents of a ".aff" file.
				1706	*/
				1707	typedef struct afffile_S
				1708	{
				1709	char_u af_enc; / "SET", normalized, alloc'ed string or NULL */
				1710	char_u af_try; / "TRY" line in "af_enc" encoding */
				1711	hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
				1712	hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
				1713	garray_T af_rep; /* list of repentry_T entries from REP lines */
				1714	} afffile_T;
				1715
				1716	typedef struct affentry_S affentry_T;
				1717
				1718	/* Affix header from ".aff" file. Used for af_pref and af_suff. */
				1719	typedef struct affheader_S
				1720	{
				1721	char_u ah_key[2]; /* key for hashtable == name of affix entry */
				1722	int ah_combine;
				1723	affentry_T ah_first; / first affix entry */
				1724	short_u ah_affnr; /* used in get_new_aff() */
				1725	} affheader_T;
				1726
				1727	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
				1728
				1729	/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
				1730	struct affentry_S
				1731	{
				1732	affentry_T ae_next; / next affix with same name/number */
				1733	char_u ae_chop; / text to chop off basic word (can be NULL) */
				1734	char_u ae_add; / text to add to basic word (can be NULL) */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1735	char_u ae_add_nw; / For a suffix: first non-word char in
				1736	* "ae_add"; for a prefix with only non-word
				1737	* chars: equal to "ae_add", for a prefix with
				1738	* word and non-word chars: first non-word
				1739	* char after word char. NULL otherwise. */
				1740	char_u ae_add_pw; / For a prefix with both word and non-word
				1741	* chars: first word char. NULL otherwise. */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1742	char_u ae_cond; / condition (NULL for ".") */
				1743	regprog_T ae_prog; / regexp program for ae_cond or NULL */
				1744	short_u ae_affnr; /* for old affix: new affix number */
				1745	};
				1746
				1747	/*
				1748	* Structure to store a word from a ".dic" file.
				1749	*/
				1750	typedef struct dicword_S
				1751	{
				1752	char_u dw_affnm; / original affix names */
				1753	char_u dw_word[1]; /* actually longer: the word in 'encoding' */
				1754	} dicword_T;
				1755
				1756	static dicword_T dumdw;
				1757	#define HI2DW(hi) ((dicword_T )((hi)->hi_key - (dumdw.dw_word - (char_u )&dumdw)))
				1758
				1759	/*
				1760	* Structure to store a basic word for the spell file.
				1761	* This is used for ":mkspell", not for spell checking.
				1762	*/
				1763	typedef struct basicword_S basicword_T;
				1764	struct basicword_S
				1765	{
				1766	basicword_T bw_next; / next word with same basic word */
				1767	basicword_T bw_cnext; / next word with same caps */
				1768	int bw_flags; /* BWF_ flags */
				1769	garray_T bw_prefix; /* table with prefix numbers */
				1770	garray_T bw_suffix; /* table with suffix numbers */
				1771	int bw_region; /* region bits */
				1772	char_u bw_caseword; / keep-case word */
				1773	char_u bw_leadstring; / must come before bw_word */
				1774	char_u bw_addstring; / must come after bw_word */
				1775	char_u bw_word[1]; /* actually longer: word case folded */
				1776	};
				1777
				1778	static basicword_T dumbw;
				1779	#define KEY2BW(p) ((basicword_T )((p) - (dumbw.bw_word - (char_u )&dumbw)))
				1780	#define HI2BW(hi) KEY2BW((hi)->hi_key)
				1781
				1782	/* Store the affix number related with a certain string. */
				1783	typedef struct affhash_S
				1784	{
				1785	short_u as_nr; /* the affix nr */
				1786	char_u as_word[1]; /* actually longer */
				1787	} affhash_T;
				1788
				1789	static affhash_T dumas;
				1790	#define HI2AS(hi) ((affhash_T )((hi)->hi_key - (dumas.as_word - (char_u )&dumas)))
				1791
				1792
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1793	static afffile_T spell_read_aff __ARGS((char_u fname, vimconv_T *conv, int ascii));
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1794	static void spell_free_aff __ARGS((afffile_T *aff));
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1795	static int has_non_ascii __ARGS((char_u *s));
				1796	static int spell_read_dic __ARGS((hashtab_T ht, char_u fname, vimconv_T *conv, int ascii));
				1797	static int get_new_aff __ARGS((hashtab_T oldaff, garray_T gap, int prefix));
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1798	static void spell_free_dic __ARGS((hashtab_T *dic));
				1799	static int same_affentries __ARGS((affheader_T ah1, affheader_T ah2));
				1800	static void add_affhash __ARGS((hashtab_T ht, char_u key, int newnr));
				1801	static void clear_affhash __ARGS((hashtab_T *ht));
				1802	static void trans_affixes __ARGS((dicword_T dw, basicword_T bw, afffile_T oldaff, hashtab_T newwords));
				1803	static int build_wordlist __ARGS((hashtab_T newwords, hashtab_T oldwords, afffile_T *oldaff, int regionmask));
				1804	static void combine_regions __ARGS((hashtab_T *newwords));
				1805	static int same_affixes __ARGS((basicword_T bw, basicword_T nbw));
				1806	static void expand_affixes __ARGS((hashtab_T newwords, garray_T prefgap, garray_T *suffgap));
				1807	static void expand_one_aff __ARGS((basicword_T bw, garray_T add_words, affentry_T pae, affentry_T sae));
				1808	static void add_to_wordlist __ARGS((hashtab_T newwords, basicword_T bw));
				1809	static void put_bytes __ARGS((FILE *fd, long_u nr, int len));
				1810	static void write_affix __ARGS((FILE fd, affheader_T ah));
				1811	static void write_affixlist __ARGS((FILE fd, garray_T aff, int bytes));
				1812	static void write_vim_spell __ARGS((char_u fname, garray_T prefga, garray_T suffga, hashtab_T newwords, int regcount, char_u *regchars));
				1813	static void write_bword __ARGS((FILE fd, basicword_T bw, int lowcap, basicword_T **prevbw, int regionmask, int prefm, int suffm));
				1814	static void free_wordtable __ARGS((hashtab_T *ht));
				1815	static void free_basicword __ARGS((basicword_T *bw));
				1816	static void free_affixentries __ARGS((affentry_T *first));
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1817	static void free_affix_entry __ARGS((affentry_T *ap));
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1818
				1819	/*
				1820	* Read an affix ".aff" file.
				1821	* Returns an afffile_T, NULL for failure.
				1822	*/
				1823	static afffile_T *
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1824	spell_read_aff(fname, conv, ascii)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1825	char_u *fname;
				1826	vimconv_T conv; / info for encoding conversion */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1827	int ascii; /* Only accept ASCII characters */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1828	{
				1829	FILE *fd;
				1830	afffile_T *aff;
				1831	char_u rline[MAXLINELEN];
				1832	char_u *line;
				1833	char_u *pc = NULL;
				1834	char_u *(items[6]);
				1835	int itemcnt;
				1836	char_u *p;
				1837	int lnum = 0;
				1838	affheader_T *cur_aff = NULL;
				1839	int aff_todo = 0;
				1840	hashtab_T *tp;
				1841
				1842	fd = fopen((char *)fname, "r");
				1843	if (fd == NULL)
				1844	{
				1845	EMSG2(_(e_notopen), fname);
				1846	return NULL;
				1847	}
				1848
				1849	smsg((char_u *)_("Reading affix file %s..."), fname);
				1850	out_flush();
				1851
				1852	aff = (afffile_T *)alloc_clear((unsigned)sizeof(afffile_T));
				1853	if (aff == NULL)
				1854	return NULL;
				1855	hash_init(&aff->af_pref);
				1856	hash_init(&aff->af_suff);
				1857	ga_init2(&aff->af_rep, (int)sizeof(repentry_T), 20);
				1858
				1859	/*
				1860	* Read all the lines in the file one by one.
				1861	*/
				1862	while (!vim_fgets(rline, MAXLINELEN, fd))
				1863	{
				1864	++lnum;
				1865
				1866	/* Skip comment lines. */
				1867	if (*rline == '#')
				1868	continue;
				1869
				1870	/* Convert from "SET" to 'encoding' when needed. */
				1871	vim_free(pc);
				1872	if (conv->vc_type != CONV_NONE)
				1873	{
				1874	pc = string_convert(conv, rline, NULL);
				1875	line = pc;
				1876	}
				1877	else
				1878	{
				1879	pc = NULL;
				1880	line = rline;
				1881	}
				1882
				1883	/* Split the line up in white separated items. Put a NUL after each
				1884	* item. */
				1885	itemcnt = 0;
				1886	for (p = line; ; )
				1887	{
				1888	while (p != NUL && p <= ' ') /* skip white space and CR/NL */
				1889	++p;
				1890	if (*p == NUL)
				1891	break;
				1892	items[itemcnt++] = p;
				1893	while (p > ' ') / skip until white space or CR/NL */
				1894	++p;
				1895	if (*p == NUL)
				1896	break;
				1897	*p++ = NUL;
				1898	}
				1899
				1900	/* Handle non-empty lines. */
				1901	if (itemcnt > 0)
				1902	{
				1903	if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
				1904	&& aff->af_enc == NULL)
				1905	{
				1906	if (aff->af_enc != NULL)
				1907	smsg((char_u *)_("Duplicate SET line ignored in %s line %d: %s"),
				1908	fname, lnum, line);
				1909	else
				1910	{
				1911	/* Setup for conversion from "ENC" to 'encoding'. */
				1912	aff->af_enc = enc_canonize(items[1]);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1913	if (aff->af_enc != NULL && !ascii
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1914	&& convert_setup(conv, aff->af_enc, p_enc) == FAIL)
				1915	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				1916	fname, aff->af_enc, p_enc);
				1917	}
				1918	}
				1919	else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2
				1920	&& aff->af_try == NULL)
				1921	aff->af_try = vim_strsave(items[1]);
				1922	else if ((STRCMP(items[0], "PFX") == 0
				1923	\|\| STRCMP(items[0], "SFX") == 0)
				1924	&& aff_todo == 0
				1925	&& itemcnt == 4)
				1926	{
				1927	/* New affix letter. */
				1928	cur_aff = (affheader_T *)alloc((unsigned)sizeof(affheader_T));
				1929	if (cur_aff == NULL)
				1930	break;
				1931	cur_aff->ah_key[0] = *items[1];
				1932	cur_aff->ah_key[1] = NUL;
				1933	if (items[1][1] != NUL)
				1934	smsg((char_u *)_("Affix name too long in %s line %d: %s"),
				1935	fname, lnum, items[1]);
				1936	if (*items[2] == 'Y')
				1937	cur_aff->ah_combine = TRUE;
				1938	else if (*items[2] == 'N')
				1939	cur_aff->ah_combine = FALSE;
				1940	else if (p_verbose > 0)
				1941	smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
				1942	fname, lnum, items[2]);
				1943	cur_aff->ah_first = NULL;
				1944	if (*items[0] == 'P')
				1945	tp = &aff->af_pref;
				1946	else
				1947	tp = &aff->af_suff;
				1948	if (!HASHITEM_EMPTY(hash_find(tp, cur_aff->ah_key)))
				1949	smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
				1950	fname, lnum, items[1]);
				1951	else
				1952	hash_add(tp, cur_aff->ah_key);
				1953
				1954	aff_todo = atoi((char *)items[3]);
				1955	}
				1956	else if ((STRCMP(items[0], "PFX") == 0
				1957	\|\| STRCMP(items[0], "SFX") == 0)
				1958	&& aff_todo > 0
				1959	&& STRCMP(cur_aff->ah_key, items[1]) == 0
				1960	&& itemcnt == 5)
				1961	{
				1962	affentry_T *aff_entry;
				1963
				1964	/* New item for an affix letter. */
				1965	--aff_todo;
				1966	aff_entry = (affentry_T *)alloc_clear(
				1967	(unsigned)sizeof(affentry_T));
				1968	if (aff_entry == NULL)
				1969	break;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1970
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1971	if (STRCMP(items[2], "0") != 0)
				1972	aff_entry->ae_chop = vim_strsave(items[2]);
				1973	if (STRCMP(items[3], "0") != 0)
				1974	aff_entry->ae_add = vim_strsave(items[3]);
				1975	if (STRCMP(items[4], ".") != 0)
				1976	{
				1977	char_u buf[MAXLINELEN];
				1978
				1979	aff_entry->ae_cond = vim_strsave(items[4]);
				1980	if (*items[0] == 'P')
				1981	sprintf((char *)buf, "^%s", items[4]);
				1982	else
				1983	sprintf((char *)buf, "%s$", items[4]);
				1984	aff_entry->ae_prog = vim_regcomp(buf, RE_MAGIC + RE_STRING);
				1985	}
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	1986
				1987	if (ascii && (has_non_ascii(aff_entry->ae_chop)
				1988	\|\| has_non_ascii(aff_entry->ae_add)))
				1989	{
				1990	/* Don't use an affix entry with non-ASCII characters when
				1991	* "ascii" is TRUE. */
				1992	free_affix_entry(aff_entry);
				1993	}
				1994	else
				1995	{
				1996	aff_entry->ae_next = cur_aff->ah_first;
				1997	cur_aff->ah_first = aff_entry;
				1998	}
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	1999	}
				2000	else if (STRCMP(items[0], "REP") == 0 && itemcnt == 2)
				2001	/* Ignore REP count */;
				2002	else if (STRCMP(items[0], "REP") == 0 && itemcnt == 3)
				2003	{
				2004	repentry_T *rp;
				2005
				2006	/* REP item */
				2007	if (ga_grow(&aff->af_rep, 1) == FAIL)
				2008	break;
				2009	rp = ((repentry_T *)aff->af_rep.ga_data) + aff->af_rep.ga_len;
				2010	rp->re_from = vim_strsave(items[1]);
				2011	rp->re_to = vim_strsave(items[2]);
				2012	++aff->af_rep.ga_len;
				2013	}
				2014	else if (p_verbose > 0)
				2015	smsg((char_u *)_("Unrecognized item in %s line %d: %s"),
				2016	fname, lnum, items[0]);
				2017	}
				2018
				2019	}
				2020
				2021	vim_free(pc);
				2022	fclose(fd);
				2023	return aff;
				2024	}
				2025
				2026	/*
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2027	* Return TRUE if string "s" contains a non-ASCII character (128 or higher).
				2028	* When "s" is NULL FALSE is returned.
				2029	*/
				2030	static int
				2031	has_non_ascii(s)
				2032	char_u *s;
				2033	{
				2034	char_u *p;
				2035
				2036	if (s != NULL)
				2037	for (p = s; *p != NUL; ++p)
				2038	if (*p >= 128)
				2039	return TRUE;
				2040	return FALSE;
				2041	}
				2042
				2043	/*
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2044	* Free the structure filled by spell_read_aff().
				2045	*/
				2046	static void
				2047	spell_free_aff(aff)
				2048	afffile_T *aff;
				2049	{
				2050	hashtab_T *ht;
				2051	hashitem_T *hi;
				2052	int todo;
				2053	int i;
				2054	repentry_T *rp;
				2055	affheader_T *ah;
				2056
				2057	vim_free(aff->af_enc);
				2058	vim_free(aff->af_try);
				2059
				2060	for (ht = &aff->af_pref; ; ht = &aff->af_suff)
				2061	{
				2062	todo = ht->ht_used;
				2063	for (hi = ht->ht_array; todo > 0; ++hi)
				2064	{
				2065	if (!HASHITEM_EMPTY(hi))
				2066	{
				2067	--todo;
				2068	ah = HI2AH(hi);
				2069	free_affixentries(ah->ah_first);
				2070	vim_free(ah);
				2071	}
				2072	}
				2073	if (ht == &aff->af_suff)
				2074	break;
				2075	}
				2076	hash_clear(&aff->af_pref);
				2077	hash_clear(&aff->af_suff);
				2078
				2079	for (i = 0; i < aff->af_rep.ga_len; ++i)
				2080	{
				2081	rp = ((repentry_T *)aff->af_rep.ga_data) + i;
				2082	vim_free(rp->re_from);
				2083	vim_free(rp->re_to);
				2084	}
				2085	ga_clear(&aff->af_rep);
				2086
				2087	vim_free(aff);
				2088	}
				2089
				2090	/*
				2091	* Read a dictionary ".dic" file.
				2092	* Returns OK or FAIL;
				2093	* Each entry in the hashtab_T is a dicword_T.
				2094	*/
				2095	static int
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2096	spell_read_dic(ht, fname, conv, ascii)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2097	hashtab_T *ht;
				2098	char_u *fname;
				2099	vimconv_T conv; / info for encoding conversion */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2100	int ascii; /* only accept ASCII words */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2101	{
				2102	char_u line[MAXLINELEN];
				2103	char_u *p;
				2104	dicword_T *dw;
				2105	char_u *pc;
				2106	char_u *w;
				2107	int l;
				2108	hash_T hash;
				2109	hashitem_T *hi;
				2110	FILE *fd;
				2111	int lnum = 1;
				2112
				2113	fd = fopen((char *)fname, "r");
				2114	if (fd == NULL)
				2115	{
				2116	EMSG2(_(e_notopen), fname);
				2117	return FAIL;
				2118	}
				2119
				2120	smsg((char_u *)_("Reading dictionary file %s..."), fname);
				2121	out_flush();
				2122
				2123	/* Read and ignore the first line: word count. */
				2124	(void)vim_fgets(line, MAXLINELEN, fd);
				2125	if (!isdigit(*skipwhite(line)))
				2126	EMSG2(_("E760: No word count in %s"), fname);
				2127
				2128	/*
				2129	* Read all the lines in the file one by one.
				2130	* The words are converted to 'encoding' here, before being added to
				2131	* the hashtable.
				2132	*/
				2133	while (!vim_fgets(line, MAXLINELEN, fd))
				2134	{
				2135	++lnum;
				2136
				2137	/* Remove CR, LF and white space from end. */
				2138	l = STRLEN(line);
				2139	while (l > 0 && line[l - 1] <= ' ')
				2140	--l;
				2141	if (l == 0)
				2142	continue; /* empty line */
				2143	line[l] = NUL;
				2144
				2145	/* Find the optional affix names. */
				2146	p = vim_strchr(line, '/');
				2147	if (p != NULL)
				2148	*p++ = NUL;
				2149
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2150	/* Skip non-ASCII words when "ascii" is TRUE. */
				2151	if (ascii && has_non_ascii(line))
				2152	continue;
				2153
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2154	/* Convert from "SET" to 'encoding' when needed. */
				2155	if (conv->vc_type != CONV_NONE)
				2156	{
				2157	pc = string_convert(conv, line, NULL);
				2158	w = pc;
				2159	}
				2160	else
				2161	{
				2162	pc = NULL;
				2163	w = line;
				2164	}
				2165
				2166	dw = (dicword_T *)alloc_clear((unsigned)sizeof(dicword_T)
				2167	+ STRLEN(w));
				2168	if (dw == NULL)
				2169	break;
				2170	STRCPY(dw->dw_word, w);
				2171	vim_free(pc);
				2172
				2173	hash = hash_hash(dw->dw_word);
				2174	hi = hash_lookup(ht, dw->dw_word, hash);
				2175	if (!HASHITEM_EMPTY(hi))
				2176	smsg((char_u *)_("Duplicate word in %s line %d: %s"),
				2177	fname, lnum, line);
				2178	else
				2179	hash_add_item(ht, hi, dw->dw_word, hash);
				2180
				2181	if (p != NULL)
				2182	dw->dw_affnm = vim_strsave(p);
				2183	}
				2184
				2185	fclose(fd);
				2186	return OK;
				2187	}
				2188
				2189	/*
				2190	* Free the structure filled by spell_read_dic().
				2191	*/
				2192	static void
				2193	spell_free_dic(dic)
				2194	hashtab_T *dic;
				2195	{
				2196	int todo;
				2197	dicword_T *dw;
				2198	hashitem_T *hi;
				2199
				2200	todo = dic->ht_used;
				2201	for (hi = dic->ht_array; todo > 0; ++hi)
				2202	{
				2203	if (!HASHITEM_EMPTY(hi))
				2204	{
				2205	--todo;
				2206	dw = HI2DW(hi);
				2207	vim_free(dw->dw_affnm);
				2208	vim_free(dw);
				2209	}
				2210	}
				2211	hash_clear(dic);
				2212	}
				2213
				2214	/*
				2215	* Take the affixes read by spell_read_aff() and add them to the new list.
				2216	* Attempts to re-use the same number for identical affixes (ignoring the
				2217	* condition, since we remove that). That is especially important when using
				2218	* multiple regions.
				2219	* Returns OK or FAIL;
				2220	*/
				2221	static int
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2222	get_new_aff(oldaff, gap, prefix)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2223	hashtab_T oldaff; / hashtable with affheader_T */
				2224	garray_T gap; / table with new affixes */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2225	int prefix; /* TRUE when doing prefixes, FALSE for
				2226	suffixes */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2227	{
				2228	int oldtodo;
				2229	affheader_T oldah, newah, *gapah;
				2230	affentry_T oldae, newae;
				2231	hashitem_T *oldhi;
				2232	hashitem_T *hi;
				2233	hashtab_T condht; /* conditions already found */
				2234	char_u condkey[MAXLINELEN];
				2235	int newnr;
				2236	int gapnr;
				2237	int retval = OK;
				2238	char_u *p;
				2239	garray_T tga;
				2240
				2241	/*
				2242	* Loop over all the old affix names.
				2243	*/
				2244	oldtodo = oldaff->ht_used;
				2245	for (oldhi = oldaff->ht_array; oldtodo > 0 && retval == OK; ++oldhi)
				2246	{
				2247	if (!HASHITEM_EMPTY(oldhi))
				2248	{
				2249	--oldtodo;
				2250	oldah = (affheader_T *)oldhi->hi_key;
				2251
				2252	/* Put entries with the same condition under the same new affix
				2253	* nr in "tga". Use hashtable "condht" to find them. */
				2254	ga_init2(&tga, sizeof(affheader_T), 10);
				2255	hash_init(&condht);
				2256
				2257	/*
				2258	* Loop over all affixes with the same name.
				2259	* The affixes with the same condition will get the same number,
				2260	* since they can be used with the same words.
				2261	* 1. build the lists of new affentry_T, with the headers in "tga".
				2262	* 2. Check if some of the lists already exist in "gap", re-use
				2263	* their number.
				2264	* 3. Assign the new numbers to the old affixes.
				2265	*/
				2266
				2267	/* 1. build the lists of new affentry_T. */
				2268	for (oldae = oldah->ah_first; oldae != NULL && retval == OK;
				2269	oldae = oldae->ae_next)
				2270	{
				2271	oldae->ae_add_nw = NULL;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2272	oldae->ae_add_pw = NULL;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2273	if (oldae->ae_add != NULL)
				2274	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2275	/* Check for non-word characters in the affix. If there
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2276	* is one this affix will be turned into an addition.
				2277	* This is stored with the old affix, that is where
				2278	* trans_affixes() will check. */
				2279	for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
				2280	if (!spell_iswordc(p))
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2281	{
				2282	oldae->ae_add_nw = p;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2283	break;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2284	}
				2285
				2286	if (prefix && oldae->ae_add_nw != NULL)
				2287	{
				2288	/* If a prefix has both word and non-word characters
				2289	* special treatment is necessary. If it has only
				2290	* non-word characters it becomes a leadstring. */
				2291	for (p = oldae->ae_add; *p != NUL; mb_ptr_adv(p))
				2292	if (spell_iswordc(p))
				2293	{
				2294	oldae->ae_add_pw = p;
				2295	break;
				2296	}
				2297	if (oldae->ae_add_pw != NULL)
				2298	{
				2299	/* Mixed prefix, set ae_add_nw to first non-word
				2300	* char after ae_add_pw (if there is one). */
				2301	oldae->ae_add_nw = NULL;
				2302	for ( ; *p != NUL; mb_ptr_adv(p))
				2303	if (!spell_iswordc(p))
				2304	{
				2305	oldae->ae_add_nw = p;
				2306	break;
				2307	}
				2308	}
				2309	}
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2310	}
				2311
				2312	if (oldae->ae_cond == NULL)
				2313	/* hashtable requires a non-empty key */
				2314	STRCPY(condkey, "---");
				2315	else
				2316	STRCPY(condkey, oldae->ae_cond);
				2317
				2318	/* Look for an existing list with this name and condition. */
				2319	hi = hash_find(&condht, condkey);
				2320	if (!HASHITEM_EMPTY(hi))
				2321	/* Match with existing affix, use that one. */
				2322	newnr = HI2AS(hi)->as_nr;
				2323	else
				2324	{
				2325	/* Add a new affix number. */
				2326	newnr = tga.ga_len;
				2327	if (ga_grow(&tga, 1) == FAIL)
				2328	retval = FAIL;
				2329	else
				2330	{
				2331	newah = ((affheader_T *)tga.ga_data) + newnr;
				2332	newah->ah_combine = oldah->ah_combine;
				2333	newah->ah_first = NULL;
				2334	++tga.ga_len;
				2335
				2336	/* Add the new list to the condht hashtable. */
				2337	add_affhash(&condht, condkey, newnr);
				2338	}
				2339	}
				2340
				2341	/* Add the new affentry_T to the list. */
				2342	newah = ((affheader_T *)tga.ga_data) + newnr;
				2343	newae = (affentry_T *)alloc_clear((unsigned)sizeof(affentry_T));
				2344	if (newae == NULL)
				2345	retval = FAIL;
				2346	else
				2347	{
				2348	newae->ae_next = newah->ah_first;
				2349	newah->ah_first = newae;
				2350	if (oldae->ae_chop == NULL)
				2351	newae->ae_chop = NULL;
				2352	else
				2353	newae->ae_chop = vim_strsave(oldae->ae_chop);
				2354	if (oldae->ae_add == NULL)
				2355	newae->ae_add = NULL;
				2356	else
				2357	newae->ae_add = vim_strsave(oldae->ae_add);
				2358
				2359	/* The condition is not copied, since the new affix is
				2360	* only used for words where the condition matches. */
				2361	}
				2362	}
				2363
				2364	/* 2. Check if some of the lists already exist, re-use their
				2365	* number. Otherwise add the list to "gap". */
				2366	for (newnr = 0; newnr < tga.ga_len; ++newnr)
				2367	{
				2368	newah = ((affheader_T *)tga.ga_data) + newnr;
				2369	for (gapnr = 0; gapnr < gap->ga_len; ++gapnr)
				2370	{
				2371	gapah = ((affheader_T *)gap->ga_data) + gapnr;
				2372	if (same_affentries(newah, gapah))
				2373	/* Found an existing affheader_T entry with same
				2374	* affentry_T list, use its number. */
				2375	break;
				2376	}
				2377
				2378	newah->ah_affnr = gapnr;
				2379	if (gapnr == gap->ga_len)
				2380	{
				2381	/* This is a new affentry_T list, add it. */
				2382	if (ga_grow(gap, 1) == FAIL)
				2383	retval = FAIL;
				2384	else
				2385	{
				2386	(((affheader_T )gap->ga_data) + gap->ga_len) = *newah;
				2387	++gap->ga_len;
				2388	}
				2389	}
				2390	else
				2391	{
				2392	/* free unused affentry_T list */
				2393	free_affixentries(newah->ah_first);
				2394	}
				2395	}
				2396
				2397	/* 3. Assign the new affix numbers to the old affixes. */
				2398	for (oldae = oldah->ah_first; oldae != NULL && retval == OK;
				2399	oldae = oldae->ae_next)
				2400	{
				2401	if (oldae->ae_cond == NULL)
				2402	/* hashtable requires a non-empty key */
				2403	STRCPY(condkey, "---");
				2404	else
				2405	STRCPY(condkey, oldae->ae_cond);
				2406
				2407	/* Look for an existing affix with this name and condition. */
				2408	hi = hash_find(&condht, condkey);
				2409	if (!HASHITEM_EMPTY(hi))
				2410	/* Match with existing affix, use that one. */
				2411	newnr = HI2AS(hi)->as_nr;
				2412	else
				2413	{
				2414	EMSG(_(e_internal));
				2415	retval = FAIL;
				2416	}
				2417	newah = ((affheader_T *)tga.ga_data) + newnr;
				2418	oldae->ae_affnr = newah->ah_affnr;
				2419	}
				2420
				2421	ga_clear(&tga);
				2422	clear_affhash(&condht);
				2423	}
				2424	}
				2425
				2426	return retval;
				2427	}
				2428
				2429	/*
				2430	* Return TRUE if the affentry_T lists for "ah1" and "ah2" contain the same
				2431	* items, ignoring the order.
				2432	* Only compares the chop and add strings, not the condition.
				2433	*/
				2434	static int
				2435	same_affentries(ah1, ah2)
				2436	affheader_T *ah1;
				2437	affheader_T *ah2;
				2438	{
				2439	affentry_T ae1, ae2;
				2440
				2441	/* Check the length of the lists first. */
				2442	ae2 = ah2->ah_first;
				2443	for (ae1 = ah1->ah_first; ae1 != NULL; ae1 = ae1->ae_next)
				2444	{
				2445	if (ae2 == NULL)
				2446	return FALSE; /* "ah1" list is longer */
				2447	ae2 = ae2->ae_next;
				2448	}
				2449	if (ae2 != NULL)
				2450	return FALSE; /* "ah2" list is longer */
				2451
				2452	/* Check that each entry in "ah1" appears in "ah2". */
				2453	for (ae1 = ah1->ah_first; ae1 != NULL; ae1 = ae1->ae_next)
				2454	{
				2455	for (ae2 = ah2->ah_first; ae2 != NULL; ae2 = ae2->ae_next)
				2456	{
				2457	if ((ae1->ae_chop == NULL) == (ae2->ae_chop == NULL)
				2458	&& (ae1->ae_add == NULL) == (ae2->ae_add == NULL)
				2459	&& (ae1->ae_chop == NULL
				2460	\|\| STRCMP(ae1->ae_chop, ae2->ae_chop) == 0)
				2461	&& (ae1->ae_add == NULL
				2462	\|\| STRCMP(ae1->ae_add, ae2->ae_add) == 0))
				2463	break;
				2464	}
				2465	if (ae2 == NULL)
				2466	return FALSE;
				2467	}
				2468
				2469	return TRUE;
				2470	}
				2471
				2472	/*
				2473	* Add a chop/add or cond hashtable entry.
				2474	*/
				2475	static void
				2476	add_affhash(ht, key, newnr)
				2477	hashtab_T *ht;
				2478	char_u *key;
				2479	int newnr;
				2480	{
				2481	affhash_T *as;
				2482
				2483	as = (affhash_T *)alloc((unsigned)sizeof(affhash_T) + STRLEN(key));
				2484	if (as != NULL)
				2485	{
				2486	as->as_nr = newnr;
				2487	STRCPY(as->as_word, key);
				2488	hash_add(ht, as->as_word);
				2489	}
				2490	}
				2491
				2492	/*
				2493	* Clear the chop/add hashtable used to detect identical affixes.
				2494	*/
				2495	static void
				2496	clear_affhash(ht)
				2497	hashtab_T *ht;
				2498	{
				2499	int todo;
				2500	hashitem_T *hi;
				2501
				2502	todo = ht->ht_used;
				2503	for (hi = ht->ht_array; todo > 0; ++hi)
				2504	{
				2505	if (!HASHITEM_EMPTY(hi))
				2506	{
				2507	--todo;
				2508	vim_free(HI2AS(hi));
				2509	}
				2510	}
				2511	hash_clear(ht);
				2512	}
				2513
				2514	/*
				2515	* Translate list of affix names for an old word to affix numbers in a new
				2516	* basic word.
				2517	* This checks if the conditions match with the old word. The result is that
				2518	* the new affix does not need to store the condition.
				2519	*/
/*
 * Overview: for every affix name of "dw" the matching affix header is looked
 * up in "oldaff", first as a prefix and then as a suffix.  For every entry
 * whose condition matches "dw":
 * - an entry without non-word characters gets its number inserted in the
 *   sorted prefix or suffix list of "bw";
 * - otherwise a new basicword_T is allocated and either added to "newwords"
 *   or linked after "bw"; at the end it receives a copy of the prefix or
 *   suffix list of "bw".
 *
 * NOTE(review): in this copy of the text "*" characters appear to be missing
 * in places (e.g. the parameter declarations and "nbw = bw;") - confirm
 * against the upstream file before relying on the exact tokens.
 */
				2520	static void
				2521	trans_affixes(dw, bw, oldaff, newwords)
				2522	dicword_T dw; / old word */
				2523	basicword_T bw; / basic word */
				2524	afffile_T oldaff; / affixes for "oldwords" */
				2525	hashtab_T newwords; / table with words */
				2526	{
				2527	char_u key[2];
				2528	char_u *p;
				2529	char_u *affnm;
				2530	garray_T *gap;
				2531	hashitem_T *aff_hi;
				2532	affheader_T *ah;
				2533	affentry_T *ae;
				2534	regmatch_T regmatch;
				2535	int i;
				2536	basicword_T *nbw;
				2537	int alen;
				2538	int wlen;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2539	garray_T suffixga; /* list of words with non-word suffixes */
				2540	garray_T prefixga; /* list of words with non-word prefixes */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2541	char_u nword[MAXWLEN];
				2542	int flags;
				2543	int n;
				2544
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2545	ga_init2(&suffixga, (int)sizeof(basicword_T *), 5);
				2546	ga_init2(&prefixga, (int)sizeof(basicword_T *), 5);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2547
				2548	/* Loop over all the affix names of the old word. */
/* NOTE(review): dw->dw_affnm is used without a NULL check; presumably the
 * caller only passes words that have affix names - confirm. */
				2549	key[1] = NUL;
				2550	for (affnm = dw->dw_affnm; *affnm != NUL; ++affnm)
				2551	{
				2552	key[0] = *affnm;
				2553	aff_hi = hash_find(&oldaff->af_pref, key);
				2554	if (!HASHITEM_EMPTY(aff_hi))
				2555	gap = &bw->bw_prefix; /* found a prefix */
				2556	else
				2557	{
				2558	gap = &bw->bw_suffix; /* must be a suffix */
				2559	aff_hi = hash_find(&oldaff->af_suff, key);
				2560	if (HASHITEM_EMPTY(aff_hi))
				2561	{
				2562	smsg((char_u *)_("No affix entry '%s' for word %s"),
				2563	key, dw->dw_word);
				2564	continue;
				2565	}
				2566	}
				2567
				2568	/* Loop over all the affix entries for this affix name. */
				2569	ah = HI2AH(aff_hi);
				2570	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				2571	{
/* An entry without a compiled condition applies to every word. */
				2572	regmatch.regprog = ae->ae_prog;
				2573	regmatch.rm_ic = FALSE; /* TODO: Should this be TRUE??? */
				2574	if (ae->ae_prog == NULL
				2575	\|\| vim_regexec(&regmatch, dw->dw_word, (colnr_T)0))
				2576	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2577	if ((ae->ae_add_nw != NULL \|\| ae->ae_add_pw != NULL)
				2578	&& (gap != &bw->bw_suffix \|\| bw->bw_addstring == NULL))
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2579	{
				2580	/* Affix has a non-word character and isn't prepended to
				2581	* leader or appended to addition. Need to use another
				2582	* word with an addition. It's a copy of the basicword_T
				2583	* "bw". */
				2584	if (gap == &bw->bw_suffix)
				2585	{
/* "alen" is the number of bytes in the suffix before its first non-word
 * character. */
				2586	alen = ae->ae_add_nw - ae->ae_add;
				2587	nbw = (basicword_T *)alloc((unsigned)(
				2588	sizeof(basicword_T) + STRLEN(bw->bw_word)
				2589	+ alen + 1));
				2590	if (nbw != NULL)
				2591	{
/* NOTE(review): as written this rebinds the pointer and drops the
 * allocation; the comment above speaks of a copy, so a struct copy was
 * presumably intended - confirm.  The same applies to the two other
 * "nbw = bw;" statements further down. */
				2592	nbw = bw;
				2593	ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
				2594	ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
				2595
				2596	/* Adding the suffix may change the caps. */
/* NOTE(review): nword[] has MAXWLEN bytes and is filled with STRCPY() and
 * STRCAT() without a length check - confirm the word plus affix always
 * fits. */
				2597	STRCPY(nword, dw->dw_word);
				2598	if (ae->ae_chop != NULL)
				2599	{
				2600	/* Remove chop string. */
				2601	p = nword + STRLEN(nword);
				2602	for (i = mb_charlen(ae->ae_chop); i > 0; --i)
				2603	mb_ptr_back(nword, p);
				2604	*p = NUL;
				2605	}
				2606	STRCAT(nword, ae->ae_add);
				2607	flags = captype(nword, nword + STRLEN(nword));
/* NOTE(review): unlike the two prefix branches below there is no "else"
 * that sets bw_caseword to NULL here - confirm that keeping the value taken
 * from "bw" is intended. */
				2608	if (flags & BWF_KEEPCAP)
				2609	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2610	/* "caseword" excludes the addition */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2611	nword[STRLEN(dw->dw_word) + alen] = NUL;
				2612	nbw->bw_caseword = vim_strsave(nword);
				2613	}
				2614	nbw->bw_flags &= ~(BWF_ONECAP \| BWF_ALLCAP
				2615	\| BWF_KEEPCAP);
				2616	nbw->bw_flags \|= flags;
				2617
				2618	if (bw->bw_leadstring != NULL)
				2619	nbw->bw_leadstring =
				2620	vim_strsave(bw->bw_leadstring);
				2621	nbw->bw_addstring = vim_strsave(ae->ae_add_nw);
				2622
				2623	STRCPY(nbw->bw_word, bw->bw_word);
				2624	if (alen > 0 \|\| ae->ae_chop != NULL)
				2625	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2626	/* Suffix starts with word character and/or
				2627	* chop off something. Append it to the word.
				2628	* Add new word entry. */
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2629	wlen = STRLEN(nbw->bw_word);
				2630	if (ae->ae_chop != NULL)
				2631	wlen -= STRLEN(ae->ae_chop);
				2632	mch_memmove(nbw->bw_word + wlen, ae->ae_add,
				2633	alen);
				2634	nbw->bw_word[wlen + alen] = NUL;
				2635	add_to_wordlist(newwords, nbw);
				2636	}
				2637	else
				2638	/* Basic word is the same, link "nbw" after
				2639	* "bw". */
				2640	bw->bw_next = nbw;
				2641
				2642	/* Remember this word, we need to set bw_prefix
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2643	* and bw_prefix later. */
				2644	if (ga_grow(&suffixga, 1) == OK)
				2645	((basicword_T **)suffixga.ga_data)
				2646	[suffixga.ga_len++] = nbw;
				2647	}
				2648	}
				2649	else if (ae->ae_add_nw == NULL)
				2650	{
				2651	/* Prefix that starts with non-word char(s) and may be
				2652	* followed by word chars: Make a leadstring and
				2653	* prepend word chars before the word. */
				2654	alen = STRLEN(ae->ae_add_pw);
				2655	nbw = (basicword_T *)alloc((unsigned)(
				2656	sizeof(basicword_T) + STRLEN(bw->bw_word)
				2657	+ alen + 1));
				2658	if (nbw != NULL)
				2659	{
				2660	nbw = bw;
				2661	ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
				2662	ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
				2663
				2664	/* Adding the prefix may change the caps. */
				2665	STRCPY(nword, ae->ae_add);
				2666	p = dw->dw_word;
				2667	if (ae->ae_chop != NULL)
				2668	/* Skip chop string. */
				2669	for (i = mb_charlen(ae->ae_chop); i > 0; --i)
				2670	mb_ptr_adv( p);
				2671	STRCAT(nword, p);
				2672
				2673	flags = captype(nword, nword + STRLEN(nword));
				2674	if (flags & BWF_KEEPCAP)
				2675	/* "caseword" excludes the addition */
				2676	nbw->bw_caseword = vim_strsave(nword
				2677	+ (ae->ae_add_pw - ae->ae_add));
				2678	else
				2679	nbw->bw_caseword = NULL;
				2680	nbw->bw_flags &= ~(BWF_ONECAP \| BWF_ALLCAP
				2681	\| BWF_KEEPCAP);
				2682	nbw->bw_flags \|= flags;
				2683
				2684	if (bw->bw_addstring != NULL)
				2685	nbw->bw_addstring =
				2686	vim_strsave(bw->bw_addstring);
				2687	else
				2688	nbw->bw_addstring = NULL;
/* The leadstring is the part of the prefix before its first word
 * character. */
				2689	nbw->bw_leadstring = vim_strnsave(ae->ae_add,
				2690	ae->ae_add_pw - ae->ae_add);
				2691
				2692	if (alen > 0 \|\| ae->ae_chop != NULL)
				2693	{
				2694	/* Prefix ends in word character and/or chop
				2695	* off something. Prepend it to the word.
				2696	* Add new word entry. */
				2697	STRCPY(nbw->bw_word, ae->ae_add_pw);
				2698	p = bw->bw_word;
				2699	if (ae->ae_chop != NULL)
				2700	p += STRLEN(ae->ae_chop);
				2701	STRCAT(nbw->bw_word, p);
				2702	add_to_wordlist(newwords, nbw);
				2703	}
				2704	else
				2705	{
				2706	/* Basic word is the same, link "nbw" after
				2707	* "bw". */
				2708	STRCPY(nbw->bw_word, bw->bw_word);
				2709	bw->bw_next = nbw;
				2710	}
				2711
				2712	/* Remember this word, we need to set bw_suffix
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2713	* and bw_suffix later. */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2714	if (ga_grow(&prefixga, 1) == OK)
				2715	((basicword_T **)prefixga.ga_data)
				2716	[prefixga.ga_len++] = nbw;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2717	}
				2718	}
				2719	else
				2720	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2721	/* Prefix with both non-word and word characters: Turn
				2722	* prefix into basic word, original word becomes an
				2723	* addstring. */
				2724
				2725	/* Fold-case the word characters in the prefix into
				2726	* nword[]. */
/* NOTE(review): the loop starts at ae->ae_add_pw; get_new_aff() leaves that
 * NULL for a prefix with only non-word characters, while ae_add_nw is set
 * for it - confirm such an entry cannot reach this branch. */
				2727	alen = 0;
				2728	for (p = ae->ae_add_pw; p < ae->ae_add_nw; p += n)
				2729	{
				2730	#ifdef FEAT_MBYTE
				2731	n = (*mb_ptr2len_check)(p);
				2732	#else
				2733	n = 1;
				2734	#endif
				2735	(void)str_foldcase(p, n, nword + alen,
				2736	MAXWLEN - alen);
				2737	alen += STRLEN(nword + alen);
				2738	}
				2739
				2740	/* Allocate a new word entry. */
				2741	nbw = (basicword_T *)alloc((unsigned)(
				2742	sizeof(basicword_T) + alen + 1));
				2743	if (nbw != NULL)
				2744	{
				2745	nbw = bw;
				2746	ga_init2(&nbw->bw_prefix, sizeof(short_u), 1);
				2747	ga_init2(&nbw->bw_suffix, sizeof(short_u), 1);
				2748
				2749	mch_memmove(nbw->bw_word, nword, alen);
				2750	nbw->bw_word[alen] = NUL;
				2751
				2752	/* Use the cap type of the prefix. */
				2753	alen = ae->ae_add_nw - ae->ae_add_pw;
				2754	mch_memmove(nword, ae->ae_add_pw, alen);
				2755	nword[alen] = NUL;
				2756	flags = captype(nword, nword + STRLEN(nword));
				2757	if (flags & BWF_KEEPCAP)
				2758	nbw->bw_caseword = vim_strsave(nword);
				2759	else
				2760	nbw->bw_caseword = NULL;
				2761	nbw->bw_flags &= ~(BWF_ONECAP \| BWF_ALLCAP
				2762	\| BWF_KEEPCAP);
				2763	nbw->bw_flags \|= flags;
				2764
				2765	/* The addstring is the prefix after the word
				2766	* characters, the original word excluding "chop",
				2767	* plus any addition. */
				2768	STRCPY(nword, ae->ae_add_nw);
				2769	p = bw->bw_word;
				2770	if (ae->ae_chop != NULL)
				2771	p += STRLEN(ae->ae_chop);
				2772	STRCAT(nword, p);
				2773	if (bw->bw_addstring != NULL)
				2774	STRCAT(nword, bw->bw_addstring);
				2775	nbw->bw_addstring = vim_strsave(nword);
				2776
				2777	if (ae->ae_add_pw > ae->ae_add)
				2778	nbw->bw_leadstring = vim_strnsave(ae->ae_add,
				2779	ae->ae_add_pw - ae->ae_add);
				2780	else
				2781	nbw->bw_leadstring = NULL;
				2782
				2783	add_to_wordlist(newwords, nbw);
				2784
				2785	/* Remember this word, we need to set bw_suffix
				2786	* and bw_suffix later. */
				2787	if (ga_grow(&prefixga, 1) == OK)
				2788	((basicword_T **)prefixga.ga_data)
				2789	[prefixga.ga_len++] = nbw;
				2790	}
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2791	}
				2792	}
				2793	else
				2794	{
				2795	/* Affix applies to this word, add the related affix
				2796	* number. But only if it's not there yet. And keep the
				2797	* list sorted, so that we can compare it later. */
/* After this loop "i" is -1 when the number is already in the list,
 * otherwise it is the index where the number is to be inserted. */
				2798	for (i = 0; i < gap->ga_len; ++i)
				2799	{
				2800	n = ((short_u *)gap->ga_data)[i];
				2801	if (n >= ae->ae_affnr)
				2802	{
				2803	if (n == ae->ae_affnr)
				2804	i = -1;
				2805	break;
				2806	}
				2807	}
				2808	if (i >= 0 && ga_grow(gap, 1) == OK)
				2809	{
/* Shift the higher numbers up to make room at index "i". */
				2810	if (i < gap->ga_len)
				2811	mch_memmove(((short_u *)gap->ga_data) + i + 1,
				2812	((short_u *)gap->ga_data) + i,
				2813	sizeof(short_u) * (gap->ga_len - i));
				2814	((short_u *)gap->ga_data)[i] = ae->ae_affnr;
				2815	++gap->ga_len;
				2816	}
				2817	}
				2818	}
				2819	}
				2820	}
				2821
				2822	/*
				2823	* For the words that we added for suffixes with non-word characters: Use
				2824	* the prefix list of the main word.
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2825	*/
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2826	for (i = 0; i < suffixga.ga_len; ++i)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2827	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2828	nbw = ((basicword_T **)suffixga.ga_data)[i];
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2829	if (ga_grow(&nbw->bw_prefix, bw->bw_prefix.ga_len) == OK)
				2830	{
				2831	mch_memmove(nbw->bw_prefix.ga_data, bw->bw_prefix.ga_data,
				2832	bw->bw_prefix.ga_len * sizeof(short_u));
				2833	nbw->bw_prefix.ga_len = bw->bw_prefix.ga_len;
				2834	}
				2835	}
				2836
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2837	/*
				2838	* For the words that we added for prefixes with non-word characters: Use
				2839	* the suffix list of the main word.
				2840	*/
				2841	for (i = 0; i < prefixga.ga_len; ++i)
				2842	{
				2843	nbw = ((basicword_T **)prefixga.ga_data)[i];
				2844	if (ga_grow(&nbw->bw_suffix, bw->bw_suffix.ga_len) == OK)
				2845	{
				2846	mch_memmove(nbw->bw_suffix.ga_data, bw->bw_suffix.ga_data,
				2847	bw->bw_suffix.ga_len * sizeof(short_u));
				2848	nbw->bw_suffix.ga_len = bw->bw_suffix.ga_len;
				2849	}
				2850	}
				2851
/* Only the two pointer lists are released here; the words they point to
 * were added to "newwords" or linked after "bw" above. */
				2852	ga_clear(&suffixga);
				2853	ga_clear(&prefixga);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2854	}
				2855
				2856	/*
				2857	* Go over all words in "oldwords" and change the old affix names to the new
				2858	* affix numbers, check the conditions, fold case, extract the basic word and
				2859	* additions.
				2860	*/
				2861	static int
				2862	build_wordlist(newwords, oldwords, oldaff, regionmask)
				2863	hashtab_T newwords; / basicword_T entries */
				2864	hashtab_T oldwords; / dicword_T entries */
				2865	afffile_T oldaff; / affixes for "oldwords" */
				2866	int regionmask; /* value for bw_region */
				2867	{
				2868	int todo;
				2869	hashitem_T *old_hi;
				2870	dicword_T *dw;
				2871	basicword_T *bw;
				2872	char_u foldword[MAXLINELEN];
				2873	int leadlen;
				2874	char_u leadstring[MAXLINELEN];
				2875	int addlen;
				2876	char_u addstring[MAXLINELEN];
				2877	int dwlen;
				2878	char_u *p;
				2879	int clen;
				2880	int flags;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2881	char_u *cp = NULL;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2882	int l;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2883	char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2884
				2885	todo = oldwords->ht_used;
				2886	for (old_hi = oldwords->ht_array; todo > 0; ++old_hi)
				2887	{
				2888	if (!HASHITEM_EMPTY(old_hi))
				2889	{
				2890	--todo;
				2891	dw = HI2DW(old_hi);
				2892
				2893	/* This takes time, print a message now and then. */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2894	if ((todo & 0x3ff) == 0 \|\| todo == (int)oldwords->ht_used - 1)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2895	{
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	2896	sprintf((char *)message, _("%6d todo - %s"),
				2897	todo, dw->dw_word);
				2898	msg_start();
				2899	msg_outtrans_attr(message, 0);
				2900	msg_clr_eos();
				2901	msg_didout = FALSE;
				2902	msg_col = 0;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	2903	out_flush();
				2904	ui_breakcheck();
				2905	if (got_int)
				2906	break;
				2907	}
				2908
				2909	/* The basic words are always stored with folded case. */
				2910	dwlen = STRLEN(dw->dw_word);
				2911	(void)str_foldcase(dw->dw_word, dwlen, foldword, MAXLINELEN);
				2912	flags = captype(dw->dw_word, dw->dw_word + dwlen);
				2913
				2914	/* Check for non-word characters before the word. */
				2915	clen = 0;
				2916	leadlen = 0;
				2917	if (!spell_iswordc(foldword))
				2918	{
				2919	p = foldword;
				2920	for (;;)
				2921	{
				2922	mb_ptr_adv(p);
				2923	++clen;
				2924	if (p == NUL) / Only non-word chars (bad word!) */
				2925	{
				2926	if (p_verbose > 0)
				2927	smsg((char_u *)_("Warning: word without word characters: \"%s\""),
				2928	foldword);
				2929	break;
				2930	}
				2931	if (spell_iswordc(p))
				2932	{
				2933	/* Move the leader to "leadstring" and remove it from
				2934	* "foldword". */
				2935	leadlen = p - foldword;
				2936	mch_memmove(leadstring, foldword, leadlen);
				2937	leadstring[leadlen] = NUL;
				2938	mch_memmove(foldword, p, STRLEN(p) + 1);
				2939	break;
				2940	}
				2941	}
				2942	}
				2943
				2944	/* Check for non-word characters after word characters. */
				2945	addlen = 0;
				2946	for (p = foldword; spell_iswordc(p); mb_ptr_adv(p))
				2947	{
				2948	if (*p == NUL)
				2949	break;
				2950	++clen;
				2951	}
				2952	if (*p != NUL)
				2953	{
				2954	/* Move the addition to "addstring" and truncate "foldword". */
				2955	if (flags & BWF_KEEPCAP)
				2956	{
				2957	/* Preserve caps, need to skip the right number of
				2958	* characters in the original word (case folding may
				2959	* change the byte count). */
				2960	l = 0;
				2961	for (cp = dw->dw_word; l < clen; mb_ptr_adv(cp))
				2962	++l;
				2963	addlen = STRLEN(cp);
				2964	mch_memmove(addstring, cp, addlen + 1);
				2965	}
				2966	else
				2967	{
				2968	addlen = STRLEN(p);
				2969	mch_memmove(addstring, p, addlen + 1);
				2970	}
				2971	*p = NUL;
				2972	}
				2973
				2974	bw = (basicword_T *)alloc_clear((unsigned)sizeof(basicword_T)
				2975	+ STRLEN(foldword));
				2976	if (bw == NULL)
				2977	break;
				2978	STRCPY(bw->bw_word, foldword);
				2979	bw->bw_region = regionmask;
				2980
				2981	if (leadlen > 0)
				2982	bw->bw_leadstring = vim_strsave(leadstring);
				2983	else
				2984	bw->bw_leadstring = NULL;
				2985	if (addlen > 0)
				2986	bw->bw_addstring = vim_strsave(addstring);
				2987	else
				2988	bw->bw_addstring = NULL;
				2989
				2990	add_to_wordlist(newwords, bw);
				2991
				2992	if (flags & BWF_KEEPCAP)
				2993	{
				2994	if (addlen == 0)
				2995	/* use the whole word */
				2996	bw->bw_caseword = vim_strsave(dw->dw_word + leadlen);
				2997	else
				2998	/* use only up to the addition */
				2999	bw->bw_caseword = vim_strnsave(dw->dw_word + leadlen,
				3000	cp - dw->dw_word - leadlen);
				3001	if (bw->bw_caseword == NULL) /* out of memory */
				3002	flags &= ~BWF_KEEPCAP;
				3003	}
				3004	bw->bw_flags = flags;
				3005
				3006	/* Deal with any affix names on the old word, translate them
				3007	* into affix numbers. */
				3008	ga_init2(&bw->bw_prefix, sizeof(short_u), 10);
				3009	ga_init2(&bw->bw_suffix, sizeof(short_u), 10);
				3010	if (dw->dw_affnm != NULL)
				3011	trans_affixes(dw, bw, oldaff, newwords);
				3012	}
				3013	}
				3014	if (todo > 0)
				3015	return FAIL;
				3016	return OK;
				3017	}
				3018
				3019	/*
				3020	* Go through the list of words and combine the ones that are identical except
				3021	* for the region.
				3022	*/
				3023	static void
				3024	combine_regions(newwords)
				3025	hashtab_T *newwords;
				3026	{
				3027	int todo;
				3028	hashitem_T *hi;
				3029	basicword_T bw, nbw, *pbw;
				3030
				3031	/* Loop over all basic words in the words table. */
				3032	todo = newwords->ht_used;
				3033	for (hi = newwords->ht_array; todo > 0; ++hi)
				3034	{
				3035	if (!HASHITEM_EMPTY(hi))
				3036	{
				3037	--todo;
				3038
				3039	/* Loop over the list of words for this basic word. Compare with
				3040	* each following word in the same list. */
				3041	for (bw = HI2BW(hi); bw != NULL; bw = bw->bw_next)
				3042	{
				3043	pbw = bw;
				3044	for (nbw = pbw->bw_next; nbw != NULL; nbw = pbw->bw_next)
				3045	{
				3046	if (bw->bw_flags == nbw->bw_flags
				3047	&& (bw->bw_leadstring == NULL)
				3048	== (nbw->bw_leadstring == NULL)
				3049	&& (bw->bw_addstring == NULL)
				3050	== (nbw->bw_addstring == NULL)
				3051	&& ((bw->bw_flags & BWF_KEEPCAP) == 0
				3052	\|\| (STRCMP(bw->bw_caseword,
				3053	nbw->bw_caseword) == 0))
				3054	&& (bw->bw_leadstring == NULL
				3055	\|\| (STRCMP(bw->bw_leadstring,
				3056	nbw->bw_leadstring) == 0))
				3057	&& (bw->bw_addstring == NULL
				3058	\|\| (STRCMP(bw->bw_addstring,
				3059	nbw->bw_addstring) == 0))
				3060	&& same_affixes(bw, nbw)
				3061	)
				3062	{
				3063	/* Match, combine regions and delete "nbw". */
				3064	pbw->bw_next = nbw->bw_next;
				3065	bw->bw_region \|= nbw->bw_region;
				3066	free_basicword(nbw);
				3067	}
				3068	else
				3069	/* No match, continue with next one. */
				3070	pbw = nbw;
				3071	}
				3072	}
				3073	}
				3074	}
				3075	}
				3076
				3077	/*
				3078	* Return TRUE when the prefixes and suffixes for "bw" and "nbw" are equal.
				3079	*/
				3080	static int
				3081	same_affixes(bw, nbw)
				3082	basicword_T *bw;
				3083	basicword_T *nbw;
				3084	{
				3085	return (bw->bw_prefix.ga_len == nbw->bw_prefix.ga_len
				3086	&& bw->bw_suffix.ga_len == nbw->bw_suffix.ga_len
				3087	&& (bw->bw_prefix.ga_len == 0
				3088	\|\| vim_memcmp(bw->bw_prefix.ga_data,
				3089	nbw->bw_prefix.ga_data,
				3090	bw->bw_prefix.ga_len * sizeof(short_u)) == 0)
				3091	&& (bw->bw_suffix.ga_len == 0
				3092	\|\| vim_memcmp(bw->bw_suffix.ga_data,
				3093	nbw->bw_suffix.ga_data,
				3094	bw->bw_suffix.ga_len * sizeof(short_u)) == 0));
				3095	}
				3096
				3097	/*
				3098	* For each basic word with additions turn the affixes into other additions
				3099	* and/or new basic words. The result is that no affixes apply to a word with
				3100	* additions.
				3101	*/
				3102	static void
				3103	expand_affixes(newwords, prefgap, suffgap)
				3104	hashtab_T *newwords;
				3105	garray_T *prefgap;
				3106	garray_T *suffgap;
				3107	{
				3108	int todo;
				3109	hashitem_T *hi;
				3110	basicword_T *bw;
				3111	int pi, si;
				3112	affentry_T pae, sae;
				3113	garray_T add_words;
				3114	int n;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3115	char_u message[MAXLINELEN + MAXWLEN];
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3116
				3117	ga_init2(&add_words, sizeof(basicword_T *), 10);
				3118
				3119	todo = newwords->ht_used;
				3120	for (hi = newwords->ht_array; todo > 0; ++hi)
				3121	{
				3122	if (!HASHITEM_EMPTY(hi))
				3123	{
				3124	--todo;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3125
				3126	/* This takes time, print a message now and then. */
				3127	if ((todo & 0x3ff) == 0 \|\| todo == (int)newwords->ht_used - 1)
				3128	{
				3129	sprintf((char *)message, _("%6d todo - %s"),
				3130	todo, HI2BW(hi)->bw_word);
				3131	msg_start();
				3132	msg_outtrans_attr(message, 0);
				3133	msg_clr_eos();
				3134	msg_didout = FALSE;
				3135	msg_col = 0;
				3136	out_flush();
				3137	ui_breakcheck();
				3138	if (got_int)
				3139	break;
				3140	}
				3141
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3142	for (bw = HI2BW(hi); bw != NULL; bw = bw->bw_next)
				3143	{
				3144	/*
				3145	* Need to fix affixes if there is a leader or addition and
				3146	* there are prefixes or suffixes.
				3147	*/
				3148	if ((bw->bw_leadstring != NULL \|\| bw->bw_addstring != NULL)
				3149	&& (bw->bw_prefix.ga_len != 0
				3150	\|\| bw->bw_suffix.ga_len != 0))
				3151	{
				3152	/* Loop over all prefix numbers, but first without a
				3153	* prefix. */
				3154	for (pi = -1; pi < bw->bw_prefix.ga_len; ++pi)
				3155	{
				3156	pae = NULL;
				3157	if (pi >= 0)
				3158	{
				3159	n = ((short_u *)bw->bw_prefix.ga_data)[pi];
				3160	pae = ((affheader_T *)prefgap->ga_data + n)
				3161	->ah_first;
				3162	}
				3163
				3164	/* Loop over all entries for prefix "pi". Do it once
				3165	* when there is no prefix (pi == -1). */
				3166	do
				3167	{
				3168	/* Loop over all suffix numbers. Do without a
				3169	* suffix first when there is a prefix. */
				3170	for (si = (pi == -1 ? 0 : -1);
				3171	si < bw->bw_suffix.ga_len; ++si)
				3172	{
				3173	sae = NULL;
				3174	if (si >= 0)
				3175	{
				3176	n = ((short_u *)bw->bw_suffix.ga_data)[si];
				3177	sae = ((affheader_T *)suffgap->ga_data + n)
				3178	->ah_first;
				3179	}
				3180
				3181	/* Loop over all entries for suffix "si". Do
				3182	* it once when there is no suffix (si == -1).
				3183	*/
				3184	do
				3185	{
				3186	/* Expand the word for this combination of
				3187	* prefixes and affixes. */
				3188	expand_one_aff(bw, &add_words, pae, sae);
				3189
				3190	/* Advance to next suffix entry, if there
				3191	* is one. */
				3192	if (sae != NULL)
				3193	sae = sae->ae_next;
				3194	} while (sae != NULL);
				3195	}
				3196
				3197	/* Advance to next prefix entry, if there is one. */
				3198	if (pae != NULL)
				3199	pae = pae->ae_next;
				3200	} while (pae != NULL);
				3201	}
				3202	}
				3203	}
				3204	}
				3205	}
				3206
				3207	/*
				3208	* Add the new words afterwards, can't change "newwords" while going over
				3209	* all its items.
				3210	*/
				3211	for (pi = 0; pi < add_words.ga_len; ++pi)
				3212	add_to_wordlist(newwords, ((basicword_T **)add_words.ga_data)[pi]);
				3213
				3214	ga_clear(&add_words);
				3215	}
				3216
				3217	/*
				3218	* Add one word to "add_words" for basic word "bw" with additions, adding
				3219	* prefix "pae" and suffix "sae". Either "pae" or "sae" can be NULL.
				3220	*/
				3221	static void
				3222	expand_one_aff(bw, add_words, pae, sae)
				3223	basicword_T *bw;
				3224	garray_T *add_words;
				3225	affentry_T *pae;
				3226	affentry_T *sae;
				3227	{
				3228	char_u word[MAXWLEN + 1];
				3229	char_u caseword[MAXWLEN + 1];
				3230	int l = 0;
				3231	int choplen = 0;
				3232	int ll;
				3233	basicword_T *nbw;
				3234
				3235	/* Prepend prefix to the basic word if there is a prefix and there is no
				3236	* leadstring. */
				3237	if (pae != NULL && bw->bw_leadstring == NULL)
				3238	{
				3239	if (pae->ae_add != NULL)
				3240	{
				3241	l = STRLEN(pae->ae_add);
				3242	mch_memmove(word, pae->ae_add, l);
				3243	}
				3244	if (pae->ae_chop != NULL)
				3245	choplen = STRLEN(pae->ae_chop);
				3246	}
				3247
				3248	/* Copy the body of the word. */
				3249	STRCPY(word + l, bw->bw_word + choplen);
				3250
				3251	/* Do the same for bw_caseword, if it's there. */
				3252	if (bw->bw_flags & BWF_KEEPCAP)
				3253	{
				3254	if (l > 0)
				3255	mch_memmove(caseword, pae->ae_add, l);
				3256	STRCPY(caseword + l, bw->bw_caseword + choplen);
				3257	}
				3258
				3259	/* Append suffix to the basic word if there is a suffix and there is no
				3260	* addstring. */
				3261	if (sae != 0 && bw->bw_addstring == NULL)
				3262	{
				3263	l = STRLEN(word);
				3264	if (sae->ae_chop != NULL)
				3265	l -= STRLEN(sae->ae_chop);
				3266	if (sae->ae_add == NULL)
				3267	word[l] = NUL;
				3268	else
				3269	STRCPY(word + l, sae->ae_add);
				3270
				3271	if (bw->bw_flags & BWF_KEEPCAP)
				3272	{
				3273	/* Do the same for the caseword. */
				3274	l = STRLEN(caseword);
				3275	if (sae->ae_chop != NULL)
				3276	l -= STRLEN(sae->ae_chop);
				3277	if (sae->ae_add == NULL)
				3278	caseword[l] = NUL;
				3279	else
				3280	STRCPY(caseword + l, sae->ae_add);
				3281	}
				3282	}
				3283
				3284	nbw = (basicword_T *)alloc_clear((unsigned)
				3285	sizeof(basicword_T) + STRLEN(word));
				3286	if (nbw != NULL)
				3287	{
				3288	/* Add the new word to the list of words to be added later. */
				3289	if (ga_grow(add_words, 1) == FAIL)
				3290	{
				3291	vim_free(nbw);
				3292	return;
				3293	}
				3294	((basicword_T **)add_words->ga_data)[add_words->ga_len++] = nbw;
				3295
				3296	/* Copy the (modified) basic word, flags and region. */
				3297	STRCPY(nbw->bw_word, word);
				3298	nbw->bw_flags = bw->bw_flags;
				3299	nbw->bw_region = bw->bw_region;
				3300
				3301	/* Set the (modified) caseword. */
				3302	if (bw->bw_flags & BWF_KEEPCAP)
				3303	if ((nbw->bw_caseword = vim_strsave(caseword)) == NULL)
				3304	nbw->bw_flags &= ~BWF_KEEPCAP;
				3305
				3306	if (bw->bw_leadstring != NULL)
				3307	{
				3308	if (pae != NULL)
				3309	{
				3310	/* Prepend prefix to leadstring. */
				3311	ll = STRLEN(bw->bw_leadstring);
				3312	l = choplen = 0;
				3313	if (pae->ae_add != NULL)
				3314	l = STRLEN(pae->ae_add);
				3315	if (pae->ae_chop != NULL)
				3316	{
				3317	choplen = STRLEN(pae->ae_chop);
				3318	if (choplen > ll) /* TODO: error? */
				3319	choplen = ll;
				3320	}
				3321	nbw->bw_leadstring = alloc((unsigned)(ll + l - choplen + 1));
				3322	if (nbw->bw_leadstring != NULL)
				3323	{
				3324	if (l > 0)
				3325	mch_memmove(nbw->bw_leadstring, pae->ae_add, l);
				3326	STRCPY(nbw->bw_leadstring + l, bw->bw_leadstring + choplen);
				3327	}
				3328	}
				3329	else
				3330	nbw->bw_leadstring = vim_strsave(bw->bw_leadstring);
				3331	}
				3332
				3333	if (bw->bw_addstring != NULL)
				3334	{
				3335	if (sae != NULL)
				3336	{
				3337	/* Append suffix to addstring. */
				3338	l = STRLEN(bw->bw_addstring);
				3339	if (sae->ae_chop != NULL)
				3340	{
				3341	l -= STRLEN(sae->ae_chop);
				3342	if (l < 0) /* TODO: error? */
				3343	l = 0;
				3344	}
				3345	if (sae->ae_add == NULL)
				3346	ll = 0;
				3347	else
				3348	ll = STRLEN(sae->ae_add);
				3349	nbw->bw_addstring = alloc((unsigned)(ll + l - choplen + 1));
				3350	if (nbw->bw_addstring != NULL)
				3351	{
				3352	STRCPY(nbw->bw_addstring, bw->bw_addstring);
				3353	if (sae->ae_add == NULL)
				3354	nbw->bw_addstring[l] = NUL;
				3355	else
				3356	STRCPY(nbw->bw_addstring + l, sae->ae_add);
				3357	}
				3358	}
				3359	else
				3360	nbw->bw_addstring = vim_strsave(bw->bw_addstring);
				3361	}
				3362	}
				3363	}
				3364
				3365	/*
				3366	* Add basicword_T "*bw" to wordlist "newwords".
				3367	*/
				3368	static void
				3369	add_to_wordlist(newwords, bw)
				3370	hashtab_T *newwords;
				3371	basicword_T *bw;
				3372	{
				3373	hashitem_T *hi;
				3374	basicword_T *bw2;
				3375
				3376	hi = hash_find(newwords, bw->bw_word);
				3377	if (HASHITEM_EMPTY(hi))
				3378	{
				3379	/* New entry, add to hashlist. */
				3380	hash_add(newwords, bw->bw_word);
				3381	bw->bw_next = NULL;
				3382	}
				3383	else
				3384	{
				3385	/* Existing entry, append to list of basic words. */
				3386	bw2 = HI2BW(hi);
				3387	bw->bw_next = bw2->bw_next;
				3388	bw2->bw_next = bw;
				3389	}
				3390	}
				3391
				3392	/*
				3393	* Write a number to file "fd", MSB first, in "len" bytes.
				3394	*/
				3395	static void
				3396	put_bytes(fd, nr, len)
				3397	FILE *fd;
				3398	long_u nr;
				3399	int len;
				3400	{
				3401	int i;
				3402
				3403	for (i = len - 1; i >= 0; --i)
				3404	putc((int)(nr >> (i * 8)), fd);
				3405	}
				3406
				3407	/*
				3408	* Write affix info. <affflags> <affitemcnt> <affitem> ...
				3409	*/
				3410	static void
				3411	write_affix(fd, ah)
				3412	FILE *fd;
				3413	affheader_T *ah;
				3414	{
				3415	int i = 0;
				3416	affentry_T *ae;
				3417	char_u *p;
				3418	int round;
				3419
				3420	fputc(ah->ah_combine ? 1 : 0, fd); /* <affflags> */
				3421
				3422	/* Count the number of entries. */
				3423	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3424	++i;
				3425	put_bytes(fd, (long_u)i, 2); /* <affitemcnt> */
				3426
				3427	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3428	for (round = 1; round <= 2; ++round)
				3429	{
				3430	p = round == 1 ? ae->ae_chop : ae->ae_add;
				3431	if (p == NULL)
				3432	putc(0, fd); /* <affchoplen> / <affaddlen> */
				3433	else
				3434	{
				3435	putc(STRLEN(p), fd); /* <affchoplen> / <affaddlen> */
				3436	/* <affchop> / <affadd> */
				3437	fwrite(p, STRLEN(p), (size_t)1, fd);
				3438	}
				3439	}
				3440	}
				3441
				3442	/*
				3443	* Write list of affix NRs: <affixcnt> <affixNR> ...
				3444	*/
				3445	static void
				3446	write_affixlist(fd, aff, bytes)
				3447	FILE *fd;
				3448	garray_T *aff;
				3449	int bytes;
				3450	{
				3451	int i;
				3452
				3453	if (aff->ga_len > 0)
				3454	{
				3455	putc(aff->ga_len, fd); /* <affixcnt> */
				3456	for (i = 0; i < aff->ga_len; ++i)
				3457	put_bytes(fd, (long_u )((short_u *)aff->ga_data)[i], bytes);
				3458	}
				3459	}
				3460
				3461	/*
				3462	* Vim spell file format: <HEADER> <PREFIXLIST> <SUFFIXLIST>
				3463	* <SUGGEST> <WORDLIST>
				3464	*
				3465	* <HEADER>: <fileID> <regioncnt> <regionname> ...
				3466	*
				3467	* <fileID> 10 bytes "VIMspell01"
				3468	* <regioncnt> 1 byte number of regions following (8 supported)
				3469	* <regionname> 2 bytes Region name: ca, au, etc.
				3470	* First <regionname> is region 1.
				3471	*
				3472	*
				3473	* <PREFIXLIST>: <affcount> <afftotcnt> <affix> ...
				3474	* <SUFFIXLIST>: <affcount> <afftotcnt> <affix> ...
				3475	* list of possible affixes: prefixes and suffixes.
				3476	*
				3477	* <affcount> 2 bytes Number of affixes (MSB comes first).
				3478	* When more than 256 an affixNR is 2 bytes.
				3479	* This is separate for prefixes and suffixes!
				3480	* First affixNR is 0.
				3481	* <afftotcnt> 2 bytes Total number of affix items (MSB comes first).
				3482	*
				3483	* <affix>: <affflags> <affitemcnt> <affitem> ...
				3484	*
				3485	* <affflags> 1 byte 0x01: prefix combines with suffix.
				3486	* 0x02-0x80: unset
				3487	* <affitemcnt> 2 bytes Number of affixes with this affixNR (MSB first).
				3488	*
				3489	* <affitem>: <affchoplen> <affchop> <affaddlen> <affadd>
				3490	*
				3491	* <affchoplen> 1 byte Length of <affchop> in bytes.
				3492	* <affchop> N bytes To be removed from basic word.
				3493	* <affaddlen> 1 byte Length of <affadd> in bytes.
				3494	* <affadd> N bytes To be added to basic word.
				3495	*
				3496	*
				3497	* <SUGGEST> : <suggestlen> <more> ...
				3498	*
				3499	* <suggestlen> 4 bytes Length of <SUGGEST> in bytes, excluding
				3500	* <suggestlen>. MSB first.
				3501	* <more> To be defined.
				3502	*
				3503	*
				3504	* <WORDLIST>: <wordcount> <worditem> ...
				3505	*
				3506	* <wordcount> 4 bytes Number of <worditem> following. MSB first.
				3507	*
				3508	* <worditem>: <nr> <string> <flags> [<flags2>]
				3509	* [<caselen> <caseword>]
				3510	* [<affixcnt> <affixNR> ...] (prefixes)
				3511	* [<affixcnt> <affixNR> ...] (suffixes)
				3512	* [<region>]
				3513	* [<addcnt> <add> ...]
				3514	*
				3515	* <nr> i 1 byte Number of bytes copied from previous word.
				3516	* <string> N bytes Additional bytes for word, up to byte smaller than
				3517	* 0x20 (space).
				3518	* Must only contain case-folded word characters.
				3519	* <flags> 1 byte 0x01: word is valid without addition
				3520	* 0x02: has region byte
				3521	* 0x04: first letter must be upper-case
				3522	* 0x08: has suffixes, <affixcnt> and <affixNR> follow
				3523	* 0x10: more flags, <flags2> follows next
				3524	* 0x20-0x80: can't be used, unset
				3525	* <flags2> 1 byte 0x01: has additions, <addcnt> and <add> follow
				3526	* 0x02: has prefixes, <affixcnt> and <affixNR> follow
				3527	* 0x04: all letters must be upper-case
				3528	* 0x08: case must match
				3529	* 0x10-0x80: unset
				3530	* <caselen> 1 byte Length of <caseword>.
				3531	* <caseword> N bytes Word with matching case.
				3532	* <affixcnt> 1 byte Number of affix NRs following.
				3533	* <affixNR> 1 or 2 byte Number of possible affix for this word.
				3534	* When using 2 bytes MSB comes first.
				3535	* <region> 1 byte Bitmask for regions in which word is valid. When
				3536	* omitted it's valid in all regions.
				3537	* Lowest bit is for region 1.
				3538	* <addcnt> 2 bytes Number of <add> items following.
				3539	*
				3540	* <add>: <addflags> <addlen> [<leadlen> <addstring>] [<region>]
				3541	*
				3542	* <addflags> 1 byte 0x01: fixed case, <addstring> is the whole word
				3543	* with matching case.
				3544	* 0x02: first letter must be upper-case
				3545	* 0x04: all letters must be upper-case
				3546	* 0x08: has region byte
				3547	* 0x10-0x80: unset
				3548	* <addlen> 1 byte Length of <addstring> in bytes.
				3549	* <leadlen> 1 byte Number of bytes at start of <addstring> that must
				3550	* come before the start of the basic word.
				3551	* <addstring> N bytes Word characters, before/in/after the word.
				3552	*
				3553	* All text characters are in 'encoding': <affchop>, <affadd>, <string>,
				3554	* <caseword> and <addstring>.
				3555	* All other fields are ASCII: <regionname>
				3556	* <string> is always case-folded.
				3557	*/
				3558
				3559	/*
				3560	* Write the Vim spell file "fname".
				3561	*/
				3562	static void
				3563	write_vim_spell(fname, prefga, suffga, newwords, regcount, regchars)
				3564	char_u *fname;
				3565	garray_T prefga; / prefixes, affheader_T entries */
				3566	garray_T suffga; / suffixes, affheader_T entries */
				3567	hashtab_T newwords; / basic words, basicword_T entries */
				3568	int regcount; /* number of regions */
				3569	char_u regchars; / region names */
				3570	{
				3571	FILE *fd;
				3572	garray_T *gap;
				3573	hashitem_T *hi;
				3574	char_u **wtab;
				3575	int todo;
				3576	int flags, aflags;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3577	basicword_T bw, bwf, bw2 = NULL, prevbw = NULL;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3578	int regionmask; /* mask for all relevant region bits */
				3579	int i;
				3580	int cnt;
				3581	affentry_T *ae;
				3582	int round;
				3583	int prefm, suffm;
				3584	garray_T bwga;
				3585
				3586	fd = fopen((char *)fname, "w");
				3587	if (fd == NULL)
				3588	{
				3589	EMSG2(_(e_notopen), fname);
				3590	return;
				3591	}
				3592
				3593	fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
				3594
				3595	/* write the region names if there is more than one */
				3596	if (regcount > 1)
				3597	{
				3598	putc(regcount, fd);
				3599	fwrite(regchars, (size_t)(regcount * 2), (size_t)1, fd);
				3600	regionmask = (1 << regcount) - 1;
				3601	}
				3602	else
				3603	{
				3604	putc(0, fd);
				3605	regionmask = 0;
				3606	}
				3607
				3608	/* Write the prefix and suffix lists. */
				3609	for (round = 1; round <= 2; ++round)
				3610	{
				3611	gap = round == 1 ? prefga : suffga;
				3612	put_bytes(fd, (long_u)gap->ga_len, 2); /* <affcount> */
				3613
				3614	/* Count the total number of affix items. */
				3615	cnt = 0;
				3616	for (i = 0; i < gap->ga_len; ++i)
				3617	for (ae = ((affheader_T *)gap->ga_data + i)->ah_first;
				3618	ae != NULL; ae = ae->ae_next)
				3619	++cnt;
				3620	put_bytes(fd, (long_u)cnt, 2); /* <afftotcnt> */
				3621
				3622	for (i = 0; i < gap->ga_len; ++i)
				3623	write_affix(fd, (affheader_T *)gap->ga_data + i);
				3624	}
				3625
				3626	/* Number of bytes used for affix NR depends on affix count. */
				3627	prefm = (prefga->ga_len > 256) ? 2 : 1;
				3628	suffm = (suffga->ga_len > 256) ? 2 : 1;
				3629
				3630	/* Write the suggest info. TODO */
				3631	put_bytes(fd, 0L, 4);
				3632
				3633	/*
				3634	* Write the word list. <wordcount> <worditem> ...
				3635	*/
				3636	/* number of basic words in 4 bytes */
				3637	put_bytes(fd, newwords->ht_used, 4); /* <wordcount> */
				3638
				3639	/*
				3640	* Sort the word list, so that we can reuse as many bytes as possible.
				3641	*/
				3642	wtab = (char_u *)alloc((unsigned)(sizeof(char_u ) * newwords->ht_used));
				3643	if (wtab != NULL)
				3644	{
				3645	/* Make a table with pointers to each word. */
				3646	todo = newwords->ht_used;
				3647	for (hi = newwords->ht_array; todo > 0; ++hi)
				3648	if (!HASHITEM_EMPTY(hi))
				3649	wtab[--todo] = hi->hi_key;
				3650
				3651	/* Sort. */
				3652	sort_strings(wtab, (int)newwords->ht_used);
				3653
				3654	/* Now write each basic word to the spell file. */
				3655	ga_init2(&bwga, sizeof(basicword_T *), 10);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3656	for (todo = 0; (long_u)todo < newwords->ht_used; ++todo)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3657	{
				3658	bwf = KEY2BW(wtab[todo]);
				3659
				3660	/*
				3661	* Reorder the list of basicword_T words: make a list for words
				3662	* with the same case-folded word. Put them together for same
				3663	* caps (ONECAP, ALLCAP and various KEEPCAP words) and same
				3664	* affixes. Each list will then be put as a basic word with
				3665	* additions.
				3666	* This won't take much space, since the basic word is the same
				3667	* every time, only its length is written.
				3668	*/
				3669	bwga.ga_len = 0;
				3670	for (bw = bwf; bw != NULL; bw = bw->bw_next)
				3671	{
				3672	flags = bw->bw_flags & (BWF_ONECAP \| BWF_KEEPCAP \| BWF_ALLCAP);
				3673
				3674	/* Go through the lists we found so far. Break when the case
				3675	* matches. */
				3676	for (i = 0; i < bwga.ga_len; ++i)
				3677	{
				3678	bw2 = ((basicword_T **)bwga.ga_data)[i];
				3679	aflags = bw2->bw_flags & (BWF_ONECAP \| BWF_KEEPCAP
				3680	\| BWF_ALLCAP);
				3681	if (flags == aflags
				3682	&& ((flags & BWF_KEEPCAP) == 0
				3683	\|\| (STRCMP(bw->bw_caseword,
				3684	bw2->bw_caseword) == 0))
				3685	&& same_affixes(bw, bw2))
				3686	break;
				3687	}
				3688	if (i == bwga.ga_len)
				3689	{
				3690	/* No word with similar caps, make a new list. */
				3691	if (ga_grow(&bwga, 1) == FAIL)
				3692	break;
				3693	((basicword_T **)bwga.ga_data)[i] = bw;
				3694	bw->bw_cnext = NULL;
				3695	++bwga.ga_len;
				3696	}
				3697	else
				3698	{
				3699	/* Add to list of words with similar caps. */
				3700	bw->bw_cnext = bw2->bw_cnext;
				3701	bw2->bw_cnext = bw;
				3702	}
				3703	}
				3704
				3705	/* Prefer the word with no caps to use as the first basic word.
				3706	* At least one without KEEPCAP. */
				3707	bw = NULL;
				3708	for (i = 0; i < bwga.ga_len; ++i)
				3709	{
				3710	bw2 = ((basicword_T **)bwga.ga_data)[i];
				3711	if (bw == NULL
				3712	\|\| (bw2->bw_flags & (BWF_ONECAP \| BWF_KEEPCAP
				3713	\| BWF_ALLCAP)) == 0
				3714	\|\| (bw->bw_flags & BWF_KEEPCAP))
				3715	bw = bw2;
				3716	}
				3717
				3718	/* Write first basic word. If it's KEEPCAP then we need a word
				3719	* without VALID flag first (makes it easier to read the list back
				3720	* in). */
				3721	if (bw->bw_flags & BWF_KEEPCAP)
				3722	write_bword(fd, bw, TRUE, &prevbw, regionmask, prefm, suffm);
				3723	write_bword(fd, bw, FALSE, &prevbw, regionmask, prefm, suffm);
				3724
				3725	/* Write other basic words, with different caps. */
				3726	for (i = 0; i < bwga.ga_len; ++i)
				3727	{
				3728	bw2 = ((basicword_T **)bwga.ga_data)[i];
				3729	if (bw2 != bw)
				3730	write_bword(fd, bw2, FALSE, &prevbw, regionmask,
				3731	prefm, suffm);
				3732	}
				3733	}
				3734
				3735	ga_clear(&bwga);
				3736	}
				3737
				3738	fclose(fd);
				3739	}
				3740
				3741	/*
				3742	* Write basic word, followed by any additions.
				3743	*
				3744	* <worditem>: <nr> <string> <flags> [<flags2>]
				3745	* [<caselen> <caseword>]
				3746	* [<affixcnt> <affixNR> ...] (prefixes)
				3747	* [<affixcnt> <affixNR> ...] (suffixes)
				3748	* [<region>]
				3749	* [<addcnt> <add> ...]
				3750	*/
				3751	static void
				3752	write_bword(fd, bwf, lowcap, prevbw, regionmask, prefm, suffm)
				3753	FILE *fd;
				3754	basicword_T *bwf;
				3755	int lowcap; /* write KEEPKAP word as not-valid */
				3756	basicword_T *prevbw; / last written basic word */
				3757	int regionmask; /* mask that includes all possible regions */
				3758	int prefm;
				3759	int suffm;
				3760	{
				3761	int flags;
				3762	int aflags;
				3763	int len;
				3764	int leadlen, addlen;
				3765	int clen;
				3766	int adds = 0;
				3767	int i;
				3768	basicword_T bw, bw2;
				3769
				3770	/* Check how many bytes can be copied from the previous word. */
				3771	len = STRLEN(bwf->bw_word);
				3772	if (*prevbw == NULL)
				3773	clen = 0;
				3774	else
				3775	for (clen = 0; clen < len
				3776	&& (*prevbw)->bw_word[clen] == bwf->bw_word[clen]; ++clen)
				3777	;
				3778	putc(clen, fd); /* <nr> */
				3779	*prevbw = bwf;
				3780	/* <string> */
				3781	if (len > clen)
				3782	fwrite(bwf->bw_word + clen, (size_t)(len - clen), (size_t)1, fd);
				3783
				3784	/* Try to find a word without additions to use first. */
				3785	bw = bwf;
				3786	for (bw2 = bwf; bw2 != NULL; bw2 = bw2->bw_cnext)
				3787	{
				3788	if (bw2->bw_addstring != NULL \|\| bw2->bw_leadstring != NULL)
				3789	++adds;
				3790	else
				3791	bw = bw2;
				3792	}
				3793
				3794	/* Flags: If there is no leadstring and no addstring the basic word is
				3795	* valid, may have prefixes, suffixes and region. */
				3796	flags = bw->bw_flags;
				3797	if (bw->bw_addstring == NULL && bw->bw_leadstring == NULL)
				3798	{
				3799	flags \|= BWF_VALID;
				3800
				3801	/* Add the prefix/suffix list if there are prefixes/suffixes. */
				3802	if (bw->bw_prefix.ga_len > 0)
				3803	flags \|= BWF_PREFIX;
				3804	if (bw->bw_suffix.ga_len > 0)
				3805	flags \|= BWF_SUFFIX;
				3806
				3807	/* Flags: add the region byte if the word isn't valid in all
				3808	* regions. */
				3809	if (regionmask != 0 && (bw->bw_region & regionmask) != regionmask)
				3810	flags \|= BWF_REGION;
				3811	}
				3812
				3813	/* Flags: may have additions. */
				3814	if (adds > 0)
				3815	flags \|= BWF_ADDS;
				3816
				3817	/* The dummy word before a KEEPCAP word doesn't have any flags, they are
				3818	* in the actual word that follows. */
				3819	if (lowcap)
				3820	flags = 0;
				3821
				3822	/* Flags: when the upper byte is not used we only write one flags
				3823	* byte, if it's used then set an extra flag in the first byte and
				3824	* also write the second byte. */
				3825	if ((flags & 0xff00) == 0)
				3826	putc(flags, fd); /* <flags> */
				3827	else
				3828	{
				3829	putc(flags \| BWF_SECOND, fd); /* <flags> */
				3830	putc((int)((unsigned)flags >> 8), fd); /* <flags2> */
				3831	}
				3832
				3833	/* First dummy word doesn't need anything but flags. */
				3834	if (lowcap)
				3835	return;
				3836
				3837	if (flags & BWF_KEEPCAP)
				3838	{
				3839	len = STRLEN(bw->bw_caseword);
				3840	putc(len, fd); /* <caselen> */
				3841	for (i = 0; i < len; ++i)
				3842	putc(bw->bw_caseword[i], fd); /* <caseword> */
				3843	}
				3844
				3845	/* write prefix and suffix lists: <affixcnt> <affixNR> ... */
				3846	if (flags & BWF_PREFIX)
				3847	write_affixlist(fd, &bw->bw_prefix, prefm);
				3848	if (flags & BWF_SUFFIX)
				3849	write_affixlist(fd, &bw->bw_suffix, suffm);
				3850
				3851	if (flags & BWF_REGION)
				3852	putc(bw->bw_region, fd); /* <region> */
				3853
				3854	/*
				3855	* Additions.
				3856	*/
				3857	if (adds > 0)
				3858	{
				3859	put_bytes(fd, (long_u)adds, 2); /* <addcnt> */
				3860
				3861	for (bw = bwf; bw != NULL; bw = bw->bw_cnext)
				3862	if (bw->bw_leadstring != NULL \|\| bw->bw_addstring != NULL)
				3863	{
				3864	/* <add>: <addflags> <addlen> [<leadlen> <addstring>]
				3865	* [<region>] */
				3866	aflags = 0;
				3867	if (bw->bw_flags & BWF_ONECAP)
				3868	aflags \|= ADD_ONECAP;
				3869	if (bw->bw_flags & BWF_ALLCAP)
				3870	aflags \|= ADD_ALLCAP;
				3871	if (bw->bw_flags & BWF_KEEPCAP)
				3872	aflags \|= ADD_KEEPCAP;
				3873	if (regionmask != 0
				3874	&& (bw->bw_region & regionmask) != regionmask)
				3875	aflags \|= ADD_REGION;
				3876	putc(aflags, fd); /* <addflags> */
				3877
				3878	if (bw->bw_leadstring == NULL)
				3879	leadlen = 0;
				3880	else
				3881	leadlen = STRLEN(bw->bw_leadstring);
				3882	if (bw->bw_addstring == NULL)
				3883	addlen = 0;
				3884	else
				3885	addlen = STRLEN(bw->bw_addstring);
				3886	putc(leadlen + addlen, fd); /* <addlen> */
				3887	putc(leadlen, fd); /* <leadlen> */
				3888	/* <addstring> */
				3889	if (bw->bw_leadstring != NULL)
				3890	fwrite(bw->bw_leadstring, (size_t)leadlen, (size_t)1, fd);
				3891	if (bw->bw_addstring != NULL)
				3892	fwrite(bw->bw_addstring, (size_t)addlen, (size_t)1, fd);
				3893
				3894	if (aflags & ADD_REGION)
				3895	putc(bw->bw_region, fd); /* <region> */
				3896	}
				3897	}
				3898	}
				3899
				3900
				3901	/*
				3902	* ":mkspell outfile infile ..."
				3903	*/
				3904	void
				3905	ex_mkspell(eap)
				3906	exarg_T *eap;
				3907	{
				3908	int fcount;
				3909	char_u **fnames;
				3910	char_u fname[MAXPATHL];
				3911	char_u wfname[MAXPATHL];
				3912	afffile_T *(afile[8]);
				3913	hashtab_T dfile[8];
				3914	int i;
				3915	int len;
				3916	char_u region_name[16];
				3917	struct stat st;
				3918	int round;
				3919	vimconv_T conv;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3920	int ascii = FALSE;
				3921	char_u *arg = eap->arg;
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3922
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3923	if (STRNCMP(arg, "-ascii", 6) == 0)
				3924	{
				3925	ascii = TRUE;
				3926	arg = skipwhite(arg + 6);
				3927	}
				3928
				3929	/* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
				3930	if (get_arglist_exp(arg, &fcount, &fnames) == FAIL)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3931	return;
				3932	if (fcount < 2)
				3933	EMSG(_(e_invarg)); /* need at least output and input names */
				3934	else if (fcount > 9)
				3935	EMSG(_("E754: Only up to 8 regions supported"));
				3936	else
				3937	{
				3938	/* Check for overwriting before doing things that may take a lot of
				3939	* time. */
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3940	sprintf((char *)wfname, "%s.%s.spl", fnames[0],
				3941	ascii ? (char_u *)"ascii" : p_enc);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3942	if (!eap->forceit && mch_stat((char *)wfname, &st) >= 0)
				3943	{
				3944	EMSG(_(e_exists));
				3945	goto theend;
				3946	}
				3947	if (mch_isdir(fnames[0]))
				3948	{
				3949	EMSG2(_(e_isadir2), fnames[0]);
				3950	goto theend;
				3951	}
				3952
				3953	/*
				3954	* Init the aff and dic pointers.
				3955	* Get the region names if there are more than 2 arguments.
				3956	*/
				3957	for (i = 1; i < fcount; ++i)
				3958	{
				3959	afile[i - 1] = NULL;
				3960	hash_init(&dfile[i - 1]);
				3961	if (fcount > 2)
				3962	{
				3963	len = STRLEN(fnames[i]);
				3964	if (STRLEN(gettail(fnames[i])) < 5 \|\| fnames[i][len - 3] != '_')
				3965	{
				3966	EMSG2(_("E755: Invalid region in %s"), fnames[i]);
				3967	goto theend;
				3968	}
				3969	else
				3970	{
				3971	region_name[(i - 1) * 2] = TOLOWER_ASC(fnames[i][len - 2]);
				3972	region_name[(i - 1) * 2 + 1] =
				3973	TOLOWER_ASC(fnames[i][len - 1]);
				3974	}
				3975	}
				3976	}
				3977
				3978	/*
				3979	* Read all the .aff and .dic files.
				3980	* Text is converted to 'encoding'.
				3981	*/
				3982	for (i = 1; i < fcount; ++i)
				3983	{
				3984	/* Read the .aff file. Will init "conv" based on the "SET" line. */
				3985	conv.vc_type = CONV_NONE;
				3986	sprintf((char *)fname, "%s.aff", fnames[i]);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3987	if ((afile[i - 1] = spell_read_aff(fname, &conv, ascii)) == NULL)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3988	break;
				3989
				3990	/* Read the .dic file. */
				3991	sprintf((char *)fname, "%s.dic", fnames[i]);
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	3992	if (spell_read_dic(&dfile[i - 1], fname, &conv, ascii) == FAIL)
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	3993	break;
				3994
				3995	/* Free any conversion stuff. */
				3996	convert_setup(&conv, NULL, NULL);
				3997	}
				3998
				3999	/* Process the data when all the files could be read. */
				4000	if (i == fcount)
				4001	{
				4002	garray_T prefga;
				4003	garray_T suffga;
				4004	garray_T *gap;
				4005	hashtab_T newwords;
				4006
				4007	/*
				4008	* Combine all the affixes into one new affix list. This is done
				4009	* for prefixes and suffixes separately.
				4010	* We need to do this for each region, try to re-use the same
				4011	* affixes.
				4012	* Since we number the new affix entries, a growarray will do. In
				4013	* the affheader_T the ah_key is unused.
				4014	*/
				4015	MSG(_("Combining affixes..."));
				4016	out_flush();
				4017	for (round = 1; round <= 2; ++round)
				4018	{
				4019	gap = round == 1 ? &prefga : &suffga;
				4020	ga_init2(gap, sizeof(affheader_T), 50);
				4021	for (i = 1; i < fcount; ++i)
				4022	get_new_aff(round == 1 ? &afile[i - 1]->af_pref
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	4023	: &afile[i - 1]->af_suff,
				4024	gap, round == 1);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	4025	}
				4026
				4027	/*
				4028	* Go over all words and:
				4029	* - change the old affix names to the new affix numbers
				4030	* - check the conditions
				4031	* - fold case
				4032	* - extract the basic word and additions.
				4033	* Do this for each region.
				4034	*/
				4035	MSG(_("Building word list..."));
				4036	out_flush();
				4037	hash_init(&newwords);
				4038
				4039	for (i = 1; i < fcount; ++i)
				4040	build_wordlist(&newwords, &dfile[i - 1], afile[i - 1],
				4041	1 << (i - 1));
				4042
				4043	if (fcount > 2)
				4044	{
				4045	/* Combine words for the different regions into one. */
				4046	MSG(_("Combining regions..."));
				4047	out_flush();
				4048	combine_regions(&newwords);
				4049	}
				4050
				4051	/*
				4052	* Affixes on a word with additions are clumsy, would require
				4053	* inefficient searching. Turn the affixes into additions and/or
				4054	* the expanded word.
				4055	*/
				4056	MSG(_("Processing words..."));
				4057	out_flush();
				4058	expand_affixes(&newwords, &prefga, &suffga);
				4059
				4060	/* Write the info in the spell file. */
				4061	smsg((char_u *)_("Writing spell file %s..."), wfname);
				4062	out_flush();
				4063	write_vim_spell(wfname, &prefga, &suffga, &newwords,
				4064	fcount - 1, region_name);
				4065	MSG(_("Done!"));
				4066	out_flush();
				4067
				4068	/* Free the allocated stuff. */
				4069	free_wordtable(&newwords);
				4070	for (round = 1; round <= 2; ++round)
				4071	{
				4072	gap = round == 1 ? &prefga: &suffga;
				4073	for (i = 0; i < gap->ga_len; ++i)
				4074	free_affixentries(((affheader_T *)gap->ga_data + i)
				4075	->ah_first);
				4076	ga_clear(gap);
				4077	}
				4078	}
				4079
				4080	/* Free the .aff and .dic file structures. */
				4081	for (i = 1; i < fcount; ++i)
				4082	{
				4083	if (afile[i - 1] != NULL)
				4084	spell_free_aff(afile[i - 1]);
				4085	spell_free_dic(&dfile[i - 1]);
				4086	}
				4087	}
				4088
				4089	theend:
				4090	FreeWild(fcount, fnames);
				4091	}
				4092
				4093	static void
				4094	free_wordtable(ht)
				4095	hashtab_T *ht;
				4096	{
				4097	int todo;
				4098	basicword_T bw, nbw;
				4099	hashitem_T *hi;
				4100
				4101	todo = ht->ht_used;
				4102	for (hi = ht->ht_array; todo > 0; ++hi)
				4103	{
				4104	if (!HASHITEM_EMPTY(hi))
				4105	{
				4106	--todo;
				4107	for (bw = HI2BW(hi); bw != NULL; bw = nbw)
				4108	{
				4109	nbw = bw->bw_next;
				4110	free_basicword(bw);
				4111	}
				4112	}
				4113	}
				4114	}
				4115
				4116	/*
				4117	* Free a basicword_T and what it contains.
				4118	*/
				4119	static void
				4120	free_basicword(bw)
				4121	basicword_T *bw;
				4122	{
				4123	ga_clear(&bw->bw_prefix);
				4124	ga_clear(&bw->bw_suffix);
				4125	vim_free(bw->bw_caseword);
				4126	vim_free(bw->bw_leadstring);
				4127	vim_free(bw->bw_addstring);
				4128	vim_free(bw);
				4129	}
				4130
				4131	/*
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	4132	* Free a list of affentry_T and what they contain.
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	4133	*/
				4134	static void
				4135	free_affixentries(first)
				4136	affentry_T *first;
				4137	{
				4138	affentry_T ap, an;
				4139
				4140	for (ap = first; ap != NULL; ap = an)
				4141	{
				4142	an = ap->ae_next;
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	4143	free_affix_entry(ap);
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	4144	}
				4145	}
				4146
Bram Moolenaar	5482f33	2005-04-17 20:18:43 +0000	[diff] [blame]	4147	/*
				4148	* Free one affentry_T and what it contains.
				4149	*/
				4150	static void
				4151	free_affix_entry(ap)
				4152	affentry_T *ap;
				4153	{
				4154	vim_free(ap->ae_chop);
				4155	vim_free(ap->ae_add);
				4156	vim_free(ap->ae_cond);
				4157	vim_free(ap->ae_prog);
				4158	vim_free(ap);
				4159	}
				4160
Bram Moolenaar	402d2fe	2005-04-15 21:00:38 +0000	[diff] [blame]	4161	#endif /* FEAT_MBYTE */
				4162
				4163	#endif /* FEAT_SYN_HL */