Blame - src/spellfile.c - android_external_vim

blob: c7d87c6c726bddbf383e235b5222bd3dbd8d975b [file] [log] [blame]

Bram Moolenaar	edf3f97	2016-08-29 22:49:24 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* spellfile.c: code for reading and writing spell files.
				12	*
				13	* See spell.c for information about spell checking.
				14	*/
				15
				16	/*
				17	* Vim spell file format: <HEADER>
				18	* <SECTIONS>
				19	* <LWORDTREE>
				20	* <KWORDTREE>
				21	* <PREFIXTREE>
				22	*
				23	* <HEADER>: <fileID> <versionnr>
				24	*
				25	* <fileID> 8 bytes "VIMspell"
				26	* <versionnr> 1 byte VIMSPELLVERSION
				27	*
				28	*
				29	* Sections make it possible to add information to the .spl file without
				30	* making it incompatible with previous versions. There are two kinds of
				31	* sections:
				32	* 1. Not essential for correct spell checking. E.g. for making suggestions.
				33	* These are skipped when not supported.
				34	* 2. Optional information, but essential for spell checking when present.
				35	* E.g. conditions for affixes. When this section is present but not
				36	* supported an error message is given.
				37	*
				38	* <SECTIONS>: <section> ... <sectionend>
				39	*
				40	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				41	*
				42	* <sectionID> 1 byte number from 0 to 254 identifying the section
				43	*
				44	* <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
				45	* spell checking
				46	*
				47	* <sectionlen> 4 bytes length of section contents, MSB first
				48	*
				49	* <sectionend> 1 byte SN_END
				50	*
				51	*
				52	* sectionID == SN_INFO: <infotext>
				53	* <infotext> N bytes free format text with spell file info (version,
				54	* website, etc)
				55	*
				56	* sectionID == SN_REGION: <regionname> ...
				57	* <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
				58	* First <regionname> is region 1.
				59	*
				60	* sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
				61	* <folcharslen> <folchars>
				62	* <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
				63	* <charflags> N bytes List of flags (first one is for character 128):
				64	* 0x01 word character CF_WORD
				65	* 0x02 upper-case character CF_UPPER
				66	* <folcharslen> 2 bytes Number of bytes in <folchars>.
				67	* <folchars> N bytes Folded characters, first one is for character 128.
				68	*
				69	* sectionID == SN_MIDWORD: <midword>
				70	* <midword> N bytes Characters that are word characters only when used
				71	* in the middle of a word.
				72	*
				73	* sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
				74	* <prefcondcnt> 2 bytes Number of <prefcond> items following.
				75	* <prefcond> : <condlen> <condstr>
				76	* <condlen> 1 byte Length of <condstr>.
				77	* <condstr> N bytes Condition for the prefix.
				78	*
				79	* sectionID == SN_REP: <repcount> <rep> ...
				80	* <repcount> 2 bytes number of <rep> items, MSB first.
				81	* <rep> : <repfromlen> <repfrom> <reptolen> <repto>
				82	* <repfromlen> 1 byte length of <repfrom>
				83	* <repfrom> N bytes "from" part of replacement
				84	* <reptolen> 1 byte length of <repto>
				85	* <repto> N bytes "to" part of replacement
				86	*
				87	* sectionID == SN_REPSAL: <repcount> <rep> ...
				88	* just like SN_REP but for soundfolded words
				89	*
				90	* sectionID == SN_SAL: <salflags> <salcount> <sal> ...
				91	* <salflags> 1 byte flags for soundsalike conversion:
				92	* SAL_F0LLOWUP
				93	* SAL_COLLAPSE
				94	* SAL_REM_ACCENTS
				95	* <salcount> 2 bytes number of <sal> items following
				96	* <sal> : <salfromlen> <salfrom> <saltolen> <salto>
				97	* <salfromlen> 1 byte length of <salfrom>
				98	* <salfrom> N bytes "from" part of soundsalike
				99	* <saltolen> 1 byte length of <salto>
				100	* <salto> N bytes "to" part of soundsalike
				101	*
				102	* sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				103	* <sofofromlen> 2 bytes length of <sofofrom>
				104	* <sofofrom> N bytes "from" part of soundfold
				105	* <sofotolen> 2 bytes length of <sofoto>
				106	* <sofoto> N bytes "to" part of soundfold
				107	*
				108	* sectionID == SN_SUGFILE: <timestamp>
				109	* <timestamp> 8 bytes time in seconds that must match with .sug file
				110	*
				111	* sectionID == SN_NOSPLITSUGS: nothing
				112	*
				113	* sectionID == SN_NOCOMPOUNDSUGS: nothing
				114	*
				115	* sectionID == SN_WORDS: <word> ...
				116	* <word> N bytes NUL terminated common word
				117	*
				118	* sectionID == SN_MAP: <mapstr>
				119	* <mapstr> N bytes String with sequences of similar characters,
				120	* separated by slashes.
				121	*
				122	* sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
				123	* <comppatcount> <comppattern> ... <compflags>
				124	* <compmax> 1 byte Maximum nr of words in compound word.
				125	* <compminlen> 1 byte Minimal word length for compounding.
				126	* <compsylmax> 1 byte Maximum nr of syllables in compound word.
				127	* <compoptions> 2 bytes COMP_ flags.
				128	* <comppatcount> 2 bytes number of <comppattern> following
				129	* <compflags> N bytes Flags from COMPOUNDRULE items, separated by
				130	* slashes.
				131	*
				132	* <comppattern>: <comppatlen> <comppattext>
				133	* <comppatlen> 1 byte length of <comppattext>
				134	* <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
				135	*
				136	* sectionID == SN_NOBREAK: (empty, its presence is what matters)
				137	*
				138	* sectionID == SN_SYLLABLE: <syllable>
				139	* <syllable> N bytes String from SYLLABLE item.
				140	*
				141	* <LWORDTREE>: <wordtree>
				142	*
				143	* <KWORDTREE>: <wordtree>
				144	*
				145	* <PREFIXTREE>: <wordtree>
				146	*
				147	*
				148	* <wordtree>: <nodecount> <nodedata> ...
				149	*
				150	* <nodecount> 4 bytes Number of nodes following. MSB first.
				151	*
				152	* <nodedata>: <siblingcount> <sibling> ...
				153	*
				154	* <siblingcount> 1 byte Number of siblings in this node. The siblings
				155	* follow in sorted order.
				156	*
				157	* <sibling>: <byte> [ <nodeidx> <xbyte>
				158	* | <flags> [<flags2>] [<region>] [<affixID>]
				159	* | [<pflags>] <affixID> <prefcondnr> ]
				160	*
				161	* <byte> 1 byte Byte value of the sibling. Special cases:
				162	* BY_NOFLAGS: End of word without flags and for all
				163	* regions.
				164	* For PREFIXTREE <affixID> and
				165	* <prefcondnr> follow.
				166	* BY_FLAGS: End of word, <flags> follow.
				167	* For PREFIXTREE <pflags>, <affixID>
				168	* and <prefcondnr> follow.
				169	* BY_FLAGS2: End of word, <flags> and <flags2>
				170	* follow. Not used in PREFIXTREE.
				171	* BY_INDEX: Child of sibling is shared, <nodeidx>
				172	* and <xbyte> follow.
				173	*
				174	* <nodeidx> 3 bytes Index of child for this sibling, MSB first.
				175	*
				176	* <xbyte> 1 byte byte value of the sibling.
				177	*
				178	* <flags> 1 byte bitmask of:
				179	* WF_ALLCAP word must have only capitals
				180	* WF_ONECAP first char of word must be capital
				181	* WF_KEEPCAP keep-case word
				182	* WF_FIXCAP keep-case word, all caps not allowed
				183	* WF_RARE rare word
				184	* WF_BANNED bad word
				185	* WF_REGION <region> follows
				186	* WF_AFX <affixID> follows
				187	*
				188	* <flags2> 1 byte Bitmask of:
				189	* WF_HAS_AFF >> 8 word includes affix
				190	* WF_NEEDCOMP >> 8 word only valid in compound
				191	* WF_NOSUGGEST >> 8 word not used for suggestions
				192	* WF_COMPROOT >> 8 word already a compound
				193	* WF_NOCOMPBEF >> 8 no compounding before this word
				194	* WF_NOCOMPAFT >> 8 no compounding after this word
				195	*
				196	* <pflags> 1 byte bitmask of:
				197	* WFP_RARE rare prefix
				198	* WFP_NC non-combining prefix
				199	* WFP_UP letter after prefix made upper case
				200	*
				201	* <region> 1 byte Bitmask for regions in which word is valid. When
				202	* omitted it's valid in all regions.
				203	* Lowest bit is for region 1.
				204	*
				205	* <affixID> 1 byte ID of affix that can be used with this word. In
				206	* PREFIXTREE used for the required prefix ID.
				207	*
				208	* <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
				209	* from HEADER.
				210	*
				211	* All text characters are in 'encoding', but stored as single bytes.
				212	*/
				213
				214	/*
				215	* Vim .sug file format: <SUGHEADER>
				216	* <SUGWORDTREE>
				217	* <SUGTABLE>
				218	*
				219	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				220	*
				221	* <fileID> 6 bytes "VIMsug"
				222	* <versionnr> 1 byte VIMSUGVERSION
				223	* <timestamp> 8 bytes timestamp that must match with .spl file
				224	*
				225	*
				226	* <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
				227	*
				228	*
				229	* <SUGTABLE>: <sugwcount> <sugline> ...
				230	*
				231	* <sugwcount> 4 bytes number of <sugline> following
				232	*
				233	* <sugline>: <sugnr> ... NUL
				234	*
				235	* <sugnr>: X bytes word number that results in this soundfolded word,
				236	* stored as an offset to the previous number in as
				237	* few bytes as possible (see offset2bytes())
				238	*/
				239
				240	#include "vim.h"
				241
				242	#if defined(FEAT_SPELL) \|\| defined(PROTO)
				243
				244	#ifndef UNIX /* it's in os_unix.h for Unix */
				245	# include <time.h> /* for time_t */
				246	#endif
				247
				248	#ifndef UNIX /* it's in os_unix.h for Unix */
				249	# include <time.h> /* for time_t */
				250	#endif
				251
				252	/* Special byte values for <byte>. Some are only used in the tree for
				253	* postponed prefixes, some only in the other trees. This is a bit messy... */
				254	#define BY_NOFLAGS 0 /* end of word without flags or region; for
				255	* postponed prefix: no <pflags> */
				256	#define BY_INDEX 1 /* child is shared, index follows */
				257	#define BY_FLAGS 2 /* end of word, <flags> byte follows; for
				258	* postponed prefix: <pflags> follows */
				259	#define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
				260	* follow; never used in prefix tree */
				261	#define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
				262
				263	/* Flags used in .spl file for soundsalike flags. */
					/* These are the bits of the <salflags> byte of the SN_SAL section. */
				264	#define SAL_F0LLOWUP 1
				265	#define SAL_COLLAPSE 2
				266	#define SAL_REM_ACCENTS 4
				267
					/* <HEADER> of a .spl file: the <fileID> string, its length in bytes and
					 * the <versionnr>.  spell_load_file() rejects files whose version byte
					 * is lower or higher than VIMSPELLVERSION. */
				268	#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
				269	#define VIMSPELLMAGICL 8
				270	#define VIMSPELLVERSION 50
				271
				272	/* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
				273	#define SN_REGION 0 /* <regionname> section */
				274	#define SN_CHARFLAGS 1 /* charflags section */
				275	#define SN_MIDWORD 2 /* <midword> section */
				276	#define SN_PREFCOND 3 /* <prefcond> section */
				277	#define SN_REP 4 /* REP items section */
				278	#define SN_SAL 5 /* SAL items section */
				279	#define SN_SOFO 6 /* soundfolding section */
				280	#define SN_MAP 7 /* MAP items section */
				281	#define SN_COMPOUND 8 /* compound words section */
				282	#define SN_SYLLABLE 9 /* syllable section */
				283	#define SN_NOBREAK 10 /* NOBREAK section */
				284	#define SN_SUGFILE 11 /* timestamp for .sug file */
				285	#define SN_REPSAL 12 /* REPSAL items section */
				286	#define SN_WORDS 13 /* common words */
				287	#define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
				288	#define SN_INFO 15 /* info section */
				289	#define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
				290	#define SN_END 255 /* end of sections */
				291
					/* When this bit is set in <sectionflags> and the section ID is not
					 * known, spell_load_file() gives an error instead of skipping it. */
				292	#define SNF_REQUIRED 1 /* <sectionflags>: required section */
				293
					/* Bits used in <charflags> of the SN_CHARFLAGS section. */
				294	#define CF_WORD 0x01
				295	#define CF_UPPER 0x02
				296
				296
				297	static int set_spell_finish(spelltab_T *new_st);
				298	static int write_spell_prefcond(FILE fd, garray_T gap);
				299	static char_u read_cnt_string(FILE fd, int cnt_bytes, int *lenp);
				300	static int read_region_section(FILE fd, slang_T slang, int len);
				301	static int read_charflags_section(FILE *fd);
				302	static int read_prefcond_section(FILE fd, slang_T lp);
				303	static int read_rep_section(FILE fd, garray_T gap, short *first);
				304	static int read_sal_section(FILE fd, slang_T slang);
				305	static int read_words_section(FILE fd, slang_T lp, int len);
				306	static int read_sofo_section(FILE fd, slang_T slang);
				307	static int read_compound(FILE fd, slang_T slang, int len);
				308	static int set_sofo(slang_T lp, char_u from, char_u *to);
				309	static void set_sal_first(slang_T *lp);
				310	#ifdef FEAT_MBYTE
				311	static int mb_str2wide(char_u s);
				312	#endif
				313	static int spell_read_tree(FILE fd, char_u bytsp, idx_T *idxsp, int prefixtree, int prefixcnt);
				314	static idx_T read_tree_node(FILE fd, char_u byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
				315	static void spell_reload_one(char_u *fname, int added_word);
				316	static void set_spell_charflags(char_u flags, int cnt, char_u upp);
				317	static int set_spell_chartab(char_u fol, char_u low, char_u *upp);
				318	static void set_map_str(slang_T lp, char_u map);
				319
				320
					/* Message texts kept in one place.  They are passed through _() at the
					 * point of use, e.g. EMSG(_(e_spell_trunc)) in spell_load_file(). */
				321	static char *e_spell_trunc = N_("E758: Truncated spell file");
				322	static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
				323	static char *e_affname = N_("Affix name too long in %s line %d: %s");
				324	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				325	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				326	static char *msg_compressing = N_("Compressing word tree...");
				327
				328	/*
				329	* Load one spell file and store the info into a slang_T.
				330	*
				331	* This is invoked in three ways:
				332	* - From spell_load_cb() to load a spell file for the first time. "lang" is
				333	* the language name, "old_lp" is NULL. Will allocate an slang_T.
				334	* - To reload a spell file that was changed. "lang" is NULL and "old_lp"
				335	* points to the existing slang_T.
				336	* - Just after writing a .spl file; it's read back to produce the .sug file.
				337	* "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
				338	*
				339	* Returns the slang_T the spell file was loaded into. NULL for error.
				340	*/
					/*
					 * Outline: open "fname", check the "VIMspell" magic and the version
					 * byte, read sections until SN_END, then read the three word trees.
					 * "silent": when non-zero a file that cannot be opened gives no error,
					 * only a message when p_verbose is above 2.
					 * On failure "lang" (when not NULL) is truncated to an empty string and
					 * an slang_T allocated here is freed; "old_lp" is never freed here.
					 */
				341	slang_T *
				342	spell_load_file(
				343	char_u *fname,
				344	char_u *lang,
				345	slang_T *old_lp,
				346	int silent) /* no error if file doesn't exist */
				347	{
				348	FILE *fd;
				349	char_u buf[VIMSPELLMAGICL];
				350	char_u *p;
				351	int i;
				352	int n;
				353	int len;
				354	char_u *save_sourcing_name = sourcing_name;
				355	linenr_T save_sourcing_lnum = sourcing_lnum;
				356	slang_T *lp = NULL;
				357	int c = 0;
				358	int res;
				359
				360	fd = mch_fopen((char *)fname, "r");
				361	if (fd == NULL)
				362	{
				363	if (!silent)
				364	EMSG2(_(e_notopen), fname);
				365	else if (p_verbose > 2)
				366	{
				367	verbose_enter();
				368	smsg((char_u *)e_notopen, fname);
				369	verbose_leave();
				370	}
				371	goto endFAIL;
				372	}
				373	if (p_verbose > 2)
				374	{
				375	verbose_enter();
				376	smsg((char_u *)_("Reading spell file \"%s\""), fname);
				377	verbose_leave();
				378	}
				379
					/* Without "old_lp" a new slang_T is allocated; otherwise the existing
					 * one is filled in again. */
				380	if (old_lp == NULL)
				381	{
				382	lp = slang_alloc(lang);
				383	if (lp == NULL)
				384	goto endFAIL;
				385
				386	/* Remember the file name, used to reload the file when it's updated. */
				387	lp->sl_fname = vim_strsave(fname);
				388	if (lp->sl_fname == NULL)
				389	goto endFAIL;
				390
				391	/* Check for .add.spl (_add.spl for VMS). */
				392	lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
				393	}
				394	else
				395	lp = old_lp;
				396
				397	/* Set sourcing_name, so that error messages mention the file name. */
					/* Both values are restored at "endOK" below. */
				398	sourcing_name = fname;
				399	sourcing_lnum = 0;
				400
				401	/*
				402	* <HEADER>: <fileID>
				403	*/
				404	for (i = 0; i < VIMSPELLMAGICL; ++i)
				405	buf[i] = getc(fd); /* <fileID> */
				406	if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
				407	{
				408	EMSG(_("E757: This does not look like a spell file"));
				409	goto endFAIL;
				410	}
				411	c = getc(fd); /* <versionnr> */
				412	if (c < VIMSPELLVERSION)
				413	{
				414	EMSG(_("E771: Old spell file, needs to be updated"));
				415	goto endFAIL;
				416	}
				417	else if (c > VIMSPELLVERSION)
				418	{
				419	EMSG(_("E772: Spell file is for newer version of Vim"));
				420	goto endFAIL;
				421	}
				422
				423
				424	/*
				425	* <SECTIONS>: <section> ... <sectionend>
				426	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				427	*/
				428	for (;;)
				429	{
				430	n = getc(fd); /* <sectionID> or <sectionend> */
				431	if (n == SN_END)
				432	break;
				433	c = getc(fd); /* <sectionflags> */
				434	len = get4c(fd); /* <sectionlen> */
				435	if (len < 0)
				436	goto truncerr;
				437
					/* "res" stays zero for sections that are handled inline; the
					 * read_*() helpers return an SP_*ERROR value in it on failure. */
				438	res = 0;
				439	switch (n)
				440	{
				441	case SN_INFO:
				442	lp->sl_info = read_string(fd, len); /* <infotext> */
				443	if (lp->sl_info == NULL)
				444	goto endFAIL;
				445	break;
				446
				447	case SN_REGION:
				448	res = read_region_section(fd, lp, len);
				449	break;
				450
				451	case SN_CHARFLAGS:
				452	res = read_charflags_section(fd);
				453	break;
				454
				455	case SN_MIDWORD:
				456	lp->sl_midword = read_string(fd, len); /* <midword> */
				457	if (lp->sl_midword == NULL)
				458	goto endFAIL;
				459	break;
				460
				461	case SN_PREFCOND:
				462	res = read_prefcond_section(fd, lp);
				463	break;
				464
				465	case SN_REP:
				466	res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
				467	break;
				468
				469	case SN_REPSAL:
				470	res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
				471	break;
				472
				473	case SN_SAL:
				474	res = read_sal_section(fd, lp);
				475	break;
				476
				477	case SN_SOFO:
				478	res = read_sofo_section(fd, lp);
				479	break;
				480
				481	case SN_MAP:
				482	p = read_string(fd, len); /* <mapstr> */
				483	if (p == NULL)
				484	goto endFAIL;
				485	set_map_str(lp, p);
				486	vim_free(p);
				487	break;
				488
				489	case SN_WORDS:
				490	res = read_words_section(fd, lp, len);
				491	break;
				492
				493	case SN_SUGFILE:
				494	lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
				495	break;
				496
				497	case SN_NOSPLITSUGS:
				498	lp->sl_nosplitsugs = TRUE;
				499	break;
				500
				501	case SN_NOCOMPOUNDSUGS:
				502	lp->sl_nocompoundsugs = TRUE;
				503	break;
				504
				505	case SN_COMPOUND:
				506	res = read_compound(fd, lp, len);
				507	break;
				508
				509	case SN_NOBREAK:
				510	lp->sl_nobreak = TRUE;
				511	break;
				512
				513	case SN_SYLLABLE:
				514	lp->sl_syllable = read_string(fd, len); /* <syllable> */
				515	if (lp->sl_syllable == NULL)
				516	goto endFAIL;
				517	if (init_syl_tab(lp) == FAIL)
				518	goto endFAIL;
				519	break;
				520
				521	default:
				522	/* Unsupported section. When it's required give an error
				523	* message. When it's not required skip the contents. */
				524	if (c & SNF_REQUIRED)
				525	{
				526	EMSG(_("E770: Unsupported section in spell file"));
				527	goto endFAIL;
				528	}
				529	while (--len >= 0)
				530	if (getc(fd) < 0)
				531	goto truncerr;
				532	break;
				533	}
					/* This label is inside the loop but is also the target of the
					 * "goto someerror" statements after the loop, which arrive with a
					 * non-zero "res" from spell_read_tree(). */
				534	someerror:
				535	if (res == SP_FORMERROR)
				536	{
				537	EMSG(_(e_format));
				538	goto endFAIL;
				539	}
				540	if (res == SP_TRUNCERROR)
				541	{
					/* Also jumped to directly when a length or a skipped byte could
					 * not be read. */
				542	truncerr:
				543	EMSG(_(e_spell_trunc));
				544	goto endFAIL;
				545	}
				546	if (res == SP_OTHERERROR)
				547	goto endFAIL;
				548	}
				549
				550	/* <LWORDTREE> */
				551	res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
				552	if (res != 0)
				553	goto someerror;
				554
				555	/* <KWORDTREE> */
				556	res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
				557	if (res != 0)
				558	goto someerror;
				559
				560	/* <PREFIXTREE> */
				561	res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
				562	lp->sl_prefixcnt);
				563	if (res != 0)
				564	goto someerror;
				565
				566	/* For a new file link it in the list of spell files. */
					/* Not done when "lang" is NULL. */
				567	if (old_lp == NULL && lang != NULL)
				568	{
				569	lp->sl_next = first_lang;
				570	first_lang = lp;
				571	}
				572
				573	goto endOK;
				574
				575	endFAIL:
				576	if (lang != NULL)
				577	/* truncating the name signals the error to spell_load_lang() */
				578	*lang = NUL;
					/* Only free what was allocated in this call; NULL is returned either
					 * way. */
				579	if (lp != NULL && old_lp == NULL)
				580	slang_free(lp);
				581	lp = NULL;
				582
					/* Common exit for success and failure: close the file and restore the
					 * saved sourcing info. */
				583	endOK:
				584	if (fd != NULL)
				585	fclose(fd);
				586	sourcing_name = save_sourcing_name;
				587	sourcing_lnum = save_sourcing_lnum;
				588
				589	return lp;
				590	}
				591
				592	/*
				593	* Fill in the wordcount fields for a trie.
				594	* Returns the total number of words.
				595	*/
				596	static void
				597	tree_count_words(char_u byts, idx_T idxs)
				598	{
				599	int depth;
				600	idx_T arridx[MAXWLEN];
				601	int curi[MAXWLEN];
				602	int c;
				603	idx_T n;
				604	int wordcount[MAXWLEN];
				605
				606	arridx[0] = 0;
				607	curi[0] = 1;
				608	wordcount[0] = 0;
				609	depth = 0;
				610	while (depth >= 0 && !got_int)
				611	{
				612	if (curi[depth] > byts[arridx[depth]])
				613	{
				614	/* Done all bytes at this node, go up one level. */
				615	idxs[arridx[depth]] = wordcount[depth];
				616	if (depth > 0)
				617	wordcount[depth - 1] += wordcount[depth];
				618
				619	--depth;
				620	fast_breakcheck();
				621	}
				622	else
				623	{
				624	/* Do one more byte at this node. */
				625	n = arridx[depth] + curi[depth];
				626	++curi[depth];
				627
				628	c = byts[n];
				629	if (c == 0)
				630	{
				631	/* End of word, count it. */
				632	++wordcount[depth];
				633
				634	/* Skip over any other NUL bytes (same word with different
				635	* flags). */
				636	while (byts[n + 1] == 0)
				637	{
				638	++n;
				639	++curi[depth];
				640	}
				641	}
				642	else
				643	{
				644	/* Normal char, go one level deeper to count the words. */
				645	++depth;
				646	arridx[depth] = idxs[n];
				647	curi[depth] = 1;
				648	wordcount[depth] = 0;
				649	}
				650	}
				651	}
				652	}
				653
				654	/*
				655	* Load the .sug files for languages that have one and weren't loaded yet.
				656	*/
				657	void
				658	suggest_load_files(void)
				659	{
				660	langp_T *lp;
				661	int lpi;
				662	slang_T *slang;
				663	char_u *dotp;
				664	FILE *fd;
				665	char_u buf[MAXWLEN];
				666	int i;
				667	time_t timestamp;
				668	int wcount;
				669	int wordnr;
				670	garray_T ga;
				671	int c;
				672
				673	/* Do this for all languages that support sound folding. */
				674	for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
				675	{
				676	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
				677	slang = lp->lp_slang;
				678	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
				679	{
				680	/* Change ".spl" to ".sug" and open the file. When the file isn't
				681	* found silently skip it. Do set "sl_sugloaded" so that we
				682	* don't try again and again. */
				683	slang->sl_sugloaded = TRUE;
				684
				685	dotp = vim_strrchr(slang->sl_fname, '.');
				686	if (dotp == NULL \|\| fnamecmp(dotp, ".spl") != 0)
				687	continue;
				688	STRCPY(dotp, ".sug");
				689	fd = mch_fopen((char *)slang->sl_fname, "r");
				690	if (fd == NULL)
				691	goto nextone;
				692
				693	/*
				694	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				695	*/
				696	for (i = 0; i < VIMSUGMAGICL; ++i)
				697	buf[i] = getc(fd); /* <fileID> */
				698	if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
				699	{
				700	EMSG2(_("E778: This does not look like a .sug file: %s"),
				701	slang->sl_fname);
				702	goto nextone;
				703	}
				704	c = getc(fd); /* <versionnr> */
				705	if (c < VIMSUGVERSION)
				706	{
				707	EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
				708	slang->sl_fname);
				709	goto nextone;
				710	}
				711	else if (c > VIMSUGVERSION)
				712	{
				713	EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
				714	slang->sl_fname);
				715	goto nextone;
				716	}
				717
				718	/* Check the timestamp, it must be exactly the same as the one in
				719	* the .spl file. Otherwise the word numbers won't match. */
				720	timestamp = get8ctime(fd); /* <timestamp> */
				721	if (timestamp != slang->sl_sugtime)
				722	{
				723	EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
				724	slang->sl_fname);
				725	goto nextone;
				726	}
				727
				728	/*
				729	* <SUGWORDTREE>: <wordtree>
				730	* Read the trie with the soundfolded words.
				731	*/
				732	if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
				733	FALSE, 0) != 0)
				734	{
				735	someerror:
				736	EMSG2(_("E782: error while reading .sug file: %s"),
				737	slang->sl_fname);
				738	slang_clear_sug(slang);
				739	goto nextone;
				740	}
				741
				742	/*
				743	* <SUGTABLE>: <sugwcount> <sugline> ...
				744	*
				745	* Read the table with word numbers. We use a file buffer for
				746	* this, because it's so much like a file with lines. Makes it
				747	* possible to swap the info and save on memory use.
				748	*/
				749	slang->sl_sugbuf = open_spellbuf();
				750	if (slang->sl_sugbuf == NULL)
				751	goto someerror;
				752	/* <sugwcount> */
				753	wcount = get4c(fd);
				754	if (wcount < 0)
				755	goto someerror;
				756
				757	/* Read all the wordnr lists into the buffer, one NUL terminated
				758	* list per line. */
				759	ga_init2(&ga, 1, 100);
				760	for (wordnr = 0; wordnr < wcount; ++wordnr)
				761	{
				762	ga.ga_len = 0;
				763	for (;;)
				764	{
				765	c = getc(fd); /* <sugline> */
				766	if (c < 0 \|\| ga_grow(&ga, 1) == FAIL)
				767	goto someerror;
				768	((char_u *)ga.ga_data)[ga.ga_len++] = c;
				769	if (c == NUL)
				770	break;
				771	}
				772	if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
				773	ga.ga_data, ga.ga_len, TRUE) == FAIL)
				774	goto someerror;
				775	}
				776	ga_clear(&ga);
				777
				778	/*
				779	* Need to put word counts in the word tries, so that we can find
				780	* a word by its number.
				781	*/
				782	tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
				783	tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
				784
				785	nextone:
				786	if (fd != NULL)
				787	fclose(fd);
				788	STRCPY(dotp, ".spl");
				789	}
				790	}
				791	}
				792
				793
				794	/*
				795	* Read a length field from "fd" in "cnt_bytes" bytes.
				796	* Allocate memory, read the string into it and add a NUL at the end.
				797	* Returns NULL when the count is zero.
				798	* Sets "cntp" to SP_ERROR when there is an error, length of the result
				799	* otherwise.
				800	*/
				801	static char_u *
				802	read_cnt_string(FILE fd, int cnt_bytes, int cntp)
				803	{
				804	int cnt = 0;
				805	int i;
				806	char_u *str;
				807
				808	/* read the length bytes, MSB first */
				809	for (i = 0; i < cnt_bytes; ++i)
				810	cnt = (cnt << 8) + getc(fd);
				811	if (cnt < 0)
				812	{
				813	*cntp = SP_TRUNCERROR;
				814	return NULL;
				815	}
				816	*cntp = cnt;
				817	if (cnt == 0)
				818	return NULL; /* nothing to read, return NULL */
				819
				820	str = read_string(fd, cnt);
				821	if (str == NULL)
				822	*cntp = SP_OTHERERROR;
				823	return str;
				824	}
				825
				826	/*
				827	* Read SN_REGION: <regionname> ...
				828	* Return SP_*ERROR flags.
				829	*/
				830	static int
				831	read_region_section(FILE fd, slang_T lp, int len)
				832	{
				833	int i;
				834
				835	if (len > 16)
				836	return SP_FORMERROR;
				837	for (i = 0; i < len; ++i)
				838	lp->sl_regions[i] = getc(fd); /* <regionname> */
				839	lp->sl_regions[len] = NUL;
				840	return 0;
				841	}
				842
				843	/*
				844	* Read SN_CHARFLAGS section: <charflagslen> <charflags>
				845	* <folcharslen> <folchars>
				846	* Return SP_*ERROR flags.
				847	*/
				848	static int
				849	read_charflags_section(FILE *fd)
				850	{
				851	char_u *flags;
				852	char_u *fol;
				853	int flagslen, follen;
				854
				855	/* <charflagslen> <charflags> */
				856	flags = read_cnt_string(fd, 1, &flagslen);
				857	if (flagslen < 0)
				858	return flagslen;
				859
				860	/* <folcharslen> <folchars> */
				861	fol = read_cnt_string(fd, 2, &follen);
				862	if (follen < 0)
				863	{
				864	vim_free(flags);
				865	return follen;
				866	}
				867
				868	/* Set the word-char flags and fill SPELL_ISUPPER() table. */
				869	if (flags != NULL && fol != NULL)
				870	set_spell_charflags(flags, flagslen, fol);
				871
				872	vim_free(flags);
				873	vim_free(fol);
				874
				875	/* When <charflagslen> is zero then <fcharlen> must also be zero. */
				876	if ((flags == NULL) != (fol == NULL))
				877	return SP_FORMERROR;
				878	return 0;
				879	}
				880
				881	/*
				882	* Read SN_PREFCOND section.
				883	* Return SP_*ERROR flags.
				884	*/
				885	static int
				886	read_prefcond_section(FILE fd, slang_T lp)
				887	{
				888	int cnt;
				889	int i;
				890	int n;
				891	char_u *p;
				892	char_u buf[MAXWLEN + 1];
				893
				894	/* <prefcondcnt> <prefcond> ... */
				895	cnt = get2c(fd); /* <prefcondcnt> */
				896	if (cnt <= 0)
				897	return SP_FORMERROR;
				898
				899	lp->sl_prefprog = (regprog_T **)alloc_clear(
				900	(unsigned)sizeof(regprog_T ) cnt);
				901	if (lp->sl_prefprog == NULL)
				902	return SP_OTHERERROR;
				903	lp->sl_prefixcnt = cnt;
				904
				905	for (i = 0; i < cnt; ++i)
				906	{
				907	/* <prefcond> : <condlen> <condstr> */
				908	n = getc(fd); /* <condlen> */
				909	if (n < 0 \|\| n >= MAXWLEN)
				910	return SP_FORMERROR;
				911
				912	/* When <condlen> is zero we have an empty condition. Otherwise
				913	* compile the regexp program used to check for the condition. */
				914	if (n > 0)
				915	{
				916	buf[0] = '^'; /* always match at one position only */
				917	p = buf + 1;
				918	while (n-- > 0)
				919	p++ = getc(fd); / <condstr> */
				920	*p = NUL;
				921	lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
				922	}
				923	}
				924	return 0;
				925	}
				926
				927	/*
				928	* Read REP or REPSAL items section from "fd": <repcount> <rep> ...
				929	* Return SP_*ERROR flags.
				930	*/
				931	static int
				932	read_rep_section(FILE fd, garray_T gap, short *first)
				933	{
				934	int cnt;
				935	fromto_T *ftp;
				936	int i;
				937
				938	cnt = get2c(fd); /* <repcount> */
				939	if (cnt < 0)
				940	return SP_TRUNCERROR;
				941
				942	if (ga_grow(gap, cnt) == FAIL)
				943	return SP_OTHERERROR;
				944
				945	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				946	for (; gap->ga_len < cnt; ++gap->ga_len)
				947	{
				948	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
				949	ftp->ft_from = read_cnt_string(fd, 1, &i);
				950	if (i < 0)
				951	return i;
				952	if (i == 0)
				953	return SP_FORMERROR;
				954	ftp->ft_to = read_cnt_string(fd, 1, &i);
				955	if (i <= 0)
				956	{
				957	vim_free(ftp->ft_from);
				958	if (i < 0)
				959	return i;
				960	return SP_FORMERROR;
				961	}
				962	}
				963
				964	/* Fill the first-index table. */
				965	for (i = 0; i < 256; ++i)
				966	first[i] = -1;
				967	for (i = 0; i < gap->ga_len; ++i)
				968	{
				969	ftp = &((fromto_T *)gap->ga_data)[i];
				970	if (first[*ftp->ft_from] == -1)
				971	first[*ftp->ft_from] = i;
				972	}
				973	return 0;
				974	}
				975
				976	/*
				977	* Read SN_SAL section: <salflags> <salcount> <sal> ...
				978	* Return SP_*ERROR flags.
				979	*/
				980	static int
				981	read_sal_section(FILE fd, slang_T slang)
				982	{
				983	int i;
				984	int cnt;
				985	garray_T *gap;
				986	salitem_T *smp;
				987	int ccnt;
				988	char_u *p;
				989	int c = NUL;
				990
				991	slang->sl_sofo = FALSE;
				992
				993	i = getc(fd); /* <salflags> */
				994	if (i & SAL_F0LLOWUP)
				995	slang->sl_followup = TRUE;
				996	if (i & SAL_COLLAPSE)
				997	slang->sl_collapse = TRUE;
				998	if (i & SAL_REM_ACCENTS)
				999	slang->sl_rem_accents = TRUE;
				1000
				1001	cnt = get2c(fd); /* <salcount> */
				1002	if (cnt < 0)
				1003	return SP_TRUNCERROR;
				1004
				1005	gap = &slang->sl_sal;
				1006	ga_init2(gap, sizeof(salitem_T), 10);
				1007	if (ga_grow(gap, cnt + 1) == FAIL)
				1008	return SP_OTHERERROR;
				1009
				1010	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				1011	for (; gap->ga_len < cnt; ++gap->ga_len)
				1012	{
				1013	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1014	ccnt = getc(fd); /* <salfromlen> */
				1015	if (ccnt < 0)
				1016	return SP_TRUNCERROR;
				1017	if ((p = alloc(ccnt + 2)) == NULL)
				1018	return SP_OTHERERROR;
				1019	smp->sm_lead = p;
				1020
				1021	/* Read up to the first special char into sm_lead. */
				1022	for (i = 0; i < ccnt; ++i)
				1023	{
				1024	c = getc(fd); /* <salfrom> */
				1025	if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
				1026	break;
				1027	*p++ = c;
				1028	}
				1029	smp->sm_leadlen = (int)(p - smp->sm_lead);
				1030	*p++ = NUL;
				1031
				1032	/* Put (abc) chars in sm_oneof, if any. */
				1033	if (c == '(')
				1034	{
				1035	smp->sm_oneof = p;
				1036	for (++i; i < ccnt; ++i)
				1037	{
				1038	c = getc(fd); /* <salfrom> */
				1039	if (c == ')')
				1040	break;
				1041	*p++ = c;
				1042	}
				1043	*p++ = NUL;
				1044	if (++i < ccnt)
				1045	c = getc(fd);
				1046	}
				1047	else
				1048	smp->sm_oneof = NULL;
				1049
				1050	/* Any following chars go in sm_rules. */
				1051	smp->sm_rules = p;
				1052	if (i < ccnt)
				1053	/* store the char we got while checking for end of sm_lead */
				1054	*p++ = c;
				1055	for (++i; i < ccnt; ++i)
				1056	p++ = getc(fd); / <salfrom> */
				1057	*p++ = NUL;
				1058
				1059	/* <saltolen> <salto> */
				1060	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
				1061	if (ccnt < 0)
				1062	{
				1063	vim_free(smp->sm_lead);
				1064	return ccnt;
				1065	}
				1066
				1067	#ifdef FEAT_MBYTE
				1068	if (has_mbyte)
				1069	{
				1070	/* convert the multi-byte strings to wide char strings */
				1071	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1072	smp->sm_leadlen = mb_charlen(smp->sm_lead);
				1073	if (smp->sm_oneof == NULL)
				1074	smp->sm_oneof_w = NULL;
				1075	else
				1076	smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
				1077	if (smp->sm_to == NULL)
				1078	smp->sm_to_w = NULL;
				1079	else
				1080	smp->sm_to_w = mb_str2wide(smp->sm_to);
				1081	if (smp->sm_lead_w == NULL
				1082	\|\| (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
				1083	\|\| (smp->sm_to_w == NULL && smp->sm_to != NULL))
				1084	{
				1085	vim_free(smp->sm_lead);
				1086	vim_free(smp->sm_to);
				1087	vim_free(smp->sm_lead_w);
				1088	vim_free(smp->sm_oneof_w);
				1089	vim_free(smp->sm_to_w);
				1090	return SP_OTHERERROR;
				1091	}
				1092	}
				1093	#endif
				1094	}
				1095
				1096	if (gap->ga_len > 0)
				1097	{
				1098	/* Add one extra entry to mark the end with an empty sm_lead. Avoids
				1099	* that we need to check the index every time. */
				1100	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1101	if ((p = alloc(1)) == NULL)
				1102	return SP_OTHERERROR;
				1103	p[0] = NUL;
				1104	smp->sm_lead = p;
				1105	smp->sm_leadlen = 0;
				1106	smp->sm_oneof = NULL;
				1107	smp->sm_rules = p;
				1108	smp->sm_to = NULL;
				1109	#ifdef FEAT_MBYTE
				1110	if (has_mbyte)
				1111	{
				1112	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1113	smp->sm_leadlen = 0;
				1114	smp->sm_oneof_w = NULL;
				1115	smp->sm_to_w = NULL;
				1116	}
				1117	#endif
				1118	++gap->ga_len;
				1119	}
				1120
				1121	/* Fill the first-index table. */
				1122	set_sal_first(slang);
				1123
				1124	return 0;
				1125	}
				1126
				1127	/*
				1128	* Read SN_WORDS: <word> ...
				1129	* Return SP_*ERROR flags.
				1130	*/
				1131	static int
				1132	read_words_section(FILE fd, slang_T lp, int len)
				1133	{
				1134	int done = 0;
				1135	int i;
				1136	int c;
				1137	char_u word[MAXWLEN];
				1138
				1139	while (done < len)
				1140	{
				1141	/* Read one word at a time. */
				1142	for (i = 0; ; ++i)
				1143	{
				1144	c = getc(fd);
				1145	if (c == EOF)
				1146	return SP_TRUNCERROR;
				1147	word[i] = c;
				1148	if (word[i] == NUL)
				1149	break;
				1150	if (i == MAXWLEN - 1)
				1151	return SP_FORMERROR;
				1152	}
				1153
				1154	/* Init the count to 10. */
				1155	count_common_word(lp, word, -1, 10);
				1156	done += i + 1;
				1157	}
				1158	return 0;
				1159	}
				1160
				1161	/*
				1162	* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				1163	* Return SP_*ERROR flags.
				1164	*/
				1165	static int
				1166	read_sofo_section(FILE fd, slang_T slang)
				1167	{
				1168	int cnt;
				1169	char_u from, to;
				1170	int res;
				1171
				1172	slang->sl_sofo = TRUE;
				1173
				1174	/* <sofofromlen> <sofofrom> */
				1175	from = read_cnt_string(fd, 2, &cnt);
				1176	if (cnt < 0)
				1177	return cnt;
				1178
				1179	/* <sofotolen> <sofoto> */
				1180	to = read_cnt_string(fd, 2, &cnt);
				1181	if (cnt < 0)
				1182	{
				1183	vim_free(from);
				1184	return cnt;
				1185	}
				1186
				1187	/* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
				1188	if (from != NULL && to != NULL)
				1189	res = set_sofo(slang, from, to);
				1190	else if (from != NULL \|\| to != NULL)
				1191	res = SP_FORMERROR; /* only one of two strings is an error */
				1192	else
				1193	res = 0;
				1194
				1195	vim_free(from);
				1196	vim_free(to);
				1197	return res;
				1198	}
				1199
				1200	/*
				1201	* Read the compound section from the .spl file:
				1202	* <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
				1203	* Returns SP_*ERROR flags.
				1204	*/
				1205	static int
				1206	read_compound(FILE fd, slang_T slang, int len)
				1207	{
				1208	int todo = len;
				1209	int c;
				1210	int atstart;
				1211	char_u *pat;
				1212	char_u *pp;
				1213	char_u *cp;
				1214	char_u *ap;
				1215	char_u *crp;
				1216	int cnt;
				1217	garray_T *gap;
				1218
				1219	if (todo < 2)
				1220	return SP_FORMERROR; /* need at least two bytes */
				1221
				1222	--todo;
				1223	c = getc(fd); /* <compmax> */
				1224	if (c < 2)
				1225	c = MAXWLEN;
				1226	slang->sl_compmax = c;
				1227
				1228	--todo;
				1229	c = getc(fd); /* <compminlen> */
				1230	if (c < 1)
				1231	c = 0;
				1232	slang->sl_compminlen = c;
				1233
				1234	--todo;
				1235	c = getc(fd); /* <compsylmax> */
				1236	if (c < 1)
				1237	c = MAXWLEN;
				1238	slang->sl_compsylmax = c;
				1239
				1240	c = getc(fd); /* <compoptions> */
				1241	if (c != 0)
				1242	ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
				1243	else
				1244	{
				1245	--todo;
				1246	c = getc(fd); /* only use the lower byte for now */
				1247	--todo;
				1248	slang->sl_compoptions = c;
				1249
				1250	gap = &slang->sl_comppat;
				1251	c = get2c(fd); /* <comppatcount> */
				1252	todo -= 2;
				1253	ga_init2(gap, sizeof(char_u *), c);
				1254	if (ga_grow(gap, c) == OK)
				1255	while (--c >= 0)
				1256	{
				1257	((char_u **)(gap->ga_data))[gap->ga_len++] =
				1258	read_cnt_string(fd, 1, &cnt);
				1259	/* <comppatlen> <comppattext> */
				1260	if (cnt < 0)
				1261	return cnt;
				1262	todo -= cnt + 1;
				1263	}
				1264	}
				1265	if (todo < 0)
				1266	return SP_FORMERROR;
				1267
				1268	/* Turn the COMPOUNDRULE items into a regexp pattern:
				1269	* "a[bc]/ab+" -> "^$a[bc]\\|ab\+$$".
				1270	* Inserting backslashes may double the length, "^$<Nul>" is 7 bytes.
				1271	* Conversion to utf-8 may double the size. */
				1272	c = todo * 2 + 7;
				1273	#ifdef FEAT_MBYTE
				1274	if (enc_utf8)
				1275	c += todo * 2;
				1276	#endif
				1277	pat = alloc((unsigned)c);
				1278	if (pat == NULL)
				1279	return SP_OTHERERROR;
				1280
				1281	/* We also need a list of all flags that can appear at the start and one
				1282	* for all flags. */
				1283	cp = alloc(todo + 1);
				1284	if (cp == NULL)
				1285	{
				1286	vim_free(pat);
				1287	return SP_OTHERERROR;
				1288	}
				1289	slang->sl_compstartflags = cp;
				1290	*cp = NUL;
				1291
				1292	ap = alloc(todo + 1);
				1293	if (ap == NULL)
				1294	{
				1295	vim_free(pat);
				1296	return SP_OTHERERROR;
				1297	}
				1298	slang->sl_compallflags = ap;
				1299	*ap = NUL;
				1300
				1301	/* And a list of all patterns in their original form, for checking whether
				1302	* compounding may work in match_compoundrule(). This is freed when we
				1303	* encounter a wildcard, the check doesn't work then. */
				1304	crp = alloc(todo + 1);
				1305	slang->sl_comprules = crp;
				1306
				1307	pp = pat;
				1308	*pp++ = '^';
				1309	*pp++ = '\\';
				1310	*pp++ = '(';
				1311
				1312	atstart = 1;
				1313	while (todo-- > 0)
				1314	{
				1315	c = getc(fd); /* <compflags> */
				1316	if (c == EOF)
				1317	{
				1318	vim_free(pat);
				1319	return SP_TRUNCERROR;
				1320	}
				1321
				1322	/* Add all flags to "sl_compallflags". */
				1323	if (vim_strchr((char_u )"?+[]/", c) == NULL
				1324	&& !byte_in_str(slang->sl_compallflags, c))
				1325	{
				1326	*ap++ = c;
				1327	*ap = NUL;
				1328	}
				1329
				1330	if (atstart != 0)
				1331	{
				1332	/* At start of item: copy flags to "sl_compstartflags". For a
				1333	* [abc] item set "atstart" to 2 and copy up to the ']'. */
				1334	if (c == '[')
				1335	atstart = 2;
				1336	else if (c == ']')
				1337	atstart = 0;
				1338	else
				1339	{
				1340	if (!byte_in_str(slang->sl_compstartflags, c))
				1341	{
				1342	*cp++ = c;
				1343	*cp = NUL;
				1344	}
				1345	if (atstart == 1)
				1346	atstart = 0;
				1347	}
				1348	}
				1349
				1350	/* Copy flag to "sl_comprules", unless we run into a wildcard. */
				1351	if (crp != NULL)
				1352	{
				1353	if (c == '?' \|\| c == '+' \|\| c == '*')
				1354	{
				1355	vim_free(slang->sl_comprules);
				1356	slang->sl_comprules = NULL;
				1357	crp = NULL;
				1358	}
				1359	else
				1360	*crp++ = c;
				1361	}
				1362
				1363	if (c == '/') /* slash separates two items */
				1364	{
				1365	*pp++ = '\\';
				1366	*pp++ = '\|';
				1367	atstart = 1;
				1368	}
				1369	else /* normal char, "[abc]" and '' are copied as-is /
				1370	{
				1371	if (c == '?' \|\| c == '+' \|\| c == '~')
				1372	pp++ = '\\'; / "a?" becomes "a\?", "a+" becomes "a\+" */
				1373	#ifdef FEAT_MBYTE
				1374	if (enc_utf8)
				1375	pp += mb_char2bytes(c, pp);
				1376	else
				1377	#endif
				1378	*pp++ = c;
				1379	}
				1380	}
				1381
				1382	*pp++ = '\\';
				1383	*pp++ = ')';
				1384	*pp++ = '$';
				1385	*pp = NUL;
				1386
				1387	if (crp != NULL)
				1388	*crp = NUL;
				1389
				1390	slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
				1391	vim_free(pat);
				1392	if (slang->sl_compprog == NULL)
				1393	return SP_FORMERROR;
				1394
				1395	return 0;
				1396	}
				1397
				1398	/*
				1399	* Set the SOFOFROM and SOFOTO items in language "lp".
				1400	* Returns SP_*ERROR flags when there is something wrong.
				1401	*/
				1402	static int
				1403	set_sofo(slang_T lp, char_u from, char_u *to)
				1404	{
				1405	int i;
				1406
				1407	#ifdef FEAT_MBYTE
				1408	garray_T *gap;
				1409	char_u *s;
				1410	char_u *p;
				1411	int c;
				1412	int *inp;
				1413
				1414	if (has_mbyte)
				1415	{
				1416	/* Use "sl_sal" as an array with 256 pointers to a list of wide
				1417	* characters. The index is the low byte of the character.
				1418	* The list contains from-to pairs with a terminating NUL.
				1419	* sl_sal_first[] is used for latin1 "from" characters. */
				1420	gap = &lp->sl_sal;
				1421	ga_init2(gap, sizeof(int *), 1);
				1422	if (ga_grow(gap, 256) == FAIL)
				1423	return SP_OTHERERROR;
				1424	vim_memset(gap->ga_data, 0, sizeof(int ) 256);
				1425	gap->ga_len = 256;
				1426
				1427	/* First count the number of items for each list. Temporarily use
				1428	* sl_sal_first[] for this. */
				1429	for (p = from, s = to; p != NUL && s != NUL; )
				1430	{
				1431	c = mb_cptr2char_adv(&p);
				1432	mb_cptr_adv(s);
				1433	if (c >= 256)
				1434	++lp->sl_sal_first[c & 0xff];
				1435	}
				1436	if (p != NUL \|\| s != NUL) /* lengths differ */
				1437	return SP_FORMERROR;
				1438
				1439	/* Allocate the lists. */
				1440	for (i = 0; i < 256; ++i)
				1441	if (lp->sl_sal_first[i] > 0)
				1442	{
				1443	p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
				1444	if (p == NULL)
				1445	return SP_OTHERERROR;
				1446	((int *)gap->ga_data)[i] = (int )p;
				1447	(int )p = 0;
				1448	}
				1449
				1450	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
				1451	* list. */
				1452	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
				1453	for (p = from, s = to; p != NUL && s != NUL; )
				1454	{
				1455	c = mb_cptr2char_adv(&p);
				1456	i = mb_cptr2char_adv(&s);
				1457	if (c >= 256)
				1458	{
				1459	/* Append the from-to chars at the end of the list with
				1460	* the low byte. */
				1461	inp = ((int **)gap->ga_data)[c & 0xff];
				1462	while (*inp != 0)
				1463	++inp;
				1464	inp++ = c; / from char */
				1465	inp++ = i; / to char */
				1466	inp++ = NUL; / NUL at the end */
				1467	}
				1468	else
				1469	/* mapping byte to char is done in sl_sal_first[] */
				1470	lp->sl_sal_first[c] = i;
				1471	}
				1472	}
				1473	else
				1474	#endif
				1475	{
				1476	/* mapping bytes to bytes is done in sl_sal_first[] */
				1477	if (STRLEN(from) != STRLEN(to))
				1478	return SP_FORMERROR;
				1479
				1480	for (i = 0; to[i] != NUL; ++i)
				1481	lp->sl_sal_first[from[i]] = to[i];
				1482	lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
				1483	}
				1484
				1485	return 0;
				1486	}
				1487
				1488	/*
				1489	* Fill the first-index table for "lp".
				1490	*/
				1491	static void
				1492	set_sal_first(slang_T *lp)
				1493	{
				1494	salfirst_T *sfirst;
				1495	int i;
				1496	salitem_T *smp;
				1497	int c;
				1498	garray_T *gap = &lp->sl_sal;
				1499
				1500	sfirst = lp->sl_sal_first;
				1501	for (i = 0; i < 256; ++i)
				1502	sfirst[i] = -1;
				1503	smp = (salitem_T *)gap->ga_data;
				1504	for (i = 0; i < gap->ga_len; ++i)
				1505	{
				1506	#ifdef FEAT_MBYTE
				1507	if (has_mbyte)
				1508	/* Use the lowest byte of the first character. For latin1 it's
				1509	* the character, for other encodings it should differ for most
				1510	* characters. */
				1511	c = *smp[i].sm_lead_w & 0xff;
				1512	else
				1513	#endif
				1514	c = *smp[i].sm_lead;
				1515	if (sfirst[c] == -1)
				1516	{
				1517	sfirst[c] = i;
				1518	#ifdef FEAT_MBYTE
				1519	if (has_mbyte)
				1520	{
				1521	int n;
				1522
				1523	/* Make sure all entries with this byte are following each
				1524	* other. Move the ones that are in the wrong position. Do
				1525	* keep the same ordering! */
				1526	while (i + 1 < gap->ga_len
				1527	&& (*smp[i + 1].sm_lead_w & 0xff) == c)
				1528	/* Skip over entry with same index byte. */
				1529	++i;
				1530
				1531	for (n = 1; i + n < gap->ga_len; ++n)
				1532	if ((*smp[i + n].sm_lead_w & 0xff) == c)
				1533	{
				1534	salitem_T tsal;
				1535
				1536	/* Move entry with same index byte after the entries
				1537	* we already found. */
				1538	++i;
				1539	--n;
				1540	tsal = smp[i + n];
				1541	mch_memmove(smp + i + 1, smp + i,
				1542	sizeof(salitem_T) * n);
				1543	smp[i] = tsal;
				1544	}
				1545	}
				1546	#endif
				1547	}
				1548	}
				1549	}
				1550
				#ifdef FEAT_MBYTE
				/*
				 * Turn a multi-byte string into a wide character string.
				 * The result holds one int per character of "s" followed by a NUL.
				 * Return it in allocated memory (NULL for out-of-memory)
				 */
				static int *
				mb_str2wide(char_u *s)
				{
				    int         *res;
				    char_u      *p;
				    int         i = 0;

				    res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
				    if (res != NULL)
				    {
				        for (p = s; *p != NUL; )
				            res[i++] = mb_ptr2char_adv(&p);
				        res[i] = NUL;
				    }
				    return res;
				}
				#endif
				1573
				1574	/*
				1575	* Read a tree from the .spl or .sug file.
				1576	* Allocates the memory and stores pointers in "bytsp" and "idxsp".
				1577	* This is skipped when the tree has zero length.
				1578	* Returns zero when OK, SP_ value for an error.
				1579	*/
				1580	static int
				1581	spell_read_tree(
				1582	FILE *fd,
				1583	char_u **bytsp,
				1584	idx_T **idxsp,
				1585	int prefixtree, /* TRUE for the prefix tree */
				1586	int prefixcnt) /* when "prefixtree" is TRUE: prefix count */
				1587	{
				1588	int len;
				1589	int idx;
				1590	char_u *bp;
				1591	idx_T *ip;
				1592
				1593	/* The tree size was computed when writing the file, so that we can
				1594	* allocate it as one long block. <nodecount> */
				1595	len = get4c(fd);
				1596	if (len < 0)
				1597	return SP_TRUNCERROR;
				1598	if (len > 0)
				1599	{
				1600	/* Allocate the byte array. */
				1601	bp = lalloc((long_u)len, TRUE);
				1602	if (bp == NULL)
				1603	return SP_OTHERERROR;
				1604	*bytsp = bp;
				1605
				1606	/* Allocate the index array. */
				1607	ip = (idx_T )lalloc_clear((long_u)(len sizeof(int)), TRUE);
				1608	if (ip == NULL)
				1609	return SP_OTHERERROR;
				1610	*idxsp = ip;
				1611
				1612	/* Recursively read the tree and store it in the array. */
				1613	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
				1614	if (idx < 0)
				1615	return idx;
				1616	}
				1617	return 0;
				1618	}
				1619
				1620	/*
				1621	* Read one row of siblings from the spell file and store it in the byte array
				1622	* "byts" and index array "idxs". Recursively read the children.
				1623	*
				1624	* NOTE: The code here must match put_node()!
				1625	*
				1626	* Returns the index (>= 0) following the siblings.
				1627	* Returns SP_TRUNCERROR if the file is shorter than expected.
				1628	* Returns SP_FORMERROR if there is a format error.
				1629	*/
				1630	static idx_T
				1631	read_tree_node(
				1632	FILE *fd,
				1633	char_u *byts,
				1634	idx_T *idxs,
				1635	int maxidx, /* size of arrays */
				1636	idx_T startidx, /* current index in "byts" and "idxs" */
				1637	int prefixtree, /* TRUE for reading PREFIXTREE */
				1638	int maxprefcondnr) /* maximum for <prefcondnr> */
				1639	{
				1640	int len;
				1641	int i;
				1642	int n;
				1643	idx_T idx = startidx;
				1644	int c;
				1645	int c2;
				1646	#define SHARED_MASK 0x8000000
				1647
				1648	len = getc(fd); /* <siblingcount> */
				1649	if (len <= 0)
				1650	return SP_TRUNCERROR;
				1651
				1652	if (startidx + len >= maxidx)
				1653	return SP_FORMERROR;
				1654	byts[idx++] = len;
				1655
				1656	/* Read the byte values, flag/region bytes and shared indexes. */
				1657	for (i = 1; i <= len; ++i)
				1658	{
				1659	c = getc(fd); /* <byte> */
				1660	if (c < 0)
				1661	return SP_TRUNCERROR;
				1662	if (c <= BY_SPECIAL)
				1663	{
				1664	if (c == BY_NOFLAGS && !prefixtree)
				1665	{
				1666	/* No flags, all regions. */
				1667	idxs[idx] = 0;
				1668	c = 0;
				1669	}
				1670	else if (c != BY_INDEX)
				1671	{
				1672	if (prefixtree)
				1673	{
				1674	/* Read the optional pflags byte, the prefix ID and the
				1675	* condition nr. In idxs[] store the prefix ID in the low
				1676	* byte, the condition index shifted up 8 bits, the flags
				1677	* shifted up 24 bits. */
				1678	if (c == BY_FLAGS)
				1679	c = getc(fd) << 24; /* <pflags> */
				1680	else
				1681	c = 0;
				1682
				1683	c \|= getc(fd); /* <affixID> */
				1684
				1685	n = get2c(fd); /* <prefcondnr> */
				1686	if (n >= maxprefcondnr)
				1687	return SP_FORMERROR;
				1688	c \|= (n << 8);
				1689	}
				1690	else /* c must be BY_FLAGS or BY_FLAGS2 */
				1691	{
				1692	/* Read flags and optional region and prefix ID. In
				1693	* idxs[] the flags go in the low two bytes, region above
				1694	* that and prefix ID above the region. */
				1695	c2 = c;
				1696	c = getc(fd); /* <flags> */
				1697	if (c2 == BY_FLAGS2)
				1698	c = (getc(fd) << 8) + c; /* <flags2> */
				1699	if (c & WF_REGION)
				1700	c = (getc(fd) << 16) + c; /* <region> */
				1701	if (c & WF_AFX)
				1702	c = (getc(fd) << 24) + c; /* <affixID> */
				1703	}
				1704
				1705	idxs[idx] = c;
				1706	c = 0;
				1707	}
				1708	else /* c == BY_INDEX */
				1709	{
				1710	/* <nodeidx> */
				1711	n = get3c(fd);
				1712	if (n < 0 \|\| n >= maxidx)
				1713	return SP_FORMERROR;
				1714	idxs[idx] = n + SHARED_MASK;
				1715	c = getc(fd); /* <xbyte> */
				1716	}
				1717	}
				1718	byts[idx++] = c;
				1719	}
				1720
				1721	/* Recursively read the children for non-shared siblings.
				1722	* Skip the end-of-word ones (zero byte value) and the shared ones (and
				1723	* remove SHARED_MASK) */
				1724	for (i = 1; i <= len; ++i)
				1725	if (byts[startidx + i] != 0)
				1726	{
				1727	if (idxs[startidx + i] & SHARED_MASK)
				1728	idxs[startidx + i] &= ~SHARED_MASK;
				1729	else
				1730	{
				1731	idxs[startidx + i] = idx;
				1732	idx = read_tree_node(fd, byts, idxs, maxidx, idx,
				1733	prefixtree, maxprefcondnr);
				1734	if (idx < 0)
				1735	break;
				1736	}
				1737	}
				1738
				1739	return idx;
				1740	}
				1741
				1742	/*
				1743	* Reload the spell file "fname" if it's loaded.
				1744	*/
				1745	static void
				1746	spell_reload_one(
				1747	char_u *fname,
				1748	int added_word) /* invoked through "zg" */
				1749	{
				1750	slang_T *slang;
				1751	int didit = FALSE;
				1752
				1753	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				1754	{
				1755	if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
				1756	{
				1757	slang_clear(slang);
				1758	if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
				1759	/* reloading failed, clear the language */
				1760	slang_clear(slang);
				1761	redraw_all_later(SOME_VALID);
				1762	didit = TRUE;
				1763	}
				1764	}
				1765
				1766	/* When "zg" was used and the file wasn't loaded yet, should redo
				1767	* 'spelllang' to load it now. */
				1768	if (added_word && !didit)
				1769	did_set_spelllang(curwin);
				1770	}
				1771
				1772
				1773	/*
				1774	* Functions for ":mkspell".
				1775	*/
				1776
				1777	#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
				1778	and .dic file. */
				1779	/*
				1780	* Main structure to store the contents of a ".aff" file.
				1781	*/
				1782	typedef struct afffile_S
				1783	{
				1784	char_u af_enc; / "SET", normalized, alloc'ed string or NULL */
				1785	int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
				1786	unsigned af_rare; /* RARE ID for rare word */
				1787	unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
				1788	unsigned af_bad; /* BAD ID for banned word */
				1789	unsigned af_needaffix; /* NEEDAFFIX ID */
				1790	unsigned af_circumfix; /* CIRCUMFIX ID */
				1791	unsigned af_needcomp; /* NEEDCOMPOUND ID */
				1792	unsigned af_comproot; /* COMPOUNDROOT ID */
				1793	unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */
				1794	unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */
				1795	unsigned af_nosuggest; /* NOSUGGEST ID */
				1796	int af_pfxpostpone; /* postpone prefixes without chop string and
				1797	without flags */
				1798	int af_ignoreextra; /* IGNOREEXTRA present */
				1799	hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
				1800	hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
				1801	hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
				1802	} afffile_T;
				1803
				1804	#define AFT_CHAR 0 /* flags are one character */
				1805	#define AFT_LONG 1 /* flags are two characters */
				1806	#define AFT_CAPLONG 2 /* flags are one or two characters */
				1807	#define AFT_NUM 3 /* flags are numbers, comma separated */
				1808
				1809	typedef struct affentry_S affentry_T;
				1810	/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
				1811	struct affentry_S
				1812	{
				1813	affentry_T ae_next; / next affix with same name/number */
				1814	char_u ae_chop; / text to chop off basic word (can be NULL) */
				1815	char_u ae_add; / text to add to basic word (can be NULL) */
				1816	char_u ae_flags; / flags on the affix (can be NULL) */
				1817	char_u ae_cond; / condition (NULL for ".") */
				1818	regprog_T ae_prog; / regexp program for ae_cond or NULL */
				1819	char ae_compforbid; /* COMPOUNDFORBIDFLAG found */
				1820	char ae_comppermit; /* COMPOUNDPERMITFLAG found */
				1821	};
				1822
				1823	#ifdef FEAT_MBYTE
				1824	# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
				1825	#else
				1826	# define AH_KEY_LEN 7 /* 6 digits + NUL */
				1827	#endif
				1828
				1829	/* Affix header from ".aff" file. Used for af_pref and af_suff. */
				1830	typedef struct affheader_S
				1831	{
				1832	char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
				1833	unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
				1834	int ah_newID; /* prefix ID after renumbering; 0 if not used */
				1835	int ah_combine; /* suffix may combine with prefix */
				1836	int ah_follows; /* another affix block should be following */
				1837	affentry_T ah_first; / first affix entry */
				1838	} affheader_T;
				1839
				1840	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
				1841
				1842	/* Flag used in compound items. */
				1843	typedef struct compitem_S
				1844	{
				1845	char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
				1846	unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
				1847	int ci_newID; /* affix ID after renumbering. */
				1848	} compitem_T;
				1849
				1850	#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
				1851
				1852	/*
				1853	* Structure that is used to store the items in the word tree. This avoids
				1854	* the need to keep track of each allocated thing, everything is freed all at
				1855	* once after ":mkspell" is done.
				1856	* Note: "sb_next" must be just before "sb_data" to make sure the alignment of
				1857	* "sb_data" is correct for systems where pointers must be aligned on
				1858	* pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
				1859	*/
				1860	#define SBLOCKSIZE 16000 /* size of sb_data */
				1861	typedef struct sblock_S sblock_T;
				1862	struct sblock_S
				1863	{
				1864	int sb_used; /* nr of bytes already in use */
				1865	sblock_T sb_next; / next block in list */
				1866	char_u sb_data[1]; /* data, actually longer */
				1867	};
				1868
				1869	/*
				1870	* A node in the tree.
				1871	*/
				1872	typedef struct wordnode_S wordnode_T;
				1873	struct wordnode_S
				1874	{
				1875	union /* shared to save space */
				1876	{
				1877	char_u hashkey[6]; /* the hash key, only used while compressing */
				1878	int index; /* index in written nodes (valid after first
				1879	round) */
				1880	} wn_u1;
				1881	union /* shared to save space */
				1882	{
				1883	wordnode_T next; / next node with same hash key */
				1884	wordnode_T wnode; / parent node that will write this node */
				1885	} wn_u2;
				1886	wordnode_T wn_child; / child (next byte in word) */
				1887	wordnode_T wn_sibling; / next sibling (alternate byte in word,
				1888	always sorted) */
				1889	int wn_refs; /* Nr. of references to this node. Only
				1890	relevant for first node in a list of
				1891	siblings, in following siblings it is
				1892	always one. */
				1893	char_u wn_byte; /* Byte for this node. NUL for word end */
				1894
				1895	/* Info for when "wn_byte" is NUL.
				1896	* In PREFIXTREE "wn_region" is used for the prefcondnr.
				1897	* In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
				1898	* "wn_region" the LSW of the wordnr. */
				1899	char_u wn_affixID; /* supported/required prefix ID or 0 */
				1900	short_u wn_flags; /* WF_ flags */
				1901	short wn_region; /* region mask */
				1902
				1903	#ifdef SPELL_PRINTTREE
				1904	int wn_nr; /* sequence nr for printing */
				1905	#endif
				1906	};
				1907
				1908	#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
				1909
				1910	#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
				1911
				1912	/*
				1913	* Info used while reading the spell files.
				1914	*/
				1915	typedef struct spellinfo_S
				1916	{
				1917	wordnode_T si_foldroot; / tree with case-folded words */
				1918	long si_foldwcount; /* nr of words in si_foldroot */
				1919
				1920	wordnode_T si_keeproot; / tree with keep-case words */
				1921	long si_keepwcount; /* nr of words in si_keeproot */
				1922
				1923	wordnode_T si_prefroot; / tree with postponed prefixes */
				1924
				1925	long si_sugtree; /* creating the soundfolding trie */
				1926
				1927	sblock_T si_blocks; / memory blocks used */
				1928	long si_blocks_cnt; /* memory blocks allocated */
				1929	int si_did_emsg; /* TRUE when ran out of memory */
				1930
				1931	long si_compress_cnt; /* words to add before lowering
				1932	compression limit */
				1933	wordnode_T si_first_free; / List of nodes that have been freed during
				1934	compression, linked by "wn_child" field. */
				1935	long si_free_count; /* number of nodes in si_first_free */
				1936	#ifdef SPELL_PRINTTREE
				1937	int si_wordnode_nr; /* sequence nr for nodes */
				1938	#endif
				1939	buf_T si_spellbuf; / buffer used to store soundfold word table */
				1940
				1941	int si_ascii; /* handling only ASCII words */
				1942	int si_add; /* addition file */
				1943	int si_clear_chartab; /* when TRUE clear char tables */
				1944	int si_region; /* region mask */
				1945	vimconv_T si_conv; /* for conversion to 'encoding' */
				1946	int si_memtot; /* runtime memory used */
				1947	int si_verbose; /* verbose messages */
				1948	int si_msg_count; /* number of words added since last message */
				1949	char_u si_info; / info text chars or NULL */
				1950	int si_region_count; /* number of regions supported (1 when there
				1951	are no regions) */
				1952	char_u si_region_name[17]; /* region names; used only if
				1953	* si_region_count > 1) */
				1954
				1955	garray_T si_rep; /* list of fromto_T entries from REP lines */
				1956	garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
				1957	garray_T si_sal; /* list of fromto_T entries from SAL lines */
				1958	char_u si_sofofr; / SOFOFROM text */
				1959	char_u si_sofoto; / SOFOTO text */
				1960	int si_nosugfile; /* NOSUGFILE item found */
				1961	int si_nosplitsugs; /* NOSPLITSUGS item found */
				1962	int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
				1963	int si_followup; /* soundsalike: ? */
				1964	int si_collapse; /* soundsalike: ? */
				1965	hashtab_T si_commonwords; /* hashtable for common words */
				1966	time_t si_sugtime; /* timestamp for .sug file */
				1967	int si_rem_accents; /* soundsalike: remove accents */
				1968	garray_T si_map; /* MAP info concatenated */
				1969	char_u si_midword; / MIDWORD chars or NULL */
				1970	int si_compmax; /* max nr of words for compounding */
				1971	int si_compminlen; /* minimal length for compounding */
				1972	int si_compsylmax; /* max nr of syllables for compounding */
				1973	int si_compoptions; /* COMP_ flags */
				1974	garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as
				1975	a string */
				1976	char_u si_compflags; / flags used for compounding */
				1977	char_u si_nobreak; /* NOBREAK */
				1978	char_u si_syllable; / syllable string */
				1979	garray_T si_prefcond; /* table with conditions for postponed
				1980	* prefixes, each stored as a string */
				1981	int si_newprefID; /* current value for ah_newID */
				1982	int si_newcompID; /* current value for compound ID */
				1983	} spellinfo_T;
				1984
				1985	static afffile_T spell_read_aff(spellinfo_T spin, char_u *fname);
				1986	static int is_aff_rule(char_u *items, int itemcnt, char rulename, int mincount);
				1987	static void aff_process_flags(afffile_T affile, affentry_T entry);
				1988	static int spell_info_item(char_u *s);
				1989	static unsigned affitem2flag(int flagtype, char_u item, char_u fname, int lnum);
				1990	static unsigned get_affitem(int flagtype, char_u **pp);
				1991	static void process_compflags(spellinfo_T spin, afffile_T aff, char_u *compflags);
				1992	static void check_renumber(spellinfo_T *spin);
				1993	static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag);
				1994	static void aff_check_number(int spinval, int affval, char *name);
				1995	static void aff_check_string(char_u spinval, char_u affval, char *name);
				1996	static int str_equal(char_u s1, char_u s2);
				1997	static void add_fromto(spellinfo_T spin, garray_T gap, char_u from, char_u to);
				1998	static int sal_to_bool(char_u *s);
				1999	static void spell_free_aff(afffile_T *aff);
				2000	static int spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile);
				2001	static int get_affix_flags(afffile_T affile, char_u afflist);
				2002	static int get_pfxlist(afffile_T affile, char_u afflist, char_u *store_afflist);
				2003	static void get_compflags(afffile_T affile, char_u afflist, char_u *store_afflist);
				2004	static int store_aff_word(spellinfo_T spin, char_u word, char_u afflist, afffile_T affile, hashtab_T ht, hashtab_T xht, int condit, int flags, char_u *pfxlist, int pfxlen);
				2005	static int spell_read_wordfile(spellinfo_T spin, char_u fname);
				2006	static void getroom(spellinfo_T spin, size_t len, int align);
				2007	static char_u getroom_save(spellinfo_T spin, char_u *s);
				2008	static void free_blocks(sblock_T *bl);
				2009	static wordnode_T wordtree_alloc(spellinfo_T spin);
				2010	static int store_word(spellinfo_T spin, char_u word, int flags, int region, char_u *pfxlist, int need_affix);
				2011	static int tree_add_word(spellinfo_T spin, char_u word, wordnode_T *tree, int flags, int region, int affixID);
				2012	static wordnode_T get_wordnode(spellinfo_T spin);
				2013	static int deref_wordnode(spellinfo_T spin, wordnode_T node);
				2014	static void free_wordnode(spellinfo_T spin, wordnode_T n);
				2015	static void wordtree_compress(spellinfo_T spin, wordnode_T root);
				2016	static int node_compress(spellinfo_T spin, wordnode_T node, hashtab_T ht, int tot);
				2017	static int node_equal(wordnode_T n1, wordnode_T n2);
				2018	static int write_vim_spell(spellinfo_T spin, char_u fname);
				2019	static void clear_node(wordnode_T *node);
				2020	static int put_node(FILE fd, wordnode_T node, int idx, int regionmask, int prefixtree);
				2021	static void spell_make_sugfile(spellinfo_T spin, char_u wfname);
				2022	static int sug_filltree(spellinfo_T spin, slang_T slang);
				2023	static int sug_maketable(spellinfo_T *spin);
				2024	static int sug_filltable(spellinfo_T spin, wordnode_T node, int startwordnr, garray_T *gap);
				2025	static int offset2bytes(int nr, char_u *buf);
				2026	static void sug_write(spellinfo_T spin, char_u fname);
				2027	static void spell_message(spellinfo_T spin, char_u str);
				2028	static void init_spellfile(void);
				2029
				2030	/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
				2031	* but it must be negative to indicate the prefix tree to tree_add_word().
				2032	* Use a negative number with the lower 8 bits zero. */
				2033	#define PFX_FLAGS -256
				2034
				2035	/* flags for "condit" argument of store_aff_word() */
				2036	#define CONDIT_COMB 1 /* affix must combine */
				2037	#define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */
				2038	#define CONDIT_SUF 4 /* add a suffix for matching flags */
				2039	#define CONDIT_AFF 8 /* word already has an affix */
				2040
				2041	/*
				2042	* Tunable parameters for when the tree is compressed. See 'mkspellmem'.
				2043	*/
				2044	static long compress_start = 30000; /* memory / SBLOCKSIZE */
				2045	static long compress_inc = 100; /* memory / SBLOCKSIZE */
				2046	static long compress_added = 500000; /* word count */
				2047
				2048	/*
				2049	* Check the 'mkspellmem' option. Return FAIL if it's wrong.
				2050	* Sets "sps_flags".
				2051	*/
				2052	int
				2053	spell_check_msm(void)
				2054	{
				2055	char_u *p = p_msm;
				2056	long start = 0;
				2057	long incr = 0;
				2058	long added = 0;
				2059
				2060	if (!VIM_ISDIGIT(*p))
				2061	return FAIL;
				2062	/* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
				2063	start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
				2064	if (*p != ',')
				2065	return FAIL;
				2066	++p;
				2067	if (!VIM_ISDIGIT(*p))
				2068	return FAIL;
				2069	incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
				2070	if (*p != ',')
				2071	return FAIL;
				2072	++p;
				2073	if (!VIM_ISDIGIT(*p))
				2074	return FAIL;
				2075	added = getdigits(&p) * 1024;
				2076	if (*p != NUL)
				2077	return FAIL;
				2078
				2079	if (start == 0 \|\| incr == 0 \|\| added == 0 \|\| incr > start)
				2080	return FAIL;
				2081
				2082	compress_start = start;
				2083	compress_inc = incr;
				2084	compress_added = added;
				2085	return OK;
				2086	}
				2087
				2088	#ifdef SPELL_PRINTTREE
				2089	/*
				2090	* For debugging the tree code: print the current tree in a (more or less)
				2091	* readable format, so that we can see what happens when adding a word and/or
				2092	* compressing the tree.
				2093	* Based on code from Olaf Seibert.
				2094	*/
				2095	#define PRINTLINESIZE 1000
				2096	#define PRINTWIDTH 6
				2097
				2098	#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
				2099	PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
				2100
				2101	static char line1[PRINTLINESIZE];
				2102	static char line2[PRINTLINESIZE];
				2103	static char line3[PRINTLINESIZE];
				2104
				2105	static void
				2106	spell_clear_flags(wordnode_T *node)
				2107	{
				2108	wordnode_T *np;
				2109
				2110	for (np = node; np != NULL; np = np->wn_sibling)
				2111	{
				2112	np->wn_u1.index = FALSE;
				2113	spell_clear_flags(np->wn_child);
				2114	}
				2115	}
				2116
				2117	static void
				2118	spell_print_node(wordnode_T *node, int depth)
				2119	{
				2120	if (node->wn_u1.index)
				2121	{
				2122	/* Done this node before, print the reference. */
				2123	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
				2124	PRINTSOME(line2, depth, " ", 0, 0);
				2125	PRINTSOME(line3, depth, " ", 0, 0);
				2126	msg((char_u *)line1);
				2127	msg((char_u *)line2);
				2128	msg((char_u *)line3);
				2129	}
				2130	else
				2131	{
				2132	node->wn_u1.index = TRUE;
				2133
				2134	if (node->wn_byte != NUL)
				2135	{
				2136	if (node->wn_child != NULL)
				2137	PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
				2138	else
				2139	/* Cannot happen? */
				2140	PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
				2141	}
				2142	else
				2143	PRINTSOME(line1, depth, " $ ", 0, 0);
				2144
				2145	PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
				2146
				2147	if (node->wn_sibling != NULL)
				2148	PRINTSOME(line3, depth, " \| ", 0, 0);
				2149	else
				2150	PRINTSOME(line3, depth, " ", 0, 0);
				2151
				2152	if (node->wn_byte == NUL)
				2153	{
				2154	msg((char_u *)line1);
				2155	msg((char_u *)line2);
				2156	msg((char_u *)line3);
				2157	}
				2158
				2159	/* do the children */
				2160	if (node->wn_byte != NUL && node->wn_child != NULL)
				2161	spell_print_node(node->wn_child, depth + 1);
				2162
				2163	/* do the siblings */
				2164	if (node->wn_sibling != NULL)
				2165	{
				2166	/* get rid of all parent details except \| */
				2167	STRCPY(line1, line3);
				2168	STRCPY(line2, line3);
				2169	spell_print_node(node->wn_sibling, depth);
				2170	}
				2171	}
				2172	}
				2173
				2174	static void
				2175	spell_print_tree(wordnode_T *root)
				2176	{
				2177	if (root != NULL)
				2178	{
				2179	/* Clear the "wn_u1.index" fields, used to remember what has been
				2180	* done. */
				2181	spell_clear_flags(root);
				2182
				2183	/* Recursively print the tree. */
				2184	spell_print_node(root, 0);
				2185	}
				2186	}
				2187	#endif /* SPELL_PRINTTREE */
				2188
				2189	/*
				2190	* Read the affix file "fname".
				2191	* Returns an afffile_T, NULL for complete failure.
				2192	*/
				2193	static afffile_T *
				2194	spell_read_aff(spellinfo_T spin, char_u fname)
				2195	{
				2196	FILE *fd;
				2197	afffile_T *aff;
				2198	char_u rline[MAXLINELEN];
				2199	char_u *line;
				2200	char_u *pc = NULL;
				2201	#define MAXITEMCNT 30
				2202	char_u *(items[MAXITEMCNT]);
				2203	int itemcnt;
				2204	char_u *p;
				2205	int lnum = 0;
				2206	affheader_T *cur_aff = NULL;
				2207	int did_postpone_prefix = FALSE;
				2208	int aff_todo = 0;
				2209	hashtab_T *tp;
				2210	char_u *low = NULL;
				2211	char_u *fol = NULL;
				2212	char_u *upp = NULL;
				2213	int do_rep;
				2214	int do_repsal;
				2215	int do_sal;
				2216	int do_mapline;
				2217	int found_map = FALSE;
				2218	hashitem_T *hi;
				2219	int l;
				2220	int compminlen = 0; /* COMPOUNDMIN value */
				2221	int compsylmax = 0; /* COMPOUNDSYLMAX value */
				2222	int compoptions = 0; /* COMP_ flags */
				2223	int compmax = 0; /* COMPOUNDWORDMAX value */
				2224	char_u compflags = NULL; / COMPOUNDFLAG and COMPOUNDRULE
				2225	concatenated */
				2226	char_u midword = NULL; / MIDWORD value */
				2227	char_u syllable = NULL; / SYLLABLE value */
				2228	char_u sofofrom = NULL; / SOFOFROM value */
				2229	char_u sofoto = NULL; / SOFOTO value */
				2230
				2231	/*
				2232	* Open the file.
				2233	*/
				2234	fd = mch_fopen((char *)fname, "r");
				2235	if (fd == NULL)
				2236	{
				2237	EMSG2(_(e_notopen), fname);
				2238	return NULL;
				2239	}
				2240
				2241	vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
				2242	spell_message(spin, IObuff);
				2243
				2244	/* Only do REP lines when not done in another .aff file already. */
				2245	do_rep = spin->si_rep.ga_len == 0;
				2246
				2247	/* Only do REPSAL lines when not done in another .aff file already. */
				2248	do_repsal = spin->si_repsal.ga_len == 0;
				2249
				2250	/* Only do SAL lines when not done in another .aff file already. */
				2251	do_sal = spin->si_sal.ga_len == 0;
				2252
				2253	/* Only do MAP lines when not done in another .aff file already. */
				2254	do_mapline = spin->si_map.ga_len == 0;
				2255
				2256	/*
				2257	* Allocate and init the afffile_T structure.
				2258	*/
				2259	aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
				2260	if (aff == NULL)
				2261	{
				2262	fclose(fd);
				2263	return NULL;
				2264	}
				2265	hash_init(&aff->af_pref);
				2266	hash_init(&aff->af_suff);
				2267	hash_init(&aff->af_comp);
				2268
				2269	/*
				2270	* Read all the lines in the file one by one.
				2271	*/
				2272	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				2273	{
				2274	line_breakcheck();
				2275	++lnum;
				2276
				2277	/* Skip comment lines. */
				2278	if (*rline == '#')
				2279	continue;
				2280
				2281	/* Convert from "SET" to 'encoding' when needed. */
				2282	vim_free(pc);
				2283	#ifdef FEAT_MBYTE
				2284	if (spin->si_conv.vc_type != CONV_NONE)
				2285	{
				2286	pc = string_convert(&spin->si_conv, rline, NULL);
				2287	if (pc == NULL)
				2288	{
				2289	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				2290	fname, lnum, rline);
				2291	continue;
				2292	}
				2293	line = pc;
				2294	}
				2295	else
				2296	#endif
				2297	{
				2298	pc = NULL;
				2299	line = rline;
				2300	}
				2301
				2302	/* Split the line up in white separated items. Put a NUL after each
				2303	* item. */
				2304	itemcnt = 0;
				2305	for (p = line; ; )
				2306	{
				2307	while (p != NUL && p <= ' ') /* skip white space and CR/NL */
				2308	++p;
				2309	if (*p == NUL)
				2310	break;
				2311	if (itemcnt == MAXITEMCNT) /* too many items */
				2312	break;
				2313	items[itemcnt++] = p;
				2314	/* A few items have arbitrary text argument, don't split them. */
				2315	if (itemcnt == 2 && spell_info_item(items[0]))
				2316	while (p >= ' ' \|\| p == TAB) /* skip until CR/NL */
				2317	++p;
				2318	else
				2319	while (p > ' ') / skip until white space or CR/NL */
				2320	++p;
				2321	if (*p == NUL)
				2322	break;
				2323	*p++ = NUL;
				2324	}
				2325
				2326	/* Handle non-empty lines. */
				2327	if (itemcnt > 0)
				2328	{
				2329	if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
				2330	{
				2331	#ifdef FEAT_MBYTE
				2332	/* Setup for conversion from "ENC" to 'encoding'. */
				2333	aff->af_enc = enc_canonize(items[1]);
				2334	if (aff->af_enc != NULL && !spin->si_ascii
				2335	&& convert_setup(&spin->si_conv, aff->af_enc,
				2336	p_enc) == FAIL)
				2337	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				2338	fname, aff->af_enc, p_enc);
				2339	spin->si_conv.vc_fail = TRUE;
				2340	#else
				2341	smsg((char_u *)_("Conversion in %s not supported"), fname);
				2342	#endif
				2343	}
				2344	else if (is_aff_rule(items, itemcnt, "FLAG", 2)
				2345	&& aff->af_flagtype == AFT_CHAR)
				2346	{
				2347	if (STRCMP(items[1], "long") == 0)
				2348	aff->af_flagtype = AFT_LONG;
				2349	else if (STRCMP(items[1], "num") == 0)
				2350	aff->af_flagtype = AFT_NUM;
				2351	else if (STRCMP(items[1], "caplong") == 0)
				2352	aff->af_flagtype = AFT_CAPLONG;
				2353	else
				2354	smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
				2355	fname, lnum, items[1]);
				2356	if (aff->af_rare != 0
				2357	\|\| aff->af_keepcase != 0
				2358	\|\| aff->af_bad != 0
				2359	\|\| aff->af_needaffix != 0
				2360	\|\| aff->af_circumfix != 0
				2361	\|\| aff->af_needcomp != 0
				2362	\|\| aff->af_comproot != 0
				2363	\|\| aff->af_nosuggest != 0
				2364	\|\| compflags != NULL
				2365	\|\| aff->af_suff.ht_used > 0
				2366	\|\| aff->af_pref.ht_used > 0)
				2367	smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
				2368	fname, lnum, items[1]);
				2369	}
				2370	else if (spell_info_item(items[0]))
				2371	{
				2372	p = (char_u *)getroom(spin,
				2373	(spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
				2374	+ STRLEN(items[0])
				2375	+ STRLEN(items[1]) + 3, FALSE);
				2376	if (p != NULL)
				2377	{
				2378	if (spin->si_info != NULL)
				2379	{
				2380	STRCPY(p, spin->si_info);
				2381	STRCAT(p, "\n");
				2382	}
				2383	STRCAT(p, items[0]);
				2384	STRCAT(p, " ");
				2385	STRCAT(p, items[1]);
				2386	spin->si_info = p;
				2387	}
				2388	}
				2389	else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
				2390	&& midword == NULL)
				2391	{
				2392	midword = getroom_save(spin, items[1]);
				2393	}
				2394	else if (is_aff_rule(items, itemcnt, "TRY", 2))
				2395	{
				2396	/* ignored, we look in the tree for what chars may appear */
				2397	}
				2398	/* TODO: remove "RAR" later */
				2399	else if ((is_aff_rule(items, itemcnt, "RAR", 2)
				2400	\|\| is_aff_rule(items, itemcnt, "RARE", 2))
				2401	&& aff->af_rare == 0)
				2402	{
				2403	aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
				2404	fname, lnum);
				2405	}
				2406	/* TODO: remove "KEP" later */
				2407	else if ((is_aff_rule(items, itemcnt, "KEP", 2)
				2408	\|\| is_aff_rule(items, itemcnt, "KEEPCASE", 2))
				2409	&& aff->af_keepcase == 0)
				2410	{
				2411	aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
				2412	fname, lnum);
				2413	}
				2414	else if ((is_aff_rule(items, itemcnt, "BAD", 2)
				2415	\|\| is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
				2416	&& aff->af_bad == 0)
				2417	{
				2418	aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
				2419	fname, lnum);
				2420	}
				2421	else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
				2422	&& aff->af_needaffix == 0)
				2423	{
				2424	aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
				2425	fname, lnum);
				2426	}
				2427	else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
				2428	&& aff->af_circumfix == 0)
				2429	{
				2430	aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
				2431	fname, lnum);
				2432	}
				2433	else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
				2434	&& aff->af_nosuggest == 0)
				2435	{
				2436	aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
				2437	fname, lnum);
				2438	}
				2439	else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
				2440	\|\| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
				2441	&& aff->af_needcomp == 0)
				2442	{
				2443	aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
				2444	fname, lnum);
				2445	}
				2446	else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
				2447	&& aff->af_comproot == 0)
				2448	{
				2449	aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
				2450	fname, lnum);
				2451	}
				2452	else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
				2453	&& aff->af_compforbid == 0)
				2454	{
				2455	aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
				2456	fname, lnum);
				2457	if (aff->af_pref.ht_used > 0)
				2458	smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
				2459	fname, lnum);
				2460	}
				2461	else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
				2462	&& aff->af_comppermit == 0)
				2463	{
				2464	aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
				2465	fname, lnum);
				2466	if (aff->af_pref.ht_used > 0)
				2467	smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
				2468	fname, lnum);
				2469	}
				2470	else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
				2471	&& compflags == NULL)
				2472	{
				2473	/* Turn flag "c" into COMPOUNDRULE compatible string "c+",
				2474	* "Na" into "Na+", "1234" into "1234+". */
				2475	p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
				2476	if (p != NULL)
				2477	{
				2478	STRCPY(p, items[1]);
				2479	STRCAT(p, "+");
				2480	compflags = p;
				2481	}
				2482	}
				2483	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
				2484	{
				2485	/* We don't use the count, but do check that it's a number and
				2486	* not COMPOUNDRULE mistyped. */
				2487	if (atoi((char *)items[1]) == 0)
				2488	smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"),
				2489	fname, lnum, items[1]);
				2490	}
				2491	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
				2492	{
				2493	/* Don't use the first rule if it is a number. */
				2494	if (compflags != NULL \|\| *skipdigits(items[1]) != NUL)
				2495	{
				2496	/* Concatenate this string to previously defined ones,
				2497	* using a slash to separate them. */
				2498	l = (int)STRLEN(items[1]) + 1;
				2499	if (compflags != NULL)
				2500	l += (int)STRLEN(compflags) + 1;
				2501	p = getroom(spin, l, FALSE);
				2502	if (p != NULL)
				2503	{
				2504	if (compflags != NULL)
				2505	{
				2506	STRCPY(p, compflags);
				2507	STRCAT(p, "/");
				2508	}
				2509	STRCAT(p, items[1]);
				2510	compflags = p;
				2511	}
				2512	}
				2513	}
				2514	else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
				2515	&& compmax == 0)
				2516	{
				2517	compmax = atoi((char *)items[1]);
				2518	if (compmax == 0)
				2519	smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
				2520	fname, lnum, items[1]);
				2521	}
				2522	else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
				2523	&& compminlen == 0)
				2524	{
				2525	compminlen = atoi((char *)items[1]);
				2526	if (compminlen == 0)
				2527	smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
				2528	fname, lnum, items[1]);
				2529	}
				2530	else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
				2531	&& compsylmax == 0)
				2532	{
				2533	compsylmax = atoi((char *)items[1]);
				2534	if (compsylmax == 0)
				2535	smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
				2536	fname, lnum, items[1]);
				2537	}
				2538	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
				2539	{
				2540	compoptions \|= COMP_CHECKDUP;
				2541	}
				2542	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
				2543	{
				2544	compoptions \|= COMP_CHECKREP;
				2545	}
				2546	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
				2547	{
				2548	compoptions \|= COMP_CHECKCASE;
				2549	}
				2550	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
				2551	{
				2552	compoptions \|= COMP_CHECKTRIPLE;
				2553	}
				2554	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
				2555	{
				2556	if (atoi((char *)items[1]) == 0)
				2557	smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
				2558	fname, lnum, items[1]);
				2559	}
				2560	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
				2561	{
				2562	garray_T *gap = &spin->si_comppat;
				2563	int i;
				2564
				2565	/* Only add the couple if it isn't already there. */
				2566	for (i = 0; i < gap->ga_len - 1; i += 2)
				2567	if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
				2568	&& STRCMP(((char_u **)(gap->ga_data))[i + 1],
				2569	items[2]) == 0)
				2570	break;
				2571	if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
				2572	{
				2573	((char_u **)(gap->ga_data))[gap->ga_len++]
				2574	= getroom_save(spin, items[1]);
				2575	((char_u **)(gap->ga_data))[gap->ga_len++]
				2576	= getroom_save(spin, items[2]);
				2577	}
				2578	}
				2579	else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
				2580	&& syllable == NULL)
				2581	{
				2582	syllable = getroom_save(spin, items[1]);
				2583	}
				2584	else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
				2585	{
				2586	spin->si_nobreak = TRUE;
				2587	}
				2588	else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
				2589	{
				2590	spin->si_nosplitsugs = TRUE;
				2591	}
				2592	else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
				2593	{
				2594	spin->si_nocompoundsugs = TRUE;
				2595	}
				2596	else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
				2597	{
				2598	spin->si_nosugfile = TRUE;
				2599	}
				2600	else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
				2601	{
				2602	aff->af_pfxpostpone = TRUE;
				2603	}
				2604	else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
				2605	{
				2606	aff->af_ignoreextra = TRUE;
				2607	}
				2608	else if ((STRCMP(items[0], "PFX") == 0
				2609	\|\| STRCMP(items[0], "SFX") == 0)
				2610	&& aff_todo == 0
				2611	&& itemcnt >= 4)
				2612	{
				2613	int lasti = 4;
				2614	char_u key[AH_KEY_LEN];
				2615
				2616	if (*items[0] == 'P')
				2617	tp = &aff->af_pref;
				2618	else
				2619	tp = &aff->af_suff;
				2620
				2621	/* Myspell allows the same affix name to be used multiple
				2622	* times. The affix files that do this have an undocumented
				2623	* "S" flag on all but the last block, thus we check for that
				2624	* and store it in ah_follows. */
				2625	vim_strncpy(key, items[1], AH_KEY_LEN - 1);
				2626	hi = hash_find(tp, key);
				2627	if (!HASHITEM_EMPTY(hi))
				2628	{
				2629	cur_aff = HI2AH(hi);
				2630	if (cur_aff->ah_combine != (*items[2] == 'Y'))
				2631	smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
				2632	fname, lnum, items[1]);
				2633	if (!cur_aff->ah_follows)
				2634	smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
				2635	fname, lnum, items[1]);
				2636	}
				2637	else
				2638	{
				2639	/* New affix letter. */
				2640	cur_aff = (affheader_T *)getroom(spin,
				2641	sizeof(affheader_T), TRUE);
				2642	if (cur_aff == NULL)
				2643	break;
				2644	cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
				2645	fname, lnum);
				2646	if (cur_aff->ah_flag == 0 \|\| STRLEN(items[1]) >= AH_KEY_LEN)
				2647	break;
				2648	if (cur_aff->ah_flag == aff->af_bad
				2649	\|\| cur_aff->ah_flag == aff->af_rare
				2650	\|\| cur_aff->ah_flag == aff->af_keepcase
				2651	\|\| cur_aff->ah_flag == aff->af_needaffix
				2652	\|\| cur_aff->ah_flag == aff->af_circumfix
				2653	\|\| cur_aff->ah_flag == aff->af_nosuggest
				2654	\|\| cur_aff->ah_flag == aff->af_needcomp
				2655	\|\| cur_aff->ah_flag == aff->af_comproot)
				2656	smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
				2657	fname, lnum, items[1]);
				2658	STRCPY(cur_aff->ah_key, items[1]);
				2659	hash_add(tp, cur_aff->ah_key);
				2660
				2661	cur_aff->ah_combine = (*items[2] == 'Y');
				2662	}
				2663
				2664	/* Check for the "S" flag, which apparently means that another
				2665	* block with the same affix name is following. */
				2666	if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
				2667	{
				2668	++lasti;
				2669	cur_aff->ah_follows = TRUE;
				2670	}
				2671	else
				2672	cur_aff->ah_follows = FALSE;
				2673
				2674	/* Myspell allows extra text after the item, but that might
				2675	* mean mistakes go unnoticed. Require a comment-starter. */
				2676	if (itemcnt > lasti && *items[lasti] != '#')
				2677	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2678
				2679	if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
				2680	smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
				2681	fname, lnum, items[2]);
				2682
				2683	if (*items[0] == 'P' && aff->af_pfxpostpone)
				2684	{
				2685	if (cur_aff->ah_newID == 0)
				2686	{
				2687	/* Use a new number in the .spl file later, to be able
				2688	* to handle multiple .aff files. */
				2689	check_renumber(spin);
				2690	cur_aff->ah_newID = ++spin->si_newprefID;
				2691
				2692	/* We only really use ah_newID if the prefix is
				2693	* postponed. We know that only after handling all
				2694	* the items. */
				2695	did_postpone_prefix = FALSE;
				2696	}
				2697	else
				2698	/* Did use the ID in a previous block. */
				2699	did_postpone_prefix = TRUE;
				2700	}
				2701
				2702	aff_todo = atoi((char *)items[3]);
				2703	}
				2704	else if ((STRCMP(items[0], "PFX") == 0
				2705	\|\| STRCMP(items[0], "SFX") == 0)
				2706	&& aff_todo > 0
				2707	&& STRCMP(cur_aff->ah_key, items[1]) == 0
				2708	&& itemcnt >= 5)
				2709	{
				2710	affentry_T *aff_entry;
				2711	int upper = FALSE;
				2712	int lasti = 5;
				2713
				2714	/* Myspell allows extra text after the item, but that might
				2715	* mean mistakes go unnoticed. Require a comment-starter,
				2716	* unless IGNOREEXTRA is used. Hunspell uses a "-" item. */
				2717	if (itemcnt > lasti
				2718	&& !aff->af_ignoreextra
				2719	&& *items[lasti] != '#'
				2720	&& (STRCMP(items[lasti], "-") != 0
				2721	\|\| itemcnt != lasti + 1))
				2722	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2723
				2724	/* New item for an affix letter. */
				2725	--aff_todo;
				2726	aff_entry = (affentry_T *)getroom(spin,
				2727	sizeof(affentry_T), TRUE);
				2728	if (aff_entry == NULL)
				2729	break;
				2730
				2731	if (STRCMP(items[2], "0") != 0)
				2732	aff_entry->ae_chop = getroom_save(spin, items[2]);
				2733	if (STRCMP(items[3], "0") != 0)
				2734	{
				2735	aff_entry->ae_add = getroom_save(spin, items[3]);
				2736
				2737	/* Recognize flags on the affix: abcd/XYZ */
				2738	aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
				2739	if (aff_entry->ae_flags != NULL)
				2740	{
				2741	*aff_entry->ae_flags++ = NUL;
				2742	aff_process_flags(aff, aff_entry);
				2743	}
				2744	}
				2745
				2746	/* Don't use an affix entry with non-ASCII characters when
				2747	* "spin->si_ascii" is TRUE. */
				2748	if (!spin->si_ascii \|\| !(has_non_ascii(aff_entry->ae_chop)
				2749	\|\| has_non_ascii(aff_entry->ae_add)))
				2750	{
				2751	aff_entry->ae_next = cur_aff->ah_first;
				2752	cur_aff->ah_first = aff_entry;
				2753
				2754	if (STRCMP(items[4], ".") != 0)
				2755	{
				2756	char_u buf[MAXLINELEN];
				2757
				2758	aff_entry->ae_cond = getroom_save(spin, items[4]);
				2759	if (*items[0] == 'P')
				2760	sprintf((char *)buf, "^%s", items[4]);
				2761	else
				2762	sprintf((char *)buf, "%s$", items[4]);
				2763	aff_entry->ae_prog = vim_regcomp(buf,
				2764	RE_MAGIC + RE_STRING + RE_STRICT);
				2765	if (aff_entry->ae_prog == NULL)
				2766	smsg((char_u *)_("Broken condition in %s line %d: %s"),
				2767	fname, lnum, items[4]);
				2768	}
				2769
				2770	/* For postponed prefixes we need an entry in si_prefcond
				2771	* for the condition. Use an existing one if possible.
				2772	* Can't be done for an affix with flags, ignoring
				2773	* COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
				2774	if (*items[0] == 'P' && aff->af_pfxpostpone
				2775	&& aff_entry->ae_flags == NULL)
				2776	{
				2777	/* When the chop string is one lower-case letter and
				2778	* the add string ends in the upper-case letter we set
				2779	* the "upper" flag, clear "ae_chop" and remove the
				2780	* letters from "ae_add". The condition must either
				2781	* be empty or start with the same letter. */
				2782	if (aff_entry->ae_chop != NULL
				2783	&& aff_entry->ae_add != NULL
				2784	#ifdef FEAT_MBYTE
				2785	&& aff_entry->ae_chop[(*mb_ptr2len)(
				2786	aff_entry->ae_chop)] == NUL
				2787	#else
				2788	&& aff_entry->ae_chop[1] == NUL
				2789	#endif
				2790	)
				2791	{
				2792	int c, c_up;
				2793
				2794	c = PTR2CHAR(aff_entry->ae_chop);
				2795	c_up = SPELL_TOUPPER(c);
				2796	if (c_up != c
				2797	&& (aff_entry->ae_cond == NULL
				2798	\|\| PTR2CHAR(aff_entry->ae_cond) == c))
				2799	{
				2800	p = aff_entry->ae_add
				2801	+ STRLEN(aff_entry->ae_add);
				2802	mb_ptr_back(aff_entry->ae_add, p);
				2803	if (PTR2CHAR(p) == c_up)
				2804	{
				2805	upper = TRUE;
				2806	aff_entry->ae_chop = NULL;
				2807	*p = NUL;
				2808
				2809	/* The condition is matched with the
				2810	* actual word, thus must check for the
				2811	* upper-case letter. */
				2812	if (aff_entry->ae_cond != NULL)
				2813	{
				2814	char_u buf[MAXLINELEN];
				2815	#ifdef FEAT_MBYTE
				2816	if (has_mbyte)
				2817	{
				2818	onecap_copy(items[4], buf, TRUE);
				2819	aff_entry->ae_cond = getroom_save(
				2820	spin, buf);
				2821	}
				2822	else
				2823	#endif
				2824	*aff_entry->ae_cond = c_up;
				2825	if (aff_entry->ae_cond != NULL)
				2826	{
				2827	sprintf((char *)buf, "^%s",
				2828	aff_entry->ae_cond);
				2829	vim_regfree(aff_entry->ae_prog);
				2830	aff_entry->ae_prog = vim_regcomp(
				2831	buf, RE_MAGIC + RE_STRING);
				2832	}
				2833	}
				2834	}
				2835	}
				2836	}
				2837
				2838	if (aff_entry->ae_chop == NULL
				2839	&& aff_entry->ae_flags == NULL)
				2840	{
				2841	int idx;
				2842	char_u **pp;
				2843	int n;
				2844
				2845	/* Find a previously used condition. */
				2846	for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
				2847	--idx)
				2848	{
				2849	p = ((char_u **)spin->si_prefcond.ga_data)[idx];
				2850	if (str_equal(p, aff_entry->ae_cond))
				2851	break;
				2852	}
				2853	if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
				2854	{
				2855	/* Not found, add a new condition. */
				2856	idx = spin->si_prefcond.ga_len++;
				2857	pp = ((char_u **)spin->si_prefcond.ga_data)
				2858	+ idx;
				2859	if (aff_entry->ae_cond == NULL)
				2860	*pp = NULL;
				2861	else
				2862	*pp = getroom_save(spin,
				2863	aff_entry->ae_cond);
				2864	}
				2865
				2866	/* Add the prefix to the prefix tree. */
				2867	if (aff_entry->ae_add == NULL)
				2868	p = (char_u *)"";
				2869	else
				2870	p = aff_entry->ae_add;
				2871
				2872	/* PFX_FLAGS is a negative number, so that
				2873	* tree_add_word() knows this is the prefix tree. */
				2874	n = PFX_FLAGS;
				2875	if (!cur_aff->ah_combine)
				2876	n \|= WFP_NC;
				2877	if (upper)
				2878	n \|= WFP_UP;
				2879	if (aff_entry->ae_comppermit)
				2880	n \|= WFP_COMPPERMIT;
				2881	if (aff_entry->ae_compforbid)
				2882	n \|= WFP_COMPFORBID;
				2883	tree_add_word(spin, p, spin->si_prefroot, n,
				2884	idx, cur_aff->ah_newID);
				2885	did_postpone_prefix = TRUE;
				2886	}
				2887
				2888	/* Didn't actually use ah_newID, backup si_newprefID. */
				2889	if (aff_todo == 0 && !did_postpone_prefix)
				2890	{
				2891	--spin->si_newprefID;
				2892	cur_aff->ah_newID = 0;
				2893	}
				2894	}
				2895	}
				2896	}
				2897	else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
				2898	{
				2899	fol = vim_strsave(items[1]);
				2900	}
				2901	else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
				2902	{
				2903	low = vim_strsave(items[1]);
				2904	}
				2905	else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
				2906	{
				2907	upp = vim_strsave(items[1]);
				2908	}
				2909	else if (is_aff_rule(items, itemcnt, "REP", 2)
				2910	\|\| is_aff_rule(items, itemcnt, "REPSAL", 2))
				2911	{
				2912	/* Ignore REP/REPSAL count */;
				2913	if (!isdigit(*items[1]))
				2914	smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
				2915	fname, lnum);
				2916	}
				2917	else if ((STRCMP(items[0], "REP") == 0
				2918	\|\| STRCMP(items[0], "REPSAL") == 0)
				2919	&& itemcnt >= 3)
				2920	{
				2921	/* REP/REPSAL item */
				2922	/* Myspell ignores extra arguments, we require it starts with
				2923	* # to detect mistakes. */
				2924	if (itemcnt > 3 && items[3][0] != '#')
				2925	smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
				2926	if (items[0][3] == 'S' ? do_repsal : do_rep)
				2927	{
				2928	/* Replace underscore with space (can't include a space
				2929	* directly). */
				2930	for (p = items[1]; *p != NUL; mb_ptr_adv(p))
				2931	if (*p == '_')
				2932	*p = ' ';
				2933	for (p = items[2]; *p != NUL; mb_ptr_adv(p))
				2934	if (*p == '_')
				2935	*p = ' ';
				2936	add_fromto(spin, items[0][3] == 'S'
				2937	? &spin->si_repsal
				2938	: &spin->si_rep, items[1], items[2]);
				2939	}
				2940	}
				2941	else if (is_aff_rule(items, itemcnt, "MAP", 2))
				2942	{
				2943	/* MAP item or count */
				2944	if (!found_map)
				2945	{
				2946	/* First line contains the count. */
				2947	found_map = TRUE;
				2948	if (!isdigit(*items[1]))
				2949	smsg((char_u *)_("Expected MAP count in %s line %d"),
				2950	fname, lnum);
				2951	}
				2952	else if (do_mapline)
				2953	{
				2954	int c;
				2955
				2956	/* Check that every character appears only once. */
				2957	for (p = items[1]; *p != NUL; )
				2958	{
				2959	#ifdef FEAT_MBYTE
				2960	c = mb_ptr2char_adv(&p);
				2961	#else
				2962	c = *p++;
				2963	#endif
				2964	if ((spin->si_map.ga_len > 0
				2965	&& vim_strchr(spin->si_map.ga_data, c)
				2966	!= NULL)
				2967	\|\| vim_strchr(p, c) != NULL)
				2968	smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
				2969	fname, lnum);
				2970	}
				2971
				2972	/* We simply concatenate all the MAP strings, separated by
				2973	* slashes. */
				2974	ga_concat(&spin->si_map, items[1]);
				2975	ga_append(&spin->si_map, '/');
				2976	}
				2977	}
				2978	/* Accept "SAL from to" and "SAL from to #comment". */
				2979	else if (is_aff_rule(items, itemcnt, "SAL", 3))
				2980	{
				2981	if (do_sal)
				2982	{
				2983	/* SAL item (sounds-a-like)
				2984	* Either one of the known keys or a from-to pair. */
				2985	if (STRCMP(items[1], "followup") == 0)
				2986	spin->si_followup = sal_to_bool(items[2]);
				2987	else if (STRCMP(items[1], "collapse_result") == 0)
				2988	spin->si_collapse = sal_to_bool(items[2]);
				2989	else if (STRCMP(items[1], "remove_accents") == 0)
				2990	spin->si_rem_accents = sal_to_bool(items[2]);
				2991	else
				2992	/* when "to" is "_" it means empty */
				2993	add_fromto(spin, &spin->si_sal, items[1],
				2994	STRCMP(items[2], "_") == 0 ? (char_u *)""
				2995	: items[2]);
				2996	}
				2997	}
				2998	else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
				2999	&& sofofrom == NULL)
				3000	{
				3001	sofofrom = getroom_save(spin, items[1]);
				3002	}
				3003	else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
				3004	&& sofoto == NULL)
				3005	{
				3006	sofoto = getroom_save(spin, items[1]);
				3007	}
				3008	else if (STRCMP(items[0], "COMMON") == 0)
				3009	{
				3010	int i;
				3011
				3012	for (i = 1; i < itemcnt; ++i)
				3013	{
				3014	if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
				3015	items[i])))
				3016	{
				3017	p = vim_strsave(items[i]);
				3018	if (p == NULL)
				3019	break;
				3020	hash_add(&spin->si_commonwords, p);
				3021	}
				3022	}
				3023	}
				3024	else
				3025	smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
				3026	fname, lnum, items[0]);
				3027	}
				3028	}
				3029
				3030	if (fol != NULL \|\| low != NULL \|\| upp != NULL)
				3031	{
				3032	if (spin->si_clear_chartab)
				3033	{
				3034	/* Clear the char type tables, don't want to use any of the
				3035	* currently used spell properties. */
				3036	init_spell_chartab();
				3037	spin->si_clear_chartab = FALSE;
				3038	}
				3039
				3040	/*
				3041	* Don't write a word table for an ASCII file, so that we don't check
				3042	* for conflicts with a word table that matches 'encoding'.
				3043	* Don't write one for utf-8 either, we use utf_*() and
				3044	* mb_get_class(), the list of chars in the file will be incomplete.
				3045	*/
				3046	if (!spin->si_ascii
				3047	#ifdef FEAT_MBYTE
				3048	&& !enc_utf8
				3049	#endif
				3050	)
				3051	{
				3052	if (fol == NULL \|\| low == NULL \|\| upp == NULL)
				3053	smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
				3054	else
				3055	(void)set_spell_chartab(fol, low, upp);
				3056	}
				3057
				3058	vim_free(fol);
				3059	vim_free(low);
				3060	vim_free(upp);
				3061	}
				3062
				3063	/* Use compound specifications of the .aff file for the spell info. */
				3064	if (compmax != 0)
				3065	{
				3066	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
				3067	spin->si_compmax = compmax;
				3068	}
				3069
				3070	if (compminlen != 0)
				3071	{
				3072	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
				3073	spin->si_compminlen = compminlen;
				3074	}
				3075
				3076	if (compsylmax != 0)
				3077	{
				3078	if (syllable == NULL)
				3079	smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
				3080	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
				3081	spin->si_compsylmax = compsylmax;
				3082	}
				3083
				3084	if (compoptions != 0)
				3085	{
				3086	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
				3087	spin->si_compoptions \|= compoptions;
				3088	}
				3089
				3090	if (compflags != NULL)
				3091	process_compflags(spin, aff, compflags);
				3092
				3093	/* Check that we didn't use too many renumbered flags. */
				3094	if (spin->si_newcompID < spin->si_newprefID)
				3095	{
				3096	if (spin->si_newcompID == 127 \|\| spin->si_newcompID == 255)
				3097	MSG(_("Too many postponed prefixes"));
				3098	else if (spin->si_newprefID == 0 \|\| spin->si_newprefID == 127)
				3099	MSG(_("Too many compound flags"));
				3100	else
				3101	MSG(_("Too many postponed prefixes and/or compound flags"));
				3102	}
				3103
				3104	if (syllable != NULL)
				3105	{
				3106	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
				3107	spin->si_syllable = syllable;
				3108	}
				3109
				3110	if (sofofrom != NULL \|\| sofoto != NULL)
				3111	{
				3112	if (sofofrom == NULL \|\| sofoto == NULL)
				3113	smsg((char_u *)_("Missing SOFO%s line in %s"),
				3114	sofofrom == NULL ? "FROM" : "TO", fname);
				3115	else if (spin->si_sal.ga_len > 0)
				3116	smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
				3117	else
				3118	{
				3119	aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
				3120	aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
				3121	spin->si_sofofr = sofofrom;
				3122	spin->si_sofoto = sofoto;
				3123	}
				3124	}
				3125
				3126	if (midword != NULL)
				3127	{
				3128	aff_check_string(spin->si_midword, midword, "MIDWORD");
				3129	spin->si_midword = midword;
				3130	}
				3131
				3132	vim_free(pc);
				3133	fclose(fd);
				3134	return aff;
				3135	}
				3136
				3137	/*
				3138	* Return TRUE when items[0] equals "rulename", there are "mincount" items or
				3139	* a comment is following after item "mincount".
				3140	*/
				3141	static int
				3142	is_aff_rule(
				3143	char_u **items,
				3144	int itemcnt,
				3145	char *rulename,
				3146	int mincount)
				3147	{
				3148	return (STRCMP(items[0], rulename) == 0
				3149	&& (itemcnt == mincount
				3150	\|\| (itemcnt > mincount && items[mincount][0] == '#')));
				3151	}
				3152
				3153	/*
				3154	* For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
				3155	* ae_flags to ae_comppermit and ae_compforbid.
				3156	*/
				3157	static void
				3158	aff_process_flags(afffile_T affile, affentry_T entry)
				3159	{
				3160	char_u *p;
				3161	char_u *prevp;
				3162	unsigned flag;
				3163
				3164	if (entry->ae_flags != NULL
				3165	&& (affile->af_compforbid != 0 \|\| affile->af_comppermit != 0))
				3166	{
				3167	for (p = entry->ae_flags; *p != NUL; )
				3168	{
				3169	prevp = p;
				3170	flag = get_affitem(affile->af_flagtype, &p);
				3171	if (flag == affile->af_comppermit \|\| flag == affile->af_compforbid)
				3172	{
				3173	STRMOVE(prevp, p);
				3174	p = prevp;
				3175	if (flag == affile->af_comppermit)
				3176	entry->ae_comppermit = TRUE;
				3177	else
				3178	entry->ae_compforbid = TRUE;
				3179	}
				3180	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3181	++p;
				3182	}
				3183	if (*entry->ae_flags == NUL)
				3184	entry->ae_flags = NULL; /* nothing left */
				3185	}
				3186	}
				3187
				3188	/*
				3189	* Return TRUE if "s" is the name of an info item in the affix file.
				3190	*/
				3191	static int
				3192	spell_info_item(char_u *s)
				3193	{
				3194	return STRCMP(s, "NAME") == 0
				3195	\|\| STRCMP(s, "HOME") == 0
				3196	\|\| STRCMP(s, "VERSION") == 0
				3197	\|\| STRCMP(s, "AUTHOR") == 0
				3198	\|\| STRCMP(s, "EMAIL") == 0
				3199	\|\| STRCMP(s, "COPYRIGHT") == 0;
				3200	}
				3201
				3202	/*
				3203	* Turn an affix flag name into a number, according to the FLAG type.
				3204	* returns zero for failure.
				3205	*/
				3206	static unsigned
				3207	affitem2flag(
				3208	int flagtype,
				3209	char_u *item,
				3210	char_u *fname,
				3211	int lnum)
				3212	{
				3213	unsigned res;
				3214	char_u *p = item;
				3215
				3216	res = get_affitem(flagtype, &p);
				3217	if (res == 0)
				3218	{
				3219	if (flagtype == AFT_NUM)
				3220	smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
				3221	fname, lnum, item);
				3222	else
				3223	smsg((char_u *)_("Illegal flag in %s line %d: %s"),
				3224	fname, lnum, item);
				3225	}
				3226	if (*p != NUL)
				3227	{
				3228	smsg((char_u *)_(e_affname), fname, lnum, item);
				3229	return 0;
				3230	}
				3231
				3232	return res;
				3233	}
				3234
				3235	/*
				3236	* Get one affix name from "*pp" and advance the pointer.
				3237	* Returns zero for an error, still advances the pointer then.
				3238	*/
				3239	static unsigned
				3240	get_affitem(int flagtype, char_u **pp)
				3241	{
				3242	int res;
				3243
				3244	if (flagtype == AFT_NUM)
				3245	{
				3246	if (!VIM_ISDIGIT(**pp))
				3247	{
				3248	++pp; / always advance, avoid getting stuck */
				3249	return 0;
				3250	}
				3251	res = getdigits(pp);
				3252	}
				3253	else
				3254	{
				3255	#ifdef FEAT_MBYTE
				3256	res = mb_ptr2char_adv(pp);
				3257	#else
				3258	res = (pp)++;
				3259	#endif
				3260	if (flagtype == AFT_LONG \|\| (flagtype == AFT_CAPLONG
				3261	&& res >= 'A' && res <= 'Z'))
				3262	{
				3263	if (**pp == NUL)
				3264	return 0;
				3265	#ifdef FEAT_MBYTE
				3266	res = mb_ptr2char_adv(pp) + (res << 16);
				3267	#else
				3268	res = (pp)++ + (res << 16);
				3269	#endif
				3270	}
				3271	}
				3272	return res;
				3273	}
				3274
				3275	/*
				3276	* Process the "compflags" string used in an affix file and append it to
				3277	* spin->si_compflags.
				3278	* The processing involves changing the affix names to ID numbers, so that
				3279	* they fit in one byte.
				3280	*/
				3281	static void
				3282	process_compflags(
				3283	spellinfo_T *spin,
				3284	afffile_T *aff,
				3285	char_u *compflags)
				3286	{
				3287	char_u *p;
				3288	char_u *prevp;
				3289	unsigned flag;
				3290	compitem_T *ci;
				3291	int id;
				3292	int len;
				3293	char_u *tp;
				3294	char_u key[AH_KEY_LEN];
				3295	hashitem_T *hi;
				3296
				3297	/* Make room for the old and the new compflags, concatenated with a / in
				3298	* between. Processing it makes it shorter, but we don't know by how
				3299	* much, thus allocate the maximum. */
				3300	len = (int)STRLEN(compflags) + 1;
				3301	if (spin->si_compflags != NULL)
				3302	len += (int)STRLEN(spin->si_compflags) + 1;
				3303	p = getroom(spin, len, FALSE);
				3304	if (p == NULL)
				3305	return;
				3306	if (spin->si_compflags != NULL)
				3307	{
				3308	STRCPY(p, spin->si_compflags);
				3309	STRCAT(p, "/");
				3310	}
				3311	spin->si_compflags = p;
				3312	tp = p + STRLEN(p);
				3313
				3314	for (p = compflags; *p != NUL; )
				3315	{
				3316	if (vim_strchr((char_u )"/?+[]", *p) != NULL)
				3317	/* Copy non-flag characters directly. */
				3318	tp++ = p++;
				3319	else
				3320	{
				3321	/* First get the flag number, also checks validity. */
				3322	prevp = p;
				3323	flag = get_affitem(aff->af_flagtype, &p);
				3324	if (flag != 0)
				3325	{
				3326	/* Find the flag in the hashtable. If it was used before, use
				3327	* the existing ID. Otherwise add a new entry. */
				3328	vim_strncpy(key, prevp, p - prevp);
				3329	hi = hash_find(&aff->af_comp, key);
				3330	if (!HASHITEM_EMPTY(hi))
				3331	id = HI2CI(hi)->ci_newID;
				3332	else
				3333	{
				3334	ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
				3335	if (ci == NULL)
				3336	break;
				3337	STRCPY(ci->ci_key, key);
				3338	ci->ci_flag = flag;
				3339	/* Avoid using a flag ID that has a special meaning in a
				3340	* regexp (also inside []). */
				3341	do
				3342	{
				3343	check_renumber(spin);
				3344	id = spin->si_newcompID--;
				3345	} while (vim_strchr((char_u )"/?+[]\\-^", id) != NULL);
				3346	ci->ci_newID = id;
				3347	hash_add(&aff->af_comp, ci->ci_key);
				3348	}
				3349	*tp++ = id;
				3350	}
				3351	if (aff->af_flagtype == AFT_NUM && *p == ',')
				3352	++p;
				3353	}
				3354	}
				3355
				3356	*tp = NUL;
				3357	}
				3358
				3359	/*
				3360	* Check that the new IDs for postponed affixes and compounding don't overrun
				3361	* each other. We have almost 255 available, but start at 0-127 to avoid
				3362	* using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
				3363	* When that is used up an error message is given.
				3364	*/
				3365	static void
				3366	check_renumber(spellinfo_T *spin)
				3367	{
				3368	if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
				3369	{
				3370	spin->si_newprefID = 127;
				3371	spin->si_newcompID = 255;
				3372	}
				3373	}
				3374
				3375	/*
				3376	* Return TRUE if flag "flag" appears in affix list "afflist".
				3377	*/
				3378	static int
				3379	flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
				3380	{
				3381	char_u *p;
				3382	unsigned n;
				3383
				3384	switch (flagtype)
				3385	{
				3386	case AFT_CHAR:
				3387	return vim_strchr(afflist, flag) != NULL;
				3388
				3389	case AFT_CAPLONG:
				3390	case AFT_LONG:
				3391	for (p = afflist; *p != NUL; )
				3392	{
				3393	#ifdef FEAT_MBYTE
				3394	n = mb_ptr2char_adv(&p);
				3395	#else
				3396	n = *p++;
				3397	#endif
				3398	if ((flagtype == AFT_LONG \|\| (n >= 'A' && n <= 'Z'))
				3399	&& *p != NUL)
				3400	#ifdef FEAT_MBYTE
				3401	n = mb_ptr2char_adv(&p) + (n << 16);
				3402	#else
				3403	n = *p++ + (n << 16);
				3404	#endif
				3405	if (n == flag)
				3406	return TRUE;
				3407	}
				3408	break;
				3409
				3410	case AFT_NUM:
				3411	for (p = afflist; *p != NUL; )
				3412	{
				3413	n = getdigits(&p);
				3414	if (n == flag)
				3415	return TRUE;
				3416	if (p != NUL) / skip over comma */
				3417	++p;
				3418	}
				3419	break;
				3420	}
				3421	return FALSE;
				3422	}
				3423
				3424	/*
				3425	* Give a warning when "spinval" and "affval" numbers are set and not the same.
				3426	*/
				3427	static void
				3428	aff_check_number(int spinval, int affval, char *name)
				3429	{
				3430	if (spinval != 0 && spinval != affval)
				3431	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3432	}
				3433
				3434	/*
				3435	* Give a warning when "spinval" and "affval" strings are set and not the same.
				3436	*/
				3437	static void
				3438	aff_check_string(char_u spinval, char_u affval, char *name)
				3439	{
				3440	if (spinval != NULL && STRCMP(spinval, affval) != 0)
				3441	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3442	}
				3443
				3444	/*
				3445	* Return TRUE if strings "s1" and "s2" are equal. Also consider both being
				3446	* NULL as equal.
				3447	*/
				3448	static int
				3449	str_equal(char_u s1, char_u s2)
				3450	{
				3451	if (s1 == NULL \|\| s2 == NULL)
				3452	return s1 == s2;
				3453	return STRCMP(s1, s2) == 0;
				3454	}
				3455
				3456	/*
				3457	* Add a from-to item to "gap". Used for REP and SAL items.
				3458	* They are stored case-folded.
				3459	*/
				3460	static void
				3461	add_fromto(
				3462	spellinfo_T *spin,
				3463	garray_T *gap,
				3464	char_u *from,
				3465	char_u *to)
				3466	{
				3467	fromto_T *ftp;
				3468	char_u word[MAXWLEN];
				3469
				3470	if (ga_grow(gap, 1) == OK)
				3471	{
				3472	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
				3473	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
				3474	ftp->ft_from = getroom_save(spin, word);
				3475	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
				3476	ftp->ft_to = getroom_save(spin, word);
				3477	++gap->ga_len;
				3478	}
				3479	}
				3480
				3481	/*
				3482	* Convert a boolean argument in a SAL line to TRUE or FALSE;
				3483	*/
				3484	static int
				3485	sal_to_bool(char_u *s)
				3486	{
				3487	return STRCMP(s, "1") == 0 \|\| STRCMP(s, "true") == 0;
				3488	}
				3489
				3490	/*
				3491	* Free the structure filled by spell_read_aff().
				3492	*/
				3493	static void
				3494	spell_free_aff(afffile_T *aff)
				3495	{
				3496	hashtab_T *ht;
				3497	hashitem_T *hi;
				3498	int todo;
				3499	affheader_T *ah;
				3500	affentry_T *ae;
				3501
				3502	vim_free(aff->af_enc);
				3503
				3504	/* All this trouble to free the "ae_prog" items... */
				3505	for (ht = &aff->af_pref; ; ht = &aff->af_suff)
				3506	{
				3507	todo = (int)ht->ht_used;
				3508	for (hi = ht->ht_array; todo > 0; ++hi)
				3509	{
				3510	if (!HASHITEM_EMPTY(hi))
				3511	{
				3512	--todo;
				3513	ah = HI2AH(hi);
				3514	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3515	vim_regfree(ae->ae_prog);
				3516	}
				3517	}
				3518	if (ht == &aff->af_suff)
				3519	break;
				3520	}
				3521
				3522	hash_clear(&aff->af_pref);
				3523	hash_clear(&aff->af_suff);
				3524	hash_clear(&aff->af_comp);
				3525	}
				3526
				3527	/*
				3528	* Read dictionary file "fname".
				3529	* Returns OK or FAIL;
				3530	*/
				3531	static int
				3532	spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile)
				3533	{
				3534	hashtab_T ht;
				3535	char_u line[MAXLINELEN];
				3536	char_u *p;
				3537	char_u *afflist;
				3538	char_u store_afflist[MAXWLEN];
				3539	int pfxlen;
				3540	int need_affix;
				3541	char_u *dw;
				3542	char_u *pc;
				3543	char_u *w;
				3544	int l;
				3545	hash_T hash;
				3546	hashitem_T *hi;
				3547	FILE *fd;
				3548	int lnum = 1;
				3549	int non_ascii = 0;
				3550	int retval = OK;
				3551	char_u message[MAXLINELEN + MAXWLEN];
				3552	int flags;
				3553	int duplicate = 0;
				3554
				3555	/*
				3556	* Open the file.
				3557	*/
				3558	fd = mch_fopen((char *)fname, "r");
				3559	if (fd == NULL)
				3560	{
				3561	EMSG2(_(e_notopen), fname);
				3562	return FAIL;
				3563	}
				3564
				3565	/* The hashtable is only used to detect duplicated words. */
				3566	hash_init(&ht);
				3567
				3568	vim_snprintf((char *)IObuff, IOSIZE,
				3569	_("Reading dictionary file %s ..."), fname);
				3570	spell_message(spin, IObuff);
				3571
				3572	/* start with a message for the first line */
				3573	spin->si_msg_count = 999999;
				3574
				3575	/* Read and ignore the first line: word count. */
				3576	(void)vim_fgets(line, MAXLINELEN, fd);
				3577	if (!vim_isdigit(*skipwhite(line)))
				3578	EMSG2(_("E760: No word count in %s"), fname);
				3579
				3580	/*
				3581	* Read all the lines in the file one by one.
				3582	* The words are converted to 'encoding' here, before being added to
				3583	* the hashtable.
				3584	*/
				3585	while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
				3586	{
				3587	line_breakcheck();
				3588	++lnum;
				3589	if (line[0] == '#' \|\| line[0] == '/')
				3590	continue; /* comment line */
				3591
				3592	/* Remove CR, LF and white space from the end. White space halfway
				3593	* the word is kept to allow e.g., "et al.". */
				3594	l = (int)STRLEN(line);
				3595	while (l > 0 && line[l - 1] <= ' ')
				3596	--l;
				3597	if (l == 0)
				3598	continue; /* empty line */
				3599	line[l] = NUL;
				3600
				3601	#ifdef FEAT_MBYTE
				3602	/* Convert from "SET" to 'encoding' when needed. */
				3603	if (spin->si_conv.vc_type != CONV_NONE)
				3604	{
				3605	pc = string_convert(&spin->si_conv, line, NULL);
				3606	if (pc == NULL)
				3607	{
				3608	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				3609	fname, lnum, line);
				3610	continue;
				3611	}
				3612	w = pc;
				3613	}
				3614	else
				3615	#endif
				3616	{
				3617	pc = NULL;
				3618	w = line;
				3619	}
				3620
				3621	/* Truncate the word at the "/", set "afflist" to what follows.
				3622	* Replace "\/" by "/" and "\\" by "\". */
				3623	afflist = NULL;
				3624	for (p = w; *p != NUL; mb_ptr_adv(p))
				3625	{
				3626	if (*p == '\\' && (p[1] == '\\' \|\| p[1] == '/'))
				3627	STRMOVE(p, p + 1);
				3628	else if (*p == '/')
				3629	{
				3630	*p = NUL;
				3631	afflist = p + 1;
				3632	break;
				3633	}
				3634	}
				3635
				3636	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				3637	if (spin->si_ascii && has_non_ascii(w))
				3638	{
				3639	++non_ascii;
				3640	vim_free(pc);
				3641	continue;
				3642	}
				3643
				3644	/* This takes time, print a message every 10000 words. */
				3645	if (spin->si_verbose && spin->si_msg_count > 10000)
				3646	{
				3647	spin->si_msg_count = 0;
				3648	vim_snprintf((char *)message, sizeof(message),
				3649	_("line %6d, word %6d - %s"),
				3650	lnum, spin->si_foldwcount + spin->si_keepwcount, w);
				3651	msg_start();
				3652	msg_puts_long_attr(message, 0);
				3653	msg_clr_eos();
				3654	msg_didout = FALSE;
				3655	msg_col = 0;
				3656	out_flush();
				3657	}
				3658
				3659	/* Store the word in the hashtable to be able to find duplicates. */
				3660	dw = (char_u *)getroom_save(spin, w);
				3661	if (dw == NULL)
				3662	{
				3663	retval = FAIL;
				3664	vim_free(pc);
				3665	break;
				3666	}
				3667
				3668	hash = hash_hash(dw);
				3669	hi = hash_lookup(&ht, dw, hash);
				3670	if (!HASHITEM_EMPTY(hi))
				3671	{
				3672	if (p_verbose > 0)
				3673	smsg((char_u *)_("Duplicate word in %s line %d: %s"),
				3674	fname, lnum, dw);
				3675	else if (duplicate == 0)
				3676	smsg((char_u *)_("First duplicate word in %s line %d: %s"),
				3677	fname, lnum, dw);
				3678	++duplicate;
				3679	}
				3680	else
				3681	hash_add_item(&ht, hi, dw, hash);
				3682
				3683	flags = 0;
				3684	store_afflist[0] = NUL;
				3685	pfxlen = 0;
				3686	need_affix = FALSE;
				3687	if (afflist != NULL)
				3688	{
				3689	/* Extract flags from the affix list. */
				3690	flags \|= get_affix_flags(affile, afflist);
				3691
				3692	if (affile->af_needaffix != 0 && flag_in_afflist(
				3693	affile->af_flagtype, afflist, affile->af_needaffix))
				3694	need_affix = TRUE;
				3695
				3696	if (affile->af_pfxpostpone)
				3697	/* Need to store the list of prefix IDs with the word. */
				3698	pfxlen = get_pfxlist(affile, afflist, store_afflist);
				3699
				3700	if (spin->si_compflags != NULL)
				3701	/* Need to store the list of compound flags with the word.
				3702	* Concatenate them to the list of prefix IDs. */
				3703	get_compflags(affile, afflist, store_afflist + pfxlen);
				3704	}
				3705
				3706	/* Add the word to the word tree(s). */
				3707	if (store_word(spin, dw, flags, spin->si_region,
				3708	store_afflist, need_affix) == FAIL)
				3709	retval = FAIL;
				3710
				3711	if (afflist != NULL)
				3712	{
				3713	/* Find all matching suffixes and add the resulting words.
				3714	* Additionally do matching prefixes that combine. */
				3715	if (store_aff_word(spin, dw, afflist, affile,
				3716	&affile->af_suff, &affile->af_pref,
				3717	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3718	retval = FAIL;
				3719
				3720	/* Find all matching prefixes and add the resulting words. */
				3721	if (store_aff_word(spin, dw, afflist, affile,
				3722	&affile->af_pref, NULL,
				3723	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3724	retval = FAIL;
				3725	}
				3726
				3727	vim_free(pc);
				3728	}
				3729
				3730	if (duplicate > 0)
				3731	smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
				3732	if (spin->si_ascii && non_ascii > 0)
				3733	smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
				3734	non_ascii, fname);
				3735	hash_clear(&ht);
				3736
				3737	fclose(fd);
				3738	return retval;
				3739	}
				3740
				3741	/*
				3742	* Check for affix flags in "afflist" that are turned into word flags.
				3743	* Return WF_ flags.
				3744	*/
				3745	static int
				3746	get_affix_flags(afffile_T affile, char_u afflist)
				3747	{
				3748	int flags = 0;
				3749
				3750	if (affile->af_keepcase != 0 && flag_in_afflist(
				3751	affile->af_flagtype, afflist, affile->af_keepcase))
				3752	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				3753	if (affile->af_rare != 0 && flag_in_afflist(
				3754	affile->af_flagtype, afflist, affile->af_rare))
				3755	flags \|= WF_RARE;
				3756	if (affile->af_bad != 0 && flag_in_afflist(
				3757	affile->af_flagtype, afflist, affile->af_bad))
				3758	flags \|= WF_BANNED;
				3759	if (affile->af_needcomp != 0 && flag_in_afflist(
				3760	affile->af_flagtype, afflist, affile->af_needcomp))
				3761	flags \|= WF_NEEDCOMP;
				3762	if (affile->af_comproot != 0 && flag_in_afflist(
				3763	affile->af_flagtype, afflist, affile->af_comproot))
				3764	flags \|= WF_COMPROOT;
				3765	if (affile->af_nosuggest != 0 && flag_in_afflist(
				3766	affile->af_flagtype, afflist, affile->af_nosuggest))
				3767	flags \|= WF_NOSUGGEST;
				3768	return flags;
				3769	}
				3770
				3771	/*
				3772	* Get the list of prefix IDs from the affix list "afflist".
				3773	* Used for PFXPOSTPONE.
				3774	* Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
				3775	* and return the number of affixes.
				3776	*/
				3777	static int
				3778	get_pfxlist(
				3779	afffile_T *affile,
				3780	char_u *afflist,
				3781	char_u *store_afflist)
				3782	{
				3783	char_u *p;
				3784	char_u *prevp;
				3785	int cnt = 0;
				3786	int id;
				3787	char_u key[AH_KEY_LEN];
				3788	hashitem_T *hi;
				3789
				3790	for (p = afflist; *p != NUL; )
				3791	{
				3792	prevp = p;
				3793	if (get_affitem(affile->af_flagtype, &p) != 0)
				3794	{
				3795	/* A flag is a postponed prefix flag if it appears in "af_pref"
				3796	* and it's ID is not zero. */
				3797	vim_strncpy(key, prevp, p - prevp);
				3798	hi = hash_find(&affile->af_pref, key);
				3799	if (!HASHITEM_EMPTY(hi))
				3800	{
				3801	id = HI2AH(hi)->ah_newID;
				3802	if (id != 0)
				3803	store_afflist[cnt++] = id;
				3804	}
				3805	}
				3806	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3807	++p;
				3808	}
				3809
				3810	store_afflist[cnt] = NUL;
				3811	return cnt;
				3812	}
				3813
				3814	/*
				3815	* Get the list of compound IDs from the affix list "afflist" that are used
				3816	* for compound words.
				3817	* Puts the flags in "store_afflist[]".
				3818	*/
				3819	static void
				3820	get_compflags(
				3821	afffile_T *affile,
				3822	char_u *afflist,
				3823	char_u *store_afflist)
				3824	{
				3825	char_u *p;
				3826	char_u *prevp;
				3827	int cnt = 0;
				3828	char_u key[AH_KEY_LEN];
				3829	hashitem_T *hi;
				3830
				3831	for (p = afflist; *p != NUL; )
				3832	{
				3833	prevp = p;
				3834	if (get_affitem(affile->af_flagtype, &p) != 0)
				3835	{
				3836	/* A flag is a compound flag if it appears in "af_comp". */
				3837	vim_strncpy(key, prevp, p - prevp);
				3838	hi = hash_find(&affile->af_comp, key);
				3839	if (!HASHITEM_EMPTY(hi))
				3840	store_afflist[cnt++] = HI2CI(hi)->ci_newID;
				3841	}
				3842	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3843	++p;
				3844	}
				3845
				3846	store_afflist[cnt] = NUL;
				3847	}
				3848
				3849	/*
				3850	* Apply affixes to a word and store the resulting words.
				3851	* "ht" is the hashtable with affentry_T that need to be applied, either
				3852	* prefixes or suffixes.
				3853	* "xht", when not NULL, is the prefix hashtable, to be used additionally on
				3854	* the resulting words for combining affixes.
				3855	*
				3856	* Returns FAIL when out of memory.
				3857	*/
				3858	static int
				3859	store_aff_word(
				3860	spellinfo_T spin, / spell info */
				3861	char_u word, / basic word start */
				3862	char_u afflist, / list of names of supported affixes */
				3863	afffile_T *affile,
				3864	hashtab_T *ht,
				3865	hashtab_T *xht,
				3866	int condit, /* CONDIT_SUF et al. */
				3867	int flags, /* flags for the word */
				3868	char_u pfxlist, / list of prefix IDs */
				3869	int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest
				3870	* is compound flags */
				3871	{
				3872	int todo;
				3873	hashitem_T *hi;
				3874	affheader_T *ah;
				3875	affentry_T *ae;
				3876	char_u newword[MAXWLEN];
				3877	int retval = OK;
				3878	int i, j;
				3879	char_u *p;
				3880	int use_flags;
				3881	char_u *use_pfxlist;
				3882	int use_pfxlen;
				3883	int need_affix;
				3884	char_u store_afflist[MAXWLEN];
				3885	char_u pfx_pfxlist[MAXWLEN];
				3886	size_t wordlen = STRLEN(word);
				3887	int use_condit;
				3888
				3889	todo = (int)ht->ht_used;
				3890	for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
				3891	{
				3892	if (!HASHITEM_EMPTY(hi))
				3893	{
				3894	--todo;
				3895	ah = HI2AH(hi);
				3896
				3897	/* Check that the affix combines, if required, and that the word
				3898	* supports this affix. */
				3899	if (((condit & CONDIT_COMB) == 0 \|\| ah->ah_combine)
				3900	&& flag_in_afflist(affile->af_flagtype, afflist,
				3901	ah->ah_flag))
				3902	{
				3903	/* Loop over all affix entries with this name. */
				3904	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3905	{
				3906	/* Check the condition. It's not logical to match case
				3907	* here, but it is required for compatibility with
				3908	* Myspell.
				3909	* Another requirement from Myspell is that the chop
				3910	* string is shorter than the word itself.
				3911	* For prefixes, when "PFXPOSTPONE" was used, only do
				3912	* prefixes with a chop string and/or flags.
				3913	* When a previously added affix had CIRCUMFIX this one
				3914	* must have it too, if it had not then this one must not
				3915	* have one either. */
				3916	if ((xht != NULL \|\| !affile->af_pfxpostpone
				3917	\|\| ae->ae_chop != NULL
				3918	\|\| ae->ae_flags != NULL)
				3919	&& (ae->ae_chop == NULL
				3920	\|\| STRLEN(ae->ae_chop) < wordlen)
				3921	&& (ae->ae_prog == NULL
				3922	\|\| vim_regexec_prog(&ae->ae_prog, FALSE,
				3923	word, (colnr_T)0))
				3924	&& (((condit & CONDIT_CFIX) == 0)
				3925	== ((condit & CONDIT_AFF) == 0
				3926	\|\| ae->ae_flags == NULL
				3927	\|\| !flag_in_afflist(affile->af_flagtype,
				3928	ae->ae_flags, affile->af_circumfix))))
				3929	{
				3930	/* Match. Remove the chop and add the affix. */
				3931	if (xht == NULL)
				3932	{
				3933	/* prefix: chop/add at the start of the word */
				3934	if (ae->ae_add == NULL)
				3935	*newword = NUL;
				3936	else
				3937	vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
				3938	p = word;
				3939	if (ae->ae_chop != NULL)
				3940	{
				3941	/* Skip chop string. */
				3942	#ifdef FEAT_MBYTE
				3943	if (has_mbyte)
				3944	{
				3945	i = mb_charlen(ae->ae_chop);
				3946	for ( ; i > 0; --i)
				3947	mb_ptr_adv(p);
				3948	}
				3949	else
				3950	#endif
				3951	p += STRLEN(ae->ae_chop);
				3952	}
				3953	STRCAT(newword, p);
				3954	}
				3955	else
				3956	{
				3957	/* suffix: chop/add at the end of the word */
				3958	vim_strncpy(newword, word, MAXWLEN - 1);
				3959	if (ae->ae_chop != NULL)
				3960	{
				3961	/* Remove chop string. */
				3962	p = newword + STRLEN(newword);
				3963	i = (int)MB_CHARLEN(ae->ae_chop);
				3964	for ( ; i > 0; --i)
				3965	mb_ptr_back(newword, p);
				3966	*p = NUL;
				3967	}
				3968	if (ae->ae_add != NULL)
				3969	STRCAT(newword, ae->ae_add);
				3970	}
				3971
				3972	use_flags = flags;
				3973	use_pfxlist = pfxlist;
				3974	use_pfxlen = pfxlen;
				3975	need_affix = FALSE;
				3976	use_condit = condit \| CONDIT_COMB \| CONDIT_AFF;
				3977	if (ae->ae_flags != NULL)
				3978	{
				3979	/* Extract flags from the affix list. */
				3980	use_flags \|= get_affix_flags(affile, ae->ae_flags);
				3981
				3982	if (affile->af_needaffix != 0 && flag_in_afflist(
				3983	affile->af_flagtype, ae->ae_flags,
				3984	affile->af_needaffix))
				3985	need_affix = TRUE;
				3986
				3987	/* When there is a CIRCUMFIX flag the other affix
				3988	* must also have it and we don't add the word
				3989	* with one affix. */
				3990	if (affile->af_circumfix != 0 && flag_in_afflist(
				3991	affile->af_flagtype, ae->ae_flags,
				3992	affile->af_circumfix))
				3993	{
				3994	use_condit \|= CONDIT_CFIX;
				3995	if ((condit & CONDIT_CFIX) == 0)
				3996	need_affix = TRUE;
				3997	}
				3998
				3999	if (affile->af_pfxpostpone
				4000	\|\| spin->si_compflags != NULL)
				4001	{
				4002	if (affile->af_pfxpostpone)
				4003	/* Get prefix IDS from the affix list. */
				4004	use_pfxlen = get_pfxlist(affile,
				4005	ae->ae_flags, store_afflist);
				4006	else
				4007	use_pfxlen = 0;
				4008	use_pfxlist = store_afflist;
				4009
				4010	/* Combine the prefix IDs. Avoid adding the
				4011	* same ID twice. */
				4012	for (i = 0; i < pfxlen; ++i)
				4013	{
				4014	for (j = 0; j < use_pfxlen; ++j)
				4015	if (pfxlist[i] == use_pfxlist[j])
				4016	break;
				4017	if (j == use_pfxlen)
				4018	use_pfxlist[use_pfxlen++] = pfxlist[i];
				4019	}
				4020
				4021	if (spin->si_compflags != NULL)
				4022	/* Get compound IDS from the affix list. */
				4023	get_compflags(affile, ae->ae_flags,
				4024	use_pfxlist + use_pfxlen);
				4025
				4026	/* Combine the list of compound flags.
				4027	* Concatenate them to the prefix IDs list.
				4028	* Avoid adding the same ID twice. */
				4029	for (i = pfxlen; pfxlist[i] != NUL; ++i)
				4030	{
				4031	for (j = use_pfxlen;
				4032	use_pfxlist[j] != NUL; ++j)
				4033	if (pfxlist[i] == use_pfxlist[j])
				4034	break;
				4035	if (use_pfxlist[j] == NUL)
				4036	{
				4037	use_pfxlist[j++] = pfxlist[i];
				4038	use_pfxlist[j] = NUL;
				4039	}
				4040	}
				4041	}
				4042	}
				4043
				4044	/* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
				4045	* use the compound flags. */
				4046	if (use_pfxlist != NULL && ae->ae_compforbid)
				4047	{
				4048	vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
				4049	use_pfxlist = pfx_pfxlist;
				4050	}
				4051
				4052	/* When there are postponed prefixes... */
				4053	if (spin->si_prefroot != NULL
				4054	&& spin->si_prefroot->wn_sibling != NULL)
				4055	{
				4056	/* ... add a flag to indicate an affix was used. */
				4057	use_flags \|= WF_HAS_AFF;
				4058
				4059	/* ... don't use a prefix list if combining
				4060	* affixes is not allowed. But do use the
				4061	* compound flags after them. */
				4062	if (!ah->ah_combine && use_pfxlist != NULL)
				4063	use_pfxlist += use_pfxlen;
				4064	}
				4065
				4066	/* When compounding is supported and there is no
				4067	* "COMPOUNDPERMITFLAG" then forbid compounding on the
				4068	* side where the affix is applied. */
				4069	if (spin->si_compflags != NULL && !ae->ae_comppermit)
				4070	{
				4071	if (xht != NULL)
				4072	use_flags \|= WF_NOCOMPAFT;
				4073	else
				4074	use_flags \|= WF_NOCOMPBEF;
				4075	}
				4076
				4077	/* Store the modified word. */
				4078	if (store_word(spin, newword, use_flags,
				4079	spin->si_region, use_pfxlist,
				4080	need_affix) == FAIL)
				4081	retval = FAIL;
				4082
				4083	/* When added a prefix or a first suffix and the affix
				4084	* has flags may add a(nother) suffix. RECURSIVE! */
				4085	if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
				4086	if (store_aff_word(spin, newword, ae->ae_flags,
				4087	affile, &affile->af_suff, xht,
				4088	use_condit & (xht == NULL
				4089	? ~0 : ~CONDIT_SUF),
				4090	use_flags, use_pfxlist, pfxlen) == FAIL)
				4091	retval = FAIL;
				4092
				4093	/* When added a suffix and combining is allowed also
				4094	* try adding a prefix additionally. Both for the
				4095	* word flags and for the affix flags. RECURSIVE! */
				4096	if (xht != NULL && ah->ah_combine)
				4097	{
				4098	if (store_aff_word(spin, newword,
				4099	afflist, affile,
				4100	xht, NULL, use_condit,
				4101	use_flags, use_pfxlist,
				4102	pfxlen) == FAIL
				4103	\|\| (ae->ae_flags != NULL
				4104	&& store_aff_word(spin, newword,
				4105	ae->ae_flags, affile,
				4106	xht, NULL, use_condit,
				4107	use_flags, use_pfxlist,
				4108	pfxlen) == FAIL))
				4109	retval = FAIL;
				4110	}
				4111	}
				4112	}
				4113	}
				4114	}
				4115	}
				4116
				4117	return retval;
				4118	}
				4119
				4120	/*
				4121	* Read a file with a list of words.
				4122	*/
				4123	static int
				4124	spell_read_wordfile(spellinfo_T spin, char_u fname)
				4125	{
				4126	FILE *fd;
				4127	long lnum = 0;
				4128	char_u rline[MAXLINELEN];
				4129	char_u *line;
				4130	char_u *pc = NULL;
				4131	char_u *p;
				4132	int l;
				4133	int retval = OK;
				4134	int did_word = FALSE;
				4135	int non_ascii = 0;
				4136	int flags;
				4137	int regionmask;
				4138
				4139	/*
				4140	* Open the file.
				4141	*/
				4142	fd = mch_fopen((char *)fname, "r");
				4143	if (fd == NULL)
				4144	{
				4145	EMSG2(_(e_notopen), fname);
				4146	return FAIL;
				4147	}
				4148
				4149	vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
				4150	spell_message(spin, IObuff);
				4151
				4152	/*
				4153	* Read all the lines in the file one by one.
				4154	*/
				4155	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				4156	{
				4157	line_breakcheck();
				4158	++lnum;
				4159
				4160	/* Skip comment lines. */
				4161	if (*rline == '#')
				4162	continue;
				4163
				4164	/* Remove CR, LF and white space from the end. */
				4165	l = (int)STRLEN(rline);
				4166	while (l > 0 && rline[l - 1] <= ' ')
				4167	--l;
				4168	if (l == 0)
				4169	continue; /* empty or blank line */
				4170	rline[l] = NUL;
				4171
				4172	/* Convert from "/encoding={encoding}" to 'encoding' when needed. */
				4173	vim_free(pc);
				4174	#ifdef FEAT_MBYTE
				4175	if (spin->si_conv.vc_type != CONV_NONE)
				4176	{
				4177	pc = string_convert(&spin->si_conv, rline, NULL);
				4178	if (pc == NULL)
				4179	{
				4180	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				4181	fname, lnum, rline);
				4182	continue;
				4183	}
				4184	line = pc;
				4185	}
				4186	else
				4187	#endif
				4188	{
				4189	pc = NULL;
				4190	line = rline;
				4191	}
				4192
				4193	if (*line == '/')
				4194	{
				4195	++line;
				4196	if (STRNCMP(line, "encoding=", 9) == 0)
				4197	{
				4198	if (spin->si_conv.vc_type != CONV_NONE)
				4199	smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
				4200	fname, lnum, line - 1);
				4201	else if (did_word)
				4202	smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
				4203	fname, lnum, line - 1);
				4204	else
				4205	{
				4206	#ifdef FEAT_MBYTE
				4207	char_u *enc;
				4208
				4209	/* Setup for conversion to 'encoding'. */
				4210	line += 9;
				4211	enc = enc_canonize(line);
				4212	if (enc != NULL && !spin->si_ascii
				4213	&& convert_setup(&spin->si_conv, enc,
				4214	p_enc) == FAIL)
				4215	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				4216	fname, line, p_enc);
				4217	vim_free(enc);
				4218	spin->si_conv.vc_fail = TRUE;
				4219	#else
				4220	smsg((char_u *)_("Conversion in %s not supported"), fname);
				4221	#endif
				4222	}
				4223	continue;
				4224	}
				4225
				4226	if (STRNCMP(line, "regions=", 8) == 0)
				4227	{
				4228	if (spin->si_region_count > 1)
				4229	smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
				4230	fname, lnum, line);
				4231	else
				4232	{
				4233	line += 8;
				4234	if (STRLEN(line) > 16)
				4235	smsg((char_u *)_("Too many regions in %s line %d: %s"),
				4236	fname, lnum, line);
				4237	else
				4238	{
				4239	spin->si_region_count = (int)STRLEN(line) / 2;
				4240	STRCPY(spin->si_region_name, line);
				4241
				4242	/* Adjust the mask for a word valid in all regions. */
				4243	spin->si_region = (1 << spin->si_region_count) - 1;
				4244	}
				4245	}
				4246	continue;
				4247	}
				4248
				4249	smsg((char_u *)_("/ line ignored in %s line %d: %s"),
				4250	fname, lnum, line - 1);
				4251	continue;
				4252	}
				4253
				4254	flags = 0;
				4255	regionmask = spin->si_region;
				4256
				4257	/* Check for flags and region after a slash. */
				4258	p = vim_strchr(line, '/');
				4259	if (p != NULL)
				4260	{
				4261	*p++ = NUL;
				4262	while (*p != NUL)
				4263	{
				4264	if (p == '=') / keep-case word */
				4265	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				4266	else if (p == '!') / Bad, bad, wicked word. */
				4267	flags \|= WF_BANNED;
				4268	else if (p == '?') / Rare word. */
				4269	flags \|= WF_RARE;
				4270	else if (VIM_ISDIGIT(p)) / region number(s) */
				4271	{
				4272	if ((flags & WF_REGION) == 0) /* first one */
				4273	regionmask = 0;
				4274	flags \|= WF_REGION;
				4275
				4276	l = *p - '0';
				4277	if (l > spin->si_region_count)
				4278	{
				4279	smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
				4280	fname, lnum, p);
				4281	break;
				4282	}
				4283	regionmask \|= 1 << (l - 1);
				4284	}
				4285	else
				4286	{
				4287	smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
				4288	fname, lnum, p);
				4289	break;
				4290	}
				4291	++p;
				4292	}
				4293	}
				4294
				4295	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				4296	if (spin->si_ascii && has_non_ascii(line))
				4297	{
				4298	++non_ascii;
				4299	continue;
				4300	}
				4301
				4302	/* Normal word: store it. */
				4303	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
				4304	{
				4305	retval = FAIL;
				4306	break;
				4307	}
				4308	did_word = TRUE;
				4309	}
				4310
				4311	vim_free(pc);
				4312	fclose(fd);
				4313
				4314	if (spin->si_ascii && non_ascii > 0)
				4315	{
				4316	vim_snprintf((char *)IObuff, IOSIZE,
				4317	_("Ignored %d words with non-ASCII characters"), non_ascii);
				4318	spell_message(spin, IObuff);
				4319	}
				4320
				4321	return retval;
				4322	}
				4323
				4324	/*
				4325	* Get part of an sblock_T, "len" bytes long.
				4326	* This avoids calling free() for every little struct we use (and keeping
				4327	* track of them).
				4328	* The memory is cleared to all zeros.
				4329	* Returns NULL when out of memory.
				4330	*/
				4331	static void *
				4332	getroom(
				4333	spellinfo_T *spin,
				4334	size_t len, /* length needed */
				4335	int align) /* align for pointer */
				4336	{
				4337	char_u *p;
				4338	sblock_T *bl = spin->si_blocks;
				4339
				4340	if (align && bl != NULL)
				4341	/* Round size up for alignment. On some systems structures need to be
				4342	* aligned to the size of a pointer (e.g., SPARC). */
				4343	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
				4344	& ~(sizeof(char *) - 1);
				4345
				4346	if (bl == NULL \|\| bl->sb_used + len > SBLOCKSIZE)
				4347	{
				4348	if (len >= SBLOCKSIZE)
				4349	bl = NULL;
				4350	else
				4351	/* Allocate a block of memory. It is not freed until much later. */
				4352	bl = (sblock_T *)alloc_clear(
				4353	(unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
				4354	if (bl == NULL)
				4355	{
				4356	if (!spin->si_did_emsg)
				4357	{
				4358	EMSG(_("E845: Insufficient memory, word list will be incomplete"));
				4359	spin->si_did_emsg = TRUE;
				4360	}
				4361	return NULL;
				4362	}
				4363	bl->sb_next = spin->si_blocks;
				4364	spin->si_blocks = bl;
				4365	bl->sb_used = 0;
				4366	++spin->si_blocks_cnt;
				4367	}
				4368
				4369	p = bl->sb_data + bl->sb_used;
				4370	bl->sb_used += (int)len;
				4371
				4372	return p;
				4373	}
				4374
				4375	/*
				4376	* Make a copy of a string into memory allocated with getroom().
				4377	* Returns NULL when out of memory.
				4378	*/
				4379	static char_u *
				4380	getroom_save(spellinfo_T spin, char_u s)
				4381	{
				4382	char_u *sc;
				4383
				4384	sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
				4385	if (sc != NULL)
				4386	STRCPY(sc, s);
				4387	return sc;
				4388	}
				4389
				4390
				4391	/*
				4392	* Free the list of allocated sblock_T.
				4393	*/
				4394	static void
				4395	free_blocks(sblock_T *bl)
				4396	{
				4397	sblock_T *next;
				4398
				4399	while (bl != NULL)
				4400	{
				4401	next = bl->sb_next;
				4402	vim_free(bl);
				4403	bl = next;
				4404	}
				4405	}
				4406
				4407	/*
				4408	* Allocate the root of a word tree.
				4409	* Returns NULL when out of memory.
				4410	*/
				4411	static wordnode_T *
				4412	wordtree_alloc(spellinfo_T *spin)
				4413	{
				4414	return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4415	}
				4416
				4417	/*
				4418	* Store a word in the tree(s).
				4419	* Always store it in the case-folded tree. For a keep-case word this is
				4420	* useful when the word can also be used with all caps (no WF_FIXCAP flag) and
				4421	* used to find suggestions.
				4422	* For a keep-case word also store it in the keep-case tree.
				4423	* When "pfxlist" is not NULL store the word for each postponed prefix ID and
				4424	* compound flag.
				4425	*/
				4426	static int
				4427	store_word(
				4428	spellinfo_T *spin,
				4429	char_u *word,
				4430	int flags, /* extra flags, WF_BANNED */
				4431	int region, /* supported region(s) */
				4432	char_u pfxlist, / list of prefix IDs or NULL */
				4433	int need_affix) /* only store word with affix ID */
				4434	{
				4435	int len = (int)STRLEN(word);
				4436	int ct = captype(word, word + len);
				4437	char_u foldword[MAXWLEN];
				4438	int res = OK;
				4439	char_u *p;
				4440
				4441	(void)spell_casefold(word, len, foldword, MAXWLEN);
				4442	for (p = pfxlist; res == OK; ++p)
				4443	{
				4444	if (!need_affix \|\| (p != NULL && *p != NUL))
				4445	res = tree_add_word(spin, foldword, spin->si_foldroot, ct \| flags,
				4446	region, p == NULL ? 0 : *p);
				4447	if (p == NULL \|\| *p == NUL)
				4448	break;
				4449	}
				4450	++spin->si_foldwcount;
				4451
				4452	if (res == OK && (ct == WF_KEEPCAP \|\| (flags & WF_KEEPCAP)))
				4453	{
				4454	for (p = pfxlist; res == OK; ++p)
				4455	{
				4456	if (!need_affix \|\| (p != NULL && *p != NUL))
				4457	res = tree_add_word(spin, word, spin->si_keeproot, flags,
				4458	region, p == NULL ? 0 : *p);
				4459	if (p == NULL \|\| *p == NUL)
				4460	break;
				4461	}
				4462	++spin->si_keepwcount;
				4463	}
				4464	return res;
				4465	}
				4466
				4467	/*
				4468	* Add word "word" to a word tree at "root".
				4469	* When "flags" < 0 we are adding to the prefix tree where "flags" is used for
				4470	* "rare" and "region" is the condition nr.
				4471	* Returns FAIL when out of memory.
				4472	*/
				4473	static int
				4474	tree_add_word(
				4475	spellinfo_T *spin,
				4476	char_u *word,
				4477	wordnode_T *root,
				4478	int flags,
				4479	int region,
				4480	int affixID)
				4481	{
				4482	wordnode_T *node = root;
				4483	wordnode_T *np;
				4484	wordnode_T copyp, *copyprev;
				4485	wordnode_T **prev = NULL;
				4486	int i;
				4487
				4488	/* Add each byte of the word to the tree, including the NUL at the end. */
				4489	for (i = 0; ; ++i)
				4490	{
				4491	/* When there is more than one reference to this node we need to make
				4492	* a copy, so that we can modify it. Copy the whole list of siblings
				4493	* (we don't optimize for a partly shared list of siblings). */
				4494	if (node != NULL && node->wn_refs > 1)
				4495	{
				4496	--node->wn_refs;
				4497	copyprev = prev;
				4498	for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
				4499	{
				4500	/* Allocate a new node and copy the info. */
				4501	np = get_wordnode(spin);
				4502	if (np == NULL)
				4503	return FAIL;
				4504	np->wn_child = copyp->wn_child;
				4505	if (np->wn_child != NULL)
				4506	++np->wn_child->wn_refs; /* child gets extra ref */
				4507	np->wn_byte = copyp->wn_byte;
				4508	if (np->wn_byte == NUL)
				4509	{
				4510	np->wn_flags = copyp->wn_flags;
				4511	np->wn_region = copyp->wn_region;
				4512	np->wn_affixID = copyp->wn_affixID;
				4513	}
				4514
				4515	/* Link the new node in the list, there will be one ref. */
				4516	np->wn_refs = 1;
				4517	if (copyprev != NULL)
				4518	*copyprev = np;
				4519	copyprev = &np->wn_sibling;
				4520
				4521	/* Let "node" point to the head of the copied list. */
				4522	if (copyp == node)
				4523	node = np;
				4524	}
				4525	}
				4526
				4527	/* Look for the sibling that has the same character. They are sorted
				4528	* on byte value, thus stop searching when a sibling is found with a
				4529	* higher byte value. For zero bytes (end of word) the sorting is
				4530	* done on flags and then on affixID. */
				4531	while (node != NULL
				4532	&& (node->wn_byte < word[i]
				4533	\|\| (node->wn_byte == NUL
				4534	&& (flags < 0
				4535	? node->wn_affixID < (unsigned)affixID
				4536	: (node->wn_flags < (unsigned)(flags & WN_MASK)
				4537	\|\| (node->wn_flags == (flags & WN_MASK)
				4538	&& (spin->si_sugtree
				4539	? (node->wn_region & 0xffff) < region
				4540	: node->wn_affixID
				4541	< (unsigned)affixID)))))))
				4542	{
				4543	prev = &node->wn_sibling;
				4544	node = *prev;
				4545	}
				4546	if (node == NULL
				4547	\|\| node->wn_byte != word[i]
				4548	\|\| (word[i] == NUL
				4549	&& (flags < 0
				4550	\|\| spin->si_sugtree
				4551	\|\| node->wn_flags != (flags & WN_MASK)
				4552	\|\| node->wn_affixID != affixID)))
				4553	{
				4554	/* Allocate a new node. */
				4555	np = get_wordnode(spin);
				4556	if (np == NULL)
				4557	return FAIL;
				4558	np->wn_byte = word[i];
				4559
				4560	/* If "node" is NULL this is a new child or the end of the sibling
				4561	* list: ref count is one. Otherwise use ref count of sibling and
				4562	* make ref count of sibling one (matters when inserting in front
				4563	* of the list of siblings). */
				4564	if (node == NULL)
				4565	np->wn_refs = 1;
				4566	else
				4567	{
				4568	np->wn_refs = node->wn_refs;
				4569	node->wn_refs = 1;
				4570	}
				4571	if (prev != NULL)
				4572	*prev = np;
				4573	np->wn_sibling = node;
				4574	node = np;
				4575	}
				4576
				4577	if (word[i] == NUL)
				4578	{
				4579	node->wn_flags = flags;
				4580	node->wn_region \|= region;
				4581	node->wn_affixID = affixID;
				4582	break;
				4583	}
				4584	prev = &node->wn_child;
				4585	node = *prev;
				4586	}
				4587	#ifdef SPELL_PRINTTREE
				4588	smsg((char_u *)"Added \"%s\"", word);
				4589	spell_print_tree(root->wn_sibling);
				4590	#endif
				4591
				4592	/* count nr of words added since last message */
				4593	++spin->si_msg_count;
				4594
				4595	if (spin->si_compress_cnt > 1)
				4596	{
				4597	if (--spin->si_compress_cnt == 1)
				4598	/* Did enough words to lower the block count limit. */
				4599	spin->si_blocks_cnt += compress_inc;
				4600	}
				4601
				4602	/*
				4603	* When we have allocated lots of memory we need to compress the word tree
				4604	* to free up some room. But compression is slow, and we might actually
				4605	* need that room, thus only compress in the following situations:
				4606	* 1. When not compressed before (si_compress_cnt == 0): when using
				4607	* "compress_start" blocks.
				4608	* 2. When compressed before and used "compress_inc" blocks before
				4609	* adding "compress_added" words (si_compress_cnt > 1).
				4610	* 3. When compressed before, added "compress_added" words
				4611	* (si_compress_cnt == 1) and the number of free nodes drops below the
				4612	* maximum word length.
				4613	*/
				4614	#ifndef SPELL_COMPRESS_ALLWAYS
				4615	if (spin->si_compress_cnt == 1
				4616	? spin->si_free_count < MAXWLEN
				4617	: spin->si_blocks_cnt >= compress_start)
				4618	#endif
				4619	{
				4620	/* Decrement the block counter. The effect is that we compress again
				4621	* when the freed up room has been used and another "compress_inc"
				4622	* blocks have been allocated. Unless "compress_added" words have
				4623	* been added, then the limit is put back again. */
				4624	spin->si_blocks_cnt -= compress_inc;
				4625	spin->si_compress_cnt = compress_added;
				4626
				4627	if (spin->si_verbose)
				4628	{
				4629	msg_start();
				4630	msg_puts((char_u *)_(msg_compressing));
				4631	msg_clr_eos();
				4632	msg_didout = FALSE;
				4633	msg_col = 0;
				4634	out_flush();
				4635	}
				4636
				4637	/* Compress both trees. Either they both have many nodes, which makes
				4638	* compression useful, or one of them is small, which means
				4639	* compression goes fast. But when filling the soundfold word tree
				4640	* there is no keep-case tree. */
				4641	wordtree_compress(spin, spin->si_foldroot);
				4642	if (affixID >= 0)
				4643	wordtree_compress(spin, spin->si_keeproot);
				4644	}
				4645
				4646	return OK;
				4647	}
				4648
				4649	/*
				4650	* Get a wordnode_T, either from the list of previously freed nodes or
				4651	* allocate a new one.
				4652	* Returns NULL when out of memory.
				4653	*/
				4654	static wordnode_T *
				4655	get_wordnode(spellinfo_T *spin)
				4656	{
				4657	wordnode_T *n;
				4658
				4659	if (spin->si_first_free == NULL)
				4660	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4661	else
				4662	{
				4663	n = spin->si_first_free;
				4664	spin->si_first_free = n->wn_child;
				4665	vim_memset(n, 0, sizeof(wordnode_T));
				4666	--spin->si_free_count;
				4667	}
				4668	#ifdef SPELL_PRINTTREE
				4669	if (n != NULL)
				4670	n->wn_nr = ++spin->si_wordnode_nr;
				4671	#endif
				4672	return n;
				4673	}
				4674
				4675	/*
				4676	* Decrement the reference count on a node (which is the head of a list of
				4677	* siblings). If the reference count becomes zero free the node and its
				4678	* siblings.
				4679	* Returns the number of nodes actually freed.
				4680	*/
				4681	static int
				4682	deref_wordnode(spellinfo_T spin, wordnode_T node)
				4683	{
				4684	wordnode_T *np;
				4685	int cnt = 0;
				4686
				4687	if (--node->wn_refs == 0)
				4688	{
				4689	for (np = node; np != NULL; np = np->wn_sibling)
				4690	{
				4691	if (np->wn_child != NULL)
				4692	cnt += deref_wordnode(spin, np->wn_child);
				4693	free_wordnode(spin, np);
				4694	++cnt;
				4695	}
				4696	++cnt; /* length field */
				4697	}
				4698	return cnt;
				4699	}
				4700
				4701	/*
				4702	* Free a wordnode_T for re-use later.
				4703	* Only the "wn_child" field becomes invalid.
				4704	*/
				4705	static void
				4706	free_wordnode(spellinfo_T spin, wordnode_T n)
				4707	{
				4708	n->wn_child = spin->si_first_free;
				4709	spin->si_first_free = n;
				4710	++spin->si_free_count;
				4711	}
				4712
				4713	/*
				4714	* Compress a tree: find tails that are identical and can be shared.
				4715	*/
				4716	static void
				4717	wordtree_compress(spellinfo_T spin, wordnode_T root)
				4718	{
				4719	hashtab_T ht;
				4720	int n;
				4721	int tot = 0;
				4722	int perc;
				4723
				4724	/* Skip the root itself, it's not actually used. The first sibling is the
				4725	* start of the tree. */
				4726	if (root->wn_sibling != NULL)
				4727	{
				4728	hash_init(&ht);
				4729	n = node_compress(spin, root->wn_sibling, &ht, &tot);
				4730
				4731	#ifndef SPELL_PRINTTREE
				4732	if (spin->si_verbose \|\| p_verbose > 2)
				4733	#endif
				4734	{
				4735	if (tot > 1000000)
				4736	perc = (tot - n) / (tot / 100);
				4737	else if (tot == 0)
				4738	perc = 0;
				4739	else
				4740	perc = (tot - n) * 100 / tot;
				4741	vim_snprintf((char *)IObuff, IOSIZE,
				4742	_("Compressed %d of %d nodes; %d (%d%%) remaining"),
				4743	n, tot, tot - n, perc);
				4744	spell_message(spin, IObuff);
				4745	}
				4746	#ifdef SPELL_PRINTTREE
				4747	spell_print_tree(root->wn_sibling);
				4748	#endif
				4749	hash_clear(&ht);
				4750	}
				4751	}
				4752
				4753	/*
				4754	* Compress a node, its siblings and its children, depth first.
				4755	* Returns the number of compressed nodes.
				4756	*/
				4757	static int
				4758	node_compress(
				4759	spellinfo_T *spin,
				4760	wordnode_T *node,
				4761	hashtab_T *ht,
				4762	int tot) / total count of nodes before compressing,
				4763	incremented while going through the tree */
				4764	{
				4765	wordnode_T *np;
				4766	wordnode_T *tp;
				4767	wordnode_T *child;
				4768	hash_T hash;
				4769	hashitem_T *hi;
				4770	int len = 0;
				4771	unsigned nr, n;
				4772	int compressed = 0;
				4773
				4774	/*
				4775	* Go through the list of siblings. Compress each child and then try
				4776	* finding an identical child to replace it.
				4777	* Note that with "child" we mean not just the node that is pointed to,
				4778	* but the whole list of siblings of which the child node is the first.
				4779	*/
				4780	for (np = node; np != NULL && !got_int; np = np->wn_sibling)
				4781	{
				4782	++len;
				4783	if ((child = np->wn_child) != NULL)
				4784	{
				4785	/* Compress the child first. This fills hashkey. */
				4786	compressed += node_compress(spin, child, ht, tot);
				4787
				4788	/* Try to find an identical child. */
				4789	hash = hash_hash(child->wn_u1.hashkey);
				4790	hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
				4791	if (!HASHITEM_EMPTY(hi))
				4792	{
				4793	/* There are children we encountered before with a hash value
				4794	* identical to the current child. Now check if there is one
				4795	* that is really identical. */
				4796	for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
				4797	if (node_equal(child, tp))
				4798	{
				4799	/* Found one! Now use that child in place of the
				4800	* current one. This means the current child and all
				4801	* its siblings is unlinked from the tree. */
				4802	++tp->wn_refs;
				4803	compressed += deref_wordnode(spin, child);
				4804	np->wn_child = tp;
				4805	break;
				4806	}
				4807	if (tp == NULL)
				4808	{
				4809	/* No other child with this hash value equals the child of
				4810	* the node, add it to the linked list after the first
				4811	* item. */
				4812	tp = HI2WN(hi);
				4813	child->wn_u2.next = tp->wn_u2.next;
				4814	tp->wn_u2.next = child;
				4815	}
				4816	}
				4817	else
				4818	/* No other child has this hash value, add it to the
				4819	* hashtable. */
				4820	hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
				4821	}
				4822	}
				4823	tot += len + 1; / add one for the node that stores the length */
				4824
				4825	/*
				4826	* Make a hash key for the node and its siblings, so that we can quickly
				4827	* find a lookalike node. This must be done after compressing the sibling
				4828	* list, otherwise the hash key would become invalid by the compression.
				4829	*/
				4830	node->wn_u1.hashkey[0] = len;
				4831	nr = 0;
				4832	for (np = node; np != NULL; np = np->wn_sibling)
				4833	{
				4834	if (np->wn_byte == NUL)
				4835	/* end node: use wn_flags, wn_region and wn_affixID */
				4836	n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
				4837	else
				4838	/* byte node: use the byte value and the child pointer */
				4839	n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
				4840	nr = nr * 101 + n;
				4841	}
				4842
				4843	/* Avoid NUL bytes, it terminates the hash key. */
				4844	n = nr & 0xff;
				4845	node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
				4846	n = (nr >> 8) & 0xff;
				4847	node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
				4848	n = (nr >> 16) & 0xff;
				4849	node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
				4850	n = (nr >> 24) & 0xff;
				4851	node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
				4852	node->wn_u1.hashkey[5] = NUL;
				4853
				4854	/* Check for CTRL-C pressed now and then. */
				4855	fast_breakcheck();
				4856
				4857	return compressed;
				4858	}
				4859
				4860	/*
				4861	* Return TRUE when two nodes have identical siblings and children.
				4862	*/
				4863	static int
				4864	node_equal(wordnode_T n1, wordnode_T n2)
				4865	{
				4866	wordnode_T *p1;
				4867	wordnode_T *p2;
				4868
				4869	for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
				4870	p1 = p1->wn_sibling, p2 = p2->wn_sibling)
				4871	if (p1->wn_byte != p2->wn_byte
				4872	\|\| (p1->wn_byte == NUL
				4873	? (p1->wn_flags != p2->wn_flags
				4874	\|\| p1->wn_region != p2->wn_region
				4875	\|\| p1->wn_affixID != p2->wn_affixID)
				4876	: (p1->wn_child != p2->wn_child)))
				4877	break;
				4878
				4879	return p1 == NULL && p2 == NULL;
				4880	}
				4881
				4882	static int
				4883	#ifdef __BORLANDC__
				4884	_RTLENTRYF
				4885	#endif
				4886	rep_compare(const void s1, const void s2);
				4887
				4888	/*
				4889	* Function given to qsort() to sort the REP items on "from" string.
				4890	*/
				4891	static int
				4892	#ifdef __BORLANDC__
				4893	_RTLENTRYF
				4894	#endif
				4895	rep_compare(const void s1, const void s2)
				4896	{
				4897	fromto_T p1 = (fromto_T )s1;
				4898	fromto_T p2 = (fromto_T )s2;
				4899
				4900	return STRCMP(p1->ft_from, p2->ft_from);
				4901	}
				4902
				4903	/*
				4904	* Write the Vim .spl file "fname".
				4905	* Return FAIL or OK;
				4906	*/
				4907	static int
				4908	write_vim_spell(spellinfo_T spin, char_u fname)
				4909	{
				4910	FILE *fd;
				4911	int regionmask;
				4912	int round;
				4913	wordnode_T *tree;
				4914	int nodecount;
				4915	int i;
				4916	int l;
				4917	garray_T *gap;
				4918	fromto_T *ftp;
				4919	char_u *p;
				4920	int rr;
				4921	int retval = OK;
				4922	size_t fwv = 1; /* collect return value of fwrite() to avoid
				4923	warnings from picky compiler */
				4924
				4925	fd = mch_fopen((char *)fname, "w");
				4926	if (fd == NULL)
				4927	{
				4928	EMSG2(_(e_notopen), fname);
				4929	return FAIL;
				4930	}
				4931
				4932	/* <HEADER>: <fileID> <versionnr> */
				4933	/* <fileID> */
				4934	fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
				4935	if (fwv != (size_t)1)
				4936	/* Catch first write error, don't try writing more. */
				4937	goto theend;
				4938
				4939	putc(VIMSPELLVERSION, fd); /* <versionnr> */
				4940
				4941	/*
				4942	* <SECTIONS>: <section> ... <sectionend>
				4943	*/
				4944
				4945	/* SN_INFO: <infotext> */
				4946	if (spin->si_info != NULL)
				4947	{
				4948	putc(SN_INFO, fd); /* <sectionID> */
				4949	putc(0, fd); /* <sectionflags> */
				4950
				4951	i = (int)STRLEN(spin->si_info);
				4952	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				4953	fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
				4954	}
				4955
				4956	/* SN_REGION: <regionname> ...
				4957	* Write the region names only if there is more than one. */
				4958	if (spin->si_region_count > 1)
				4959	{
				4960	putc(SN_REGION, fd); /* <sectionID> */
				4961	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4962	l = spin->si_region_count * 2;
				4963	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				4964	fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
				4965	/* <regionname> ... */
				4966	regionmask = (1 << spin->si_region_count) - 1;
				4967	}
				4968	else
				4969	regionmask = 0;
				4970
				4971	/* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
				4972	*
				4973	* The table with character flags and the table for case folding.
				4974	* This makes sure the same characters are recognized as word characters
				4975	* when generating an when using a spell file.
				4976	* Skip this for ASCII, the table may conflict with the one used for
				4977	* 'encoding'.
				4978	* Also skip this for an .add.spl file, the main spell file must contain
				4979	* the table (avoids that it conflicts). File is shorter too.
				4980	*/
				4981	if (!spin->si_ascii && !spin->si_add)
				4982	{
				4983	char_u folchars[128 * 8];
				4984	int flags;
				4985
				4986	putc(SN_CHARFLAGS, fd); /* <sectionID> */
				4987	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4988
				4989	/* Form the <folchars> string first, we need to know its length. */
				4990	l = 0;
				4991	for (i = 128; i < 256; ++i)
				4992	{
				4993	#ifdef FEAT_MBYTE
				4994	if (has_mbyte)
				4995	l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
				4996	else
				4997	#endif
				4998	folchars[l++] = spelltab.st_fold[i];
				4999	}
				5000	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
				5001
				5002	fputc(128, fd); /* <charflagslen> */
				5003	for (i = 128; i < 256; ++i)
				5004	{
				5005	flags = 0;
				5006	if (spelltab.st_isw[i])
				5007	flags \|= CF_WORD;
				5008	if (spelltab.st_isu[i])
				5009	flags \|= CF_UPPER;
				5010	fputc(flags, fd); /* <charflags> */
				5011	}
				5012
				5013	put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
				5014	fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
				5015	}
				5016
				5017	/* SN_MIDWORD: <midword> */
				5018	if (spin->si_midword != NULL)
				5019	{
				5020	putc(SN_MIDWORD, fd); /* <sectionID> */
				5021	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5022
				5023	i = (int)STRLEN(spin->si_midword);
				5024	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				5025	fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
				5026	/* <midword> */
				5027	}
				5028
				5029	/* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
				5030	if (spin->si_prefcond.ga_len > 0)
				5031	{
				5032	putc(SN_PREFCOND, fd); /* <sectionID> */
				5033	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5034
				5035	l = write_spell_prefcond(NULL, &spin->si_prefcond);
				5036	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5037
				5038	write_spell_prefcond(fd, &spin->si_prefcond);
				5039	}
				5040
				5041	/* SN_REP: <repcount> <rep> ...
				5042	* SN_SAL: <salflags> <salcount> <sal> ...
				5043	* SN_REPSAL: <repcount> <rep> ... */
				5044
				5045	/* round 1: SN_REP section
				5046	* round 2: SN_SAL section (unless SN_SOFO is used)
				5047	* round 3: SN_REPSAL section */
				5048	for (round = 1; round <= 3; ++round)
				5049	{
				5050	if (round == 1)
				5051	gap = &spin->si_rep;
				5052	else if (round == 2)
				5053	{
				5054	/* Don't write SN_SAL when using a SN_SOFO section */
				5055	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5056	continue;
				5057	gap = &spin->si_sal;
				5058	}
				5059	else
				5060	gap = &spin->si_repsal;
				5061
				5062	/* Don't write the section if there are no items. */
				5063	if (gap->ga_len == 0)
				5064	continue;
				5065
				5066	/* Sort the REP/REPSAL items. */
				5067	if (round != 2)
				5068	qsort(gap->ga_data, (size_t)gap->ga_len,
				5069	sizeof(fromto_T), rep_compare);
				5070
				5071	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
				5072	putc(i, fd); /* <sectionID> */
				5073
				5074	/* This is for making suggestions, section is not required. */
				5075	putc(0, fd); /* <sectionflags> */
				5076
				5077	/* Compute the length of what follows. */
				5078	l = 2; /* count <repcount> or <salcount> */
				5079	for (i = 0; i < gap->ga_len; ++i)
				5080	{
				5081	ftp = &((fromto_T *)gap->ga_data)[i];
				5082	l += 1 + (int)STRLEN(ftp->ft_from); /* count <fromlen> and <from> */
				5083	l += 1 + (int)STRLEN(ftp->ft_to); /* count <tolen> and <to> */
				5084	}
				5085	if (round == 2)
				5086	++l; /* count <salflags> */
				5087	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5088
				5089	if (round == 2)
				5090	{
				5091	i = 0;
				5092	if (spin->si_followup)
				5093	i \|= SAL_F0LLOWUP;
				5094	if (spin->si_collapse)
				5095	i \|= SAL_COLLAPSE;
				5096	if (spin->si_rem_accents)
				5097	i \|= SAL_REM_ACCENTS;
				5098	putc(i, fd); /* <salflags> */
				5099	}
				5100
				5101	put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
				5102	for (i = 0; i < gap->ga_len; ++i)
				5103	{
				5104	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				5105	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				5106	ftp = &((fromto_T *)gap->ga_data)[i];
				5107	for (rr = 1; rr <= 2; ++rr)
				5108	{
				5109	p = rr == 1 ? ftp->ft_from : ftp->ft_to;
				5110	l = (int)STRLEN(p);
				5111	putc(l, fd);
				5112	if (l > 0)
				5113	fwv &= fwrite(p, l, (size_t)1, fd);
				5114	}
				5115	}
				5116
				5117	}
				5118
				5119	/* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				5120	* This is for making suggestions, section is not required. */
				5121	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5122	{
				5123	putc(SN_SOFO, fd); /* <sectionID> */
				5124	putc(0, fd); /* <sectionflags> */
				5125
				5126	l = (int)STRLEN(spin->si_sofofr);
				5127	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
				5128	/* <sectionlen> */
				5129
				5130	put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
				5131	fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
				5132
				5133	l = (int)STRLEN(spin->si_sofoto);
				5134	put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
				5135	fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
				5136	}
				5137
				5138	/* SN_WORDS: <word> ...
				5139	* This is for making suggestions, section is not required. */
				5140	if (spin->si_commonwords.ht_used > 0)
				5141	{
				5142	putc(SN_WORDS, fd); /* <sectionID> */
				5143	putc(0, fd); /* <sectionflags> */
				5144
				5145	/* round 1: count the bytes
				5146	* round 2: write the bytes */
				5147	for (round = 1; round <= 2; ++round)
				5148	{
				5149	int todo;
				5150	int len = 0;
				5151	hashitem_T *hi;
				5152
				5153	todo = (int)spin->si_commonwords.ht_used;
				5154	for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
				5155	if (!HASHITEM_EMPTY(hi))
				5156	{
				5157	l = (int)STRLEN(hi->hi_key) + 1;
				5158	len += l;
				5159	if (round == 2) /* <word> */
				5160	fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
				5161	--todo;
				5162	}
				5163	if (round == 1)
				5164	put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
				5165	}
				5166	}
				5167
				5168	/* SN_MAP: <mapstr>
				5169	* This is for making suggestions, section is not required. */
				5170	if (spin->si_map.ga_len > 0)
				5171	{
				5172	putc(SN_MAP, fd); /* <sectionID> */
				5173	putc(0, fd); /* <sectionflags> */
				5174	l = spin->si_map.ga_len;
				5175	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5176	fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
				5177	/* <mapstr> */
				5178	}
				5179
				5180	/* SN_SUGFILE: <timestamp>
				5181	* This is used to notify that a .sug file may be available and at the
				5182	* same time allows for checking that a .sug file that is found matches
				5183	* with this .spl file. That's because the word numbers must be exactly
				5184	* right. */
				5185	if (!spin->si_nosugfile
				5186	&& (spin->si_sal.ga_len > 0
				5187	\|\| (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
				5188	{
				5189	putc(SN_SUGFILE, fd); /* <sectionID> */
				5190	putc(0, fd); /* <sectionflags> */
				5191	put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
				5192
				5193	/* Set si_sugtime and write it to the file. */
				5194	spin->si_sugtime = time(NULL);
				5195	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5196	}
				5197
				5198	/* SN_NOSPLITSUGS: nothing
				5199	* This is used to notify that no suggestions with word splits are to be
				5200	* made. */
				5201	if (spin->si_nosplitsugs)
				5202	{
				5203	putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
				5204	putc(0, fd); /* <sectionflags> */
				5205	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5206	}
				5207
				5208	/* SN_NOCOMPUNDSUGS: nothing
				5209	* This is used to notify that no suggestions with compounds are to be
				5210	* made. */
				5211	if (spin->si_nocompoundsugs)
				5212	{
				5213	putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
				5214	putc(0, fd); /* <sectionflags> */
				5215	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5216	}
				5217
				5218	/* SN_COMPOUND: compound info.
				5219	* We don't mark it required, when not supported all compound words will
				5220	* be bad words. */
				5221	if (spin->si_compflags != NULL)
				5222	{
				5223	putc(SN_COMPOUND, fd); /* <sectionID> */
				5224	putc(0, fd); /* <sectionflags> */
				5225
				5226	l = (int)STRLEN(spin->si_compflags);
				5227	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5228	l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
				5229	put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
				5230
				5231	putc(spin->si_compmax, fd); /* <compmax> */
				5232	putc(spin->si_compminlen, fd); /* <compminlen> */
				5233	putc(spin->si_compsylmax, fd); /* <compsylmax> */
				5234	putc(0, fd); /* for Vim 7.0b compatibility */
				5235	putc(spin->si_compoptions, fd); /* <compoptions> */
				5236	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
				5237	/* <comppatcount> */
				5238	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5239	{
				5240	p = ((char_u **)(spin->si_comppat.ga_data))[i];
				5241	putc((int)STRLEN(p), fd); /* <comppatlen> */
				5242	fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
				5243	/* <comppattext> */
				5244	}
				5245	/* <compflags> */
				5246	fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
				5247	(size_t)1, fd);
				5248	}
				5249
				5250	/* SN_NOBREAK: NOBREAK flag */
				5251	if (spin->si_nobreak)
				5252	{
				5253	putc(SN_NOBREAK, fd); /* <sectionID> */
				5254	putc(0, fd); /* <sectionflags> */
				5255
				5256	/* It's empty, the presence of the section flags the feature. */
				5257	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5258	}
				5259
				5260	/* SN_SYLLABLE: syllable info.
				5261	* We don't mark it required, when not supported syllables will not be
				5262	* counted. */
				5263	if (spin->si_syllable != NULL)
				5264	{
				5265	putc(SN_SYLLABLE, fd); /* <sectionID> */
				5266	putc(0, fd); /* <sectionflags> */
				5267
				5268	l = (int)STRLEN(spin->si_syllable);
				5269	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5270	fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
				5271	/* <syllable> */
				5272	}
				5273
				5274	/* end of <SECTIONS> */
				5275	putc(SN_END, fd); /* <sectionend> */
				5276
				5277
				5278	/*
				5279	* <LWORDTREE> <KWORDTREE> <PREFIXTREE>
				5280	*/
				5281	spin->si_memtot = 0;
				5282	for (round = 1; round <= 3; ++round)
				5283	{
				5284	if (round == 1)
				5285	tree = spin->si_foldroot->wn_sibling;
				5286	else if (round == 2)
				5287	tree = spin->si_keeproot->wn_sibling;
				5288	else
				5289	tree = spin->si_prefroot->wn_sibling;
				5290
				5291	/* Clear the index and wnode fields in the tree. */
				5292	clear_node(tree);
				5293
				5294	/* Count the number of nodes. Needed to be able to allocate the
				5295	* memory when reading the nodes. Also fills in index for shared
				5296	* nodes. */
				5297	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
				5298
				5299	/* number of nodes in 4 bytes */
				5300	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5301	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5302
				5303	/* Write the nodes. */
				5304	(void)put_node(fd, tree, 0, regionmask, round == 3);
				5305	}
				5306
				5307	/* Write another byte to check for errors (file system full). */
				5308	if (putc(0, fd) == EOF)
				5309	retval = FAIL;
				5310	theend:
				5311	if (fclose(fd) == EOF)
				5312	retval = FAIL;
				5313
				5314	if (fwv != (size_t)1)
				5315	retval = FAIL;
				5316	if (retval == FAIL)
				5317	EMSG(_(e_write));
				5318
				5319	return retval;
				5320	}
				5321
				5322	/*
				5323	* Clear the index and wnode fields of "node", it siblings and its
				5324	* children. This is needed because they are a union with other items to save
				5325	* space.
				5326	*/
				5327	static void
				5328	clear_node(wordnode_T *node)
				5329	{
				5330	wordnode_T *np;
				5331
				5332	if (node != NULL)
				5333	for (np = node; np != NULL; np = np->wn_sibling)
				5334	{
				5335	np->wn_u1.index = 0;
				5336	np->wn_u2.wnode = NULL;
				5337
				5338	if (np->wn_byte != NUL)
				5339	clear_node(np->wn_child);
				5340	}
				5341	}
				5342
				5343
				5344	/*
				5345	* Dump a word tree at node "node".
				5346	*
				5347	* This first writes the list of possible bytes (siblings). Then for each
				5348	* byte recursively write the children.
				5349	*
				5350	* NOTE: The code here must match the code in read_tree_node(), since
				5351	* assumptions are made about the indexes (so that we don't have to write them
				5352	* in the file).
				5353	*
				5354	* Returns the number of nodes used.
				5355	*/
				5356	static int
				5357	put_node(
				5358	FILE fd, / NULL when only counting */
				5359	wordnode_T *node,
				5360	int idx,
				5361	int regionmask,
				5362	int prefixtree) /* TRUE for PREFIXTREE */
				5363	{
				5364	int newindex = idx;
				5365	int siblingcount = 0;
				5366	wordnode_T *np;
				5367	int flags;
				5368
				5369	/* If "node" is zero the tree is empty. */
				5370	if (node == NULL)
				5371	return 0;
				5372
				5373	/* Store the index where this node is written. */
				5374	node->wn_u1.index = idx;
				5375
				5376	/* Count the number of siblings. */
				5377	for (np = node; np != NULL; np = np->wn_sibling)
				5378	++siblingcount;
				5379
				5380	/* Write the sibling count. */
				5381	if (fd != NULL)
				5382	putc(siblingcount, fd); /* <siblingcount> */
				5383
				5384	/* Write each sibling byte and optionally extra info. */
				5385	for (np = node; np != NULL; np = np->wn_sibling)
				5386	{
				5387	if (np->wn_byte == 0)
				5388	{
				5389	if (fd != NULL)
				5390	{
				5391	/* For a NUL byte (end of word) write the flags etc. */
				5392	if (prefixtree)
				5393	{
				5394	/* In PREFIXTREE write the required affixID and the
				5395	* associated condition nr (stored in wn_region). The
				5396	* byte value is misused to store the "rare" and "not
				5397	* combining" flags */
				5398	if (np->wn_flags == (short_u)PFX_FLAGS)
				5399	putc(BY_NOFLAGS, fd); /* <byte> */
				5400	else
				5401	{
				5402	putc(BY_FLAGS, fd); /* <byte> */
				5403	putc(np->wn_flags, fd); /* <pflags> */
				5404	}
				5405	putc(np->wn_affixID, fd); /* <affixID> */
				5406	put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
				5407	}
				5408	else
				5409	{
				5410	/* For word trees we write the flag/region items. */
				5411	flags = np->wn_flags;
				5412	if (regionmask != 0 && np->wn_region != regionmask)
				5413	flags \|= WF_REGION;
				5414	if (np->wn_affixID != 0)
				5415	flags \|= WF_AFX;
				5416	if (flags == 0)
				5417	{
				5418	/* word without flags or region */
				5419	putc(BY_NOFLAGS, fd); /* <byte> */
				5420	}
				5421	else
				5422	{
				5423	if (np->wn_flags >= 0x100)
				5424	{
				5425	putc(BY_FLAGS2, fd); /* <byte> */
				5426	putc(flags, fd); /* <flags> */
				5427	putc((unsigned)flags >> 8, fd); /* <flags2> */
				5428	}
				5429	else
				5430	{
				5431	putc(BY_FLAGS, fd); /* <byte> */
				5432	putc(flags, fd); /* <flags> */
				5433	}
				5434	if (flags & WF_REGION)
				5435	putc(np->wn_region, fd); /* <region> */
				5436	if (flags & WF_AFX)
				5437	putc(np->wn_affixID, fd); /* <affixID> */
				5438	}
				5439	}
				5440	}
				5441	}
				5442	else
				5443	{
				5444	if (np->wn_child->wn_u1.index != 0
				5445	&& np->wn_child->wn_u2.wnode != node)
				5446	{
				5447	/* The child is written elsewhere, write the reference. */
				5448	if (fd != NULL)
				5449	{
				5450	putc(BY_INDEX, fd); /* <byte> */
				5451	/* <nodeidx> */
				5452	put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
				5453	}
				5454	}
				5455	else if (np->wn_child->wn_u2.wnode == NULL)
				5456	/* We will write the child below and give it an index. */
				5457	np->wn_child->wn_u2.wnode = node;
				5458
				5459	if (fd != NULL)
				5460	if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
				5461	{
				5462	EMSG(_(e_write));
				5463	return 0;
				5464	}
				5465	}
				5466	}
				5467
				5468	/* Space used in the array when reading: one for each sibling and one for
				5469	* the count. */
				5470	newindex += siblingcount + 1;
				5471
				5472	/* Recursively dump the children of each sibling. */
				5473	for (np = node; np != NULL; np = np->wn_sibling)
				5474	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
				5475	newindex = put_node(fd, np->wn_child, newindex, regionmask,
				5476	prefixtree);
				5477
				5478	return newindex;
				5479	}
				5480
				5481
				5482	/*
				5483	* ":mkspell [-ascii] outfile infile ..."
				5484	* ":mkspell [-ascii] addfile"
				5485	*/
				5486	void
				5487	ex_mkspell(exarg_T *eap)
				5488	{
				5489	int fcount;
				5490	char_u **fnames;
				5491	char_u *arg = eap->arg;
				5492	int ascii = FALSE;
				5493
				5494	if (STRNCMP(arg, "-ascii", 6) == 0)
				5495	{
				5496	ascii = TRUE;
				5497	arg = skipwhite(arg + 6);
				5498	}
				5499
				5500	/* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
				5501	if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
				5502	{
				5503	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
				5504	FreeWild(fcount, fnames);
				5505	}
				5506	}
				5507
				5508	/*
				5509	* Create the .sug file.
				5510	* Uses the soundfold info in "spin".
				5511	* Writes the file with the name "wfname", with ".spl" changed to ".sug".
				5512	*/
				5513	static void
				5514	spell_make_sugfile(spellinfo_T spin, char_u wfname)
				5515	{
				5516	char_u *fname = NULL;
				5517	int len;
				5518	slang_T *slang;
				5519	int free_slang = FALSE;
				5520
				5521	/*
				5522	* Read back the .spl file that was written. This fills the required
				5523	* info for soundfolding. This also uses less memory than the
				5524	* pointer-linked version of the trie. And it avoids having two versions
				5525	* of the code for the soundfolding stuff.
				5526	* It might have been done already by spell_reload_one().
				5527	*/
				5528	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				5529	if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
				5530	break;
				5531	if (slang == NULL)
				5532	{
				5533	spell_message(spin, (char_u *)_("Reading back spell file..."));
				5534	slang = spell_load_file(wfname, NULL, NULL, FALSE);
				5535	if (slang == NULL)
				5536	return;
				5537	free_slang = TRUE;
				5538	}
				5539
				5540	/*
				5541	* Clear the info in "spin" that is used.
				5542	*/
				5543	spin->si_blocks = NULL;
				5544	spin->si_blocks_cnt = 0;
				5545	spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
				5546	spin->si_free_count = 0;
				5547	spin->si_first_free = NULL;
				5548	spin->si_foldwcount = 0;
				5549
				5550	/*
				5551	* Go through the trie of good words, soundfold each word and add it to
				5552	* the soundfold trie.
				5553	*/
				5554	spell_message(spin, (char_u *)_("Performing soundfolding..."));
				5555	if (sug_filltree(spin, slang) == FAIL)
				5556	goto theend;
				5557
				5558	/*
				5559	* Create the table which links each soundfold word with a list of the
				5560	* good words it may come from. Creates buffer "spin->si_spellbuf".
				5561	* This also removes the wordnr from the NUL byte entries to make
				5562	* compression possible.
				5563	*/
				5564	if (sug_maketable(spin) == FAIL)
				5565	goto theend;
				5566
				5567	smsg((char_u *)_("Number of words after soundfolding: %ld"),
				5568	(long)spin->si_spellbuf->b_ml.ml_line_count);
				5569
				5570	/*
				5571	* Compress the soundfold trie.
				5572	*/
				5573	spell_message(spin, (char_u *)_(msg_compressing));
				5574	wordtree_compress(spin, spin->si_foldroot);
				5575
				5576	/*
				5577	* Write the .sug file.
				5578	* Make the file name by changing ".spl" to ".sug".
				5579	*/
				5580	fname = alloc(MAXPATHL);
				5581	if (fname == NULL)
				5582	goto theend;
				5583	vim_strncpy(fname, wfname, MAXPATHL - 1);
				5584	len = (int)STRLEN(fname);
				5585	fname[len - 2] = 'u';
				5586	fname[len - 1] = 'g';
				5587	sug_write(spin, fname);
				5588
				5589	theend:
				5590	vim_free(fname);
				5591	if (free_slang)
				5592	slang_free(slang);
				5593	free_blocks(spin->si_blocks);
				5594	close_spellbuf(spin->si_spellbuf);
				5595	}
				5596
				5597	/*
				5598	* Build the soundfold trie for language "slang".
				5599	*/
				5600	static int
				5601	sug_filltree(spellinfo_T spin, slang_T slang)
				5602	{
				5603	char_u *byts;
				5604	idx_T *idxs;
				5605	int depth;
				5606	idx_T arridx[MAXWLEN];
				5607	int curi[MAXWLEN];
				5608	char_u tword[MAXWLEN];
				5609	char_u tsalword[MAXWLEN];
				5610	int c;
				5611	idx_T n;
				5612	unsigned words_done = 0;
				5613	int wordcount[MAXWLEN];
				5614
				5615	/* We use si_foldroot for the soundfolded trie. */
				5616	spin->si_foldroot = wordtree_alloc(spin);
				5617	if (spin->si_foldroot == NULL)
				5618	return FAIL;
				5619
				5620	/* let tree_add_word() know we're adding to the soundfolded tree */
				5621	spin->si_sugtree = TRUE;
				5622
				5623	/*
				5624	* Go through the whole case-folded tree, soundfold each word and put it
				5625	* in the trie.
				5626	*/
				5627	byts = slang->sl_fbyts;
				5628	idxs = slang->sl_fidxs;
				5629
				5630	arridx[0] = 0;
				5631	curi[0] = 1;
				5632	wordcount[0] = 0;
				5633
				5634	depth = 0;
				5635	while (depth >= 0 && !got_int)
				5636	{
				5637	if (curi[depth] > byts[arridx[depth]])
				5638	{
				5639	/* Done all bytes at this node, go up one level. */
				5640	idxs[arridx[depth]] = wordcount[depth];
				5641	if (depth > 0)
				5642	wordcount[depth - 1] += wordcount[depth];
				5643
				5644	--depth;
				5645	line_breakcheck();
				5646	}
				5647	else
				5648	{
				5649
				5650	/* Do one more byte at this node. */
				5651	n = arridx[depth] + curi[depth];
				5652	++curi[depth];
				5653
				5654	c = byts[n];
				5655	if (c == 0)
				5656	{
				5657	/* Sound-fold the word. */
				5658	tword[depth] = NUL;
				5659	spell_soundfold(slang, tword, TRUE, tsalword);
				5660
				5661	/* We use the "flags" field for the MSB of the wordnr,
				5662	* "region" for the LSB of the wordnr. */
				5663	if (tree_add_word(spin, tsalword, spin->si_foldroot,
				5664	words_done >> 16, words_done & 0xffff,
				5665	0) == FAIL)
				5666	return FAIL;
				5667
				5668	++words_done;
				5669	++wordcount[depth];
				5670
				5671	/* Reset the block count each time to avoid compression
				5672	* kicking in. */
				5673	spin->si_blocks_cnt = 0;
				5674
				5675	/* Skip over any other NUL bytes (same word with different
				5676	* flags). */
				5677	while (byts[n + 1] == 0)
				5678	{
				5679	++n;
				5680	++curi[depth];
				5681	}
				5682	}
				5683	else
				5684	{
				5685	/* Normal char, go one level deeper. */
				5686	tword[depth++] = c;
				5687	arridx[depth] = idxs[n];
				5688	curi[depth] = 1;
				5689	wordcount[depth] = 0;
				5690	}
				5691	}
				5692	}
				5693
				5694	smsg((char_u *)_("Total number of words: %d"), words_done);
				5695
				5696	return OK;
				5697	}
				5698
				5699	/*
				5700	* Make the table that links each word in the soundfold trie to the words it
				5701	* can be produced from.
				5702	* This is not unlike lines in a file, thus use a memfile to be able to access
				5703	* the table efficiently.
				5704	* Returns FAIL when out of memory.
				5705	*/
				5706	static int
				5707	sug_maketable(spellinfo_T *spin)
				5708	{
				5709	garray_T ga;
				5710	int res = OK;
				5711
				5712	/* Allocate a buffer, open a memline for it and create the swap file
				5713	* (uses a temp file, not a .swp file). */
				5714	spin->si_spellbuf = open_spellbuf();
				5715	if (spin->si_spellbuf == NULL)
				5716	return FAIL;
				5717
				5718	/* Use a buffer to store the line info, avoids allocating many small
				5719	* pieces of memory. */
				5720	ga_init2(&ga, 1, 100);
				5721
				5722	/* recursively go through the tree */
				5723	if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
				5724	res = FAIL;
				5725
				5726	ga_clear(&ga);
				5727	return res;
				5728	}
				5729
				5730	/*
				5731	* Fill the table for one node and its children.
				5732	* Returns the wordnr at the start of the node.
				5733	* Returns -1 when out of memory.
				5734	*/
				5735	static int
				5736	sug_filltable(
				5737	spellinfo_T *spin,
				5738	wordnode_T *node,
				5739	int startwordnr,
				5740	garray_T gap) / place to store line of numbers */
				5741	{
				5742	wordnode_T p, np;
				5743	int wordnr = startwordnr;
				5744	int nr;
				5745	int prev_nr;
				5746
				5747	for (p = node; p != NULL; p = p->wn_sibling)
				5748	{
				5749	if (p->wn_byte == NUL)
				5750	{
				5751	gap->ga_len = 0;
				5752	prev_nr = 0;
				5753	for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
				5754	{
				5755	if (ga_grow(gap, 10) == FAIL)
				5756	return -1;
				5757
				5758	nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
				5759	/* Compute the offset from the previous nr and store the
				5760	* offset in a way that it takes a minimum number of bytes.
				5761	* It's a bit like utf-8, but without the need to mark
				5762	* following bytes. */
				5763	nr -= prev_nr;
				5764	prev_nr += nr;
				5765	gap->ga_len += offset2bytes(nr,
				5766	(char_u *)gap->ga_data + gap->ga_len);
				5767	}
				5768
				5769	/* add the NUL byte */
				5770	((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
				5771
				5772	if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
				5773	gap->ga_data, gap->ga_len, TRUE) == FAIL)
				5774	return -1;
				5775	++wordnr;
				5776
				5777	/* Remove extra NUL entries, we no longer need them. We don't
				5778	* bother freeing the nodes, the won't be reused anyway. */
				5779	while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
				5780	p->wn_sibling = p->wn_sibling->wn_sibling;
				5781
				5782	/* Clear the flags on the remaining NUL node, so that compression
				5783	* works a lot better. */
				5784	p->wn_flags = 0;
				5785	p->wn_region = 0;
				5786	}
				5787	else
				5788	{
				5789	wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
				5790	if (wordnr == -1)
				5791	return -1;
				5792	}
				5793	}
				5794	return wordnr;
				5795	}
				5796
				5797	/*
				5798	* Convert an offset into a minimal number of bytes.
				5799	* Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
				5800	* bytes.
				5801	*/
				5802	static int
				5803	offset2bytes(int nr, char_u *buf)
				5804	{
				5805	int rem;
				5806	int b1, b2, b3, b4;
				5807
				5808	/* Split the number in parts of base 255. We need to avoid NUL bytes. */
				5809	b1 = nr % 255 + 1;
				5810	rem = nr / 255;
				5811	b2 = rem % 255 + 1;
				5812	rem = rem / 255;
				5813	b3 = rem % 255 + 1;
				5814	b4 = rem / 255 + 1;
				5815
				5816	if (b4 > 1 \|\| b3 > 0x1f) /* 4 bytes */
				5817	{
				5818	buf[0] = 0xe0 + b4;
				5819	buf[1] = b3;
				5820	buf[2] = b2;
				5821	buf[3] = b1;
				5822	return 4;
				5823	}
				5824	if (b3 > 1 \|\| b2 > 0x3f ) /* 3 bytes */
				5825	{
				5826	buf[0] = 0xc0 + b3;
				5827	buf[1] = b2;
				5828	buf[2] = b1;
				5829	return 3;
				5830	}
				5831	if (b2 > 1 \|\| b1 > 0x7f ) /* 2 bytes */
				5832	{
				5833	buf[0] = 0x80 + b2;
				5834	buf[1] = b1;
				5835	return 2;
				5836	}
				5837	/* 1 byte */
				5838	buf[0] = b1;
				5839	return 1;
				5840	}
				5841
				5842	/*
				5843	* Write the .sug file in "fname".
				5844	*/
				5845	static void
				5846	sug_write(spellinfo_T spin, char_u fname)
				5847	{
				5848	FILE *fd;
				5849	wordnode_T *tree;
				5850	int nodecount;
				5851	int wcount;
				5852	char_u *line;
				5853	linenr_T lnum;
				5854	int len;
				5855
				5856	/* Create the file. Note that an existing file is silently overwritten! */
				5857	fd = mch_fopen((char *)fname, "w");
				5858	if (fd == NULL)
				5859	{
				5860	EMSG2(_(e_notopen), fname);
				5861	return;
				5862	}
				5863
				5864	vim_snprintf((char *)IObuff, IOSIZE,
				5865	_("Writing suggestion file %s ..."), fname);
				5866	spell_message(spin, IObuff);
				5867
				5868	/*
				5869	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				5870	*/
				5871	if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
				5872	{
				5873	EMSG(_(e_write));
				5874	goto theend;
				5875	}
				5876	putc(VIMSUGVERSION, fd); /* <versionnr> */
				5877
				5878	/* Write si_sugtime to the file. */
				5879	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5880
				5881	/*
				5882	* <SUGWORDTREE>
				5883	*/
				5884	spin->si_memtot = 0;
				5885	tree = spin->si_foldroot->wn_sibling;
				5886
				5887	/* Clear the index and wnode fields in the tree. */
				5888	clear_node(tree);
				5889
				5890	/* Count the number of nodes. Needed to be able to allocate the
				5891	* memory when reading the nodes. Also fills in index for shared
				5892	* nodes. */
				5893	nodecount = put_node(NULL, tree, 0, 0, FALSE);
				5894
				5895	/* number of nodes in 4 bytes */
				5896	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5897	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5898
				5899	/* Write the nodes. */
				5900	(void)put_node(fd, tree, 0, 0, FALSE);
				5901
				5902	/*
				5903	* <SUGTABLE>: <sugwcount> <sugline> ...
				5904	*/
				5905	wcount = spin->si_spellbuf->b_ml.ml_line_count;
				5906	put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
				5907
				5908	for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
				5909	{
				5910	/* <sugline>: <sugnr> ... NUL */
				5911	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
				5912	len = (int)STRLEN(line) + 1;
				5913	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
				5914	{
				5915	EMSG(_(e_write));
				5916	goto theend;
				5917	}
				5918	spin->si_memtot += len;
				5919	}
				5920
				5921	/* Write another byte to check for errors. */
				5922	if (putc(0, fd) == EOF)
				5923	EMSG(_(e_write));
				5924
				5925	vim_snprintf((char *)IObuff, IOSIZE,
				5926	_("Estimated runtime memory use: %d bytes"), spin->si_memtot);
				5927	spell_message(spin, IObuff);
				5928
				5929	theend:
				5930	/* close the file */
				5931	fclose(fd);
				5932	}
				5933
				5934
				5935	/*
				5936	* Create a Vim spell file from one or more word lists.
				5937	* "fnames[0]" is the output file name.
				5938	* "fnames[fcount - 1]" is the last input file name.
				5939	* Exception: when "fnames[0]" ends in ".add" it's used as the input file name
				5940	* and ".spl" is appended to make the output file name.
				5941	*/
				5942	void
				5943	mkspell(
				5944	int fcount,
				5945	char_u **fnames,
				5946	int ascii, /* -ascii argument given */
				5947	int over_write, /* overwrite existing output file */
				5948	int added_word) /* invoked through "zg" */
				5949	{
				5950	char_u *fname = NULL;
				5951	char_u *wfname;
				5952	char_u **innames;
				5953	int incount;
				5954	afffile_T *(afile[8]);
				5955	int i;
				5956	int len;
				5957	stat_T st;
				5958	int error = FALSE;
				5959	spellinfo_T spin;
				5960
				5961	vim_memset(&spin, 0, sizeof(spin));
				5962	spin.si_verbose = !added_word;
				5963	spin.si_ascii = ascii;
				5964	spin.si_followup = TRUE;
				5965	spin.si_rem_accents = TRUE;
				5966	ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
				5967	ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
				5968	ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
				5969	ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
				5970	ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
				5971	ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
				5972	hash_init(&spin.si_commonwords);
				5973	spin.si_newcompID = 127; /* start compound ID at first maximum */
				5974
				5975	/* default: fnames[0] is output file, following are input files */
				5976	innames = &fnames[1];
				5977	incount = fcount - 1;
				5978
				5979	wfname = alloc(MAXPATHL);
				5980	if (wfname == NULL)
				5981	return;
				5982
				5983	if (fcount >= 1)
				5984	{
				5985	len = (int)STRLEN(fnames[0]);
				5986	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
				5987	{
				5988	/* For ":mkspell path/en.latin1.add" output file is
				5989	* "path/en.latin1.add.spl". */
				5990	innames = &fnames[0];
				5991	incount = 1;
				5992	vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
				5993	}
				5994	else if (fcount == 1)
				5995	{
				5996	/* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
				5997	innames = &fnames[0];
				5998	incount = 1;
				5999	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6000	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6001	}
				6002	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
				6003	{
				6004	/* Name ends in ".spl", use as the file name. */
				6005	vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
				6006	}
				6007	else
				6008	/* Name should be language, make the file name from it. */
				6009	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6010	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6011
				6012	/* Check for .ascii.spl. */
				6013	if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
				6014	spin.si_ascii = TRUE;
				6015
				6016	/* Check for .add.spl. */
				6017	if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
				6018	spin.si_add = TRUE;
				6019	}
				6020
				6021	if (incount <= 0)
				6022	EMSG(_(e_invarg)); /* need at least output and input names */
				6023	else if (vim_strchr(gettail(wfname), '_') != NULL)
				6024	EMSG(_("E751: Output file name must not have region name"));
				6025	else if (incount > 8)
				6026	EMSG(_("E754: Only up to 8 regions supported"));
				6027	else
				6028	{
				6029	/* Check for overwriting before doing things that may take a lot of
				6030	* time. */
				6031	if (!over_write && mch_stat((char *)wfname, &st) >= 0)
				6032	{
				6033	EMSG(_(e_exists));
				6034	goto theend;
				6035	}
				6036	if (mch_isdir(wfname))
				6037	{
				6038	EMSG2(_(e_isadir2), wfname);
				6039	goto theend;
				6040	}
				6041
				6042	fname = alloc(MAXPATHL);
				6043	if (fname == NULL)
				6044	goto theend;
				6045
				6046	/*
				6047	* Init the aff and dic pointers.
				6048	* Get the region names if there are more than 2 arguments.
				6049	*/
				6050	for (i = 0; i < incount; ++i)
				6051	{
				6052	afile[i] = NULL;
				6053
				6054	if (incount > 1)
				6055	{
				6056	len = (int)STRLEN(innames[i]);
				6057	if (STRLEN(gettail(innames[i])) < 5
				6058	\|\| innames[i][len - 3] != '_')
				6059	{
				6060	EMSG2(_("E755: Invalid region in %s"), innames[i]);
				6061	goto theend;
				6062	}
				6063	spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
				6064	spin.si_region_name[i * 2 + 1] =
				6065	TOLOWER_ASC(innames[i][len - 1]);
				6066	}
				6067	}
				6068	spin.si_region_count = incount;
				6069
				6070	spin.si_foldroot = wordtree_alloc(&spin);
				6071	spin.si_keeproot = wordtree_alloc(&spin);
				6072	spin.si_prefroot = wordtree_alloc(&spin);
				6073	if (spin.si_foldroot == NULL
				6074	\|\| spin.si_keeproot == NULL
				6075	\|\| spin.si_prefroot == NULL)
				6076	{
				6077	free_blocks(spin.si_blocks);
				6078	goto theend;
				6079	}
				6080
				6081	/* When not producing a .add.spl file clear the character table when
				6082	* we encounter one in the .aff file. This means we dump the current
				6083	* one in the .spl file if the .aff file doesn't define one. That's
				6084	* better than guessing the contents, the table will match a
				6085	* previously loaded spell file. */
				6086	if (!spin.si_add)
				6087	spin.si_clear_chartab = TRUE;
				6088
				6089	/*
				6090	* Read all the .aff and .dic files.
				6091	* Text is converted to 'encoding'.
				6092	* Words are stored in the case-folded and keep-case trees.
				6093	*/
				6094	for (i = 0; i < incount && !error; ++i)
				6095	{
				6096	spin.si_conv.vc_type = CONV_NONE;
				6097	spin.si_region = 1 << i;
				6098
				6099	vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
				6100	if (mch_stat((char *)fname, &st) >= 0)
				6101	{
				6102	/* Read the .aff file. Will init "spin->si_conv" based on the
				6103	* "SET" line. */
				6104	afile[i] = spell_read_aff(&spin, fname);
				6105	if (afile[i] == NULL)
				6106	error = TRUE;
				6107	else
				6108	{
				6109	/* Read the .dic file and store the words in the trees. */
				6110	vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
				6111	innames[i]);
				6112	if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
				6113	error = TRUE;
				6114	}
				6115	}
				6116	else
				6117	{
				6118	/* No .aff file, try reading the file as a word list. Store
				6119	* the words in the trees. */
				6120	if (spell_read_wordfile(&spin, innames[i]) == FAIL)
				6121	error = TRUE;
				6122	}
				6123
				6124	#ifdef FEAT_MBYTE
				6125	/* Free any conversion stuff. */
				6126	convert_setup(&spin.si_conv, NULL, NULL);
				6127	#endif
				6128	}
				6129
				6130	if (spin.si_compflags != NULL && spin.si_nobreak)
				6131	MSG(_("Warning: both compounding and NOBREAK specified"));
				6132
				6133	if (!error && !got_int)
				6134	{
				6135	/*
				6136	* Combine tails in the tree.
				6137	*/
				6138	spell_message(&spin, (char_u *)_(msg_compressing));
				6139	wordtree_compress(&spin, spin.si_foldroot);
				6140	wordtree_compress(&spin, spin.si_keeproot);
				6141	wordtree_compress(&spin, spin.si_prefroot);
				6142	}
				6143
				6144	if (!error && !got_int)
				6145	{
				6146	/*
				6147	* Write the info in the spell file.
				6148	*/
				6149	vim_snprintf((char *)IObuff, IOSIZE,
				6150	_("Writing spell file %s ..."), wfname);
				6151	spell_message(&spin, IObuff);
				6152
				6153	error = write_vim_spell(&spin, wfname) == FAIL;
				6154
				6155	spell_message(&spin, (char_u *)_("Done!"));
				6156	vim_snprintf((char *)IObuff, IOSIZE,
				6157	_("Estimated runtime memory use: %d bytes"), spin.si_memtot);
				6158	spell_message(&spin, IObuff);
				6159
				6160	/*
				6161	* If the file is loaded need to reload it.
				6162	*/
				6163	if (!error)
				6164	spell_reload_one(wfname, added_word);
				6165	}
				6166
				6167	/* Free the allocated memory. */
				6168	ga_clear(&spin.si_rep);
				6169	ga_clear(&spin.si_repsal);
				6170	ga_clear(&spin.si_sal);
				6171	ga_clear(&spin.si_map);
				6172	ga_clear(&spin.si_comppat);
				6173	ga_clear(&spin.si_prefcond);
				6174	hash_clear_all(&spin.si_commonwords, 0);
				6175
				6176	/* Free the .aff file structures. */
				6177	for (i = 0; i < incount; ++i)
				6178	if (afile[i] != NULL)
				6179	spell_free_aff(afile[i]);
				6180
				6181	/* Free all the bits and pieces at once. */
				6182	free_blocks(spin.si_blocks);
				6183
				6184	/*
				6185	* If there is soundfolding info and no NOSUGFILE item create the
				6186	* .sug file with the soundfolded word trie.
				6187	*/
				6188	if (spin.si_sugtime != 0 && !error && !got_int)
				6189	spell_make_sugfile(&spin, wfname);
				6190
				6191	}
				6192
				6193	theend:
				6194	vim_free(fname);
				6195	vim_free(wfname);
				6196	}
				6197
				6198	/*
				6199	* Display a message for spell file processing when 'verbose' is set or using
				6200	* ":mkspell". "str" can be IObuff.
				6201	*/
				6202	static void
				6203	spell_message(spellinfo_T spin, char_u str)
				6204	{
				6205	if (spin->si_verbose \|\| p_verbose > 2)
				6206	{
				6207	if (!spin->si_verbose)
				6208	verbose_enter();
				6209	MSG(str);
				6210	out_flush();
				6211	if (!spin->si_verbose)
				6212	verbose_leave();
				6213	}
				6214	}
				6215
				6216	/*
				6217	* ":[count]spellgood {word}"
				6218	* ":[count]spellwrong {word}"
				6219	* ":[count]spellundo {word}"
				6220	*/
				6221	void
				6222	ex_spell(exarg_T *eap)
				6223	{
				6224	spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
				6225	eap->forceit ? 0 : (int)eap->line2,
				6226	eap->cmdidx == CMD_spellundo);
				6227	}
				6228
				6229	/*
				6230	* Add "word[len]" to 'spellfile' as a good or bad word.
				6231	*/
				6232	void
				6233	spell_add_word(
				6234	char_u *word,
				6235	int len,
				6236	int bad,
				6237	int idx, /* "zG" and "zW": zero, otherwise index in
				6238	'spellfile' */
				6239	int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */
				6240	{
				6241	FILE *fd = NULL;
				6242	buf_T *buf = NULL;
				6243	int new_spf = FALSE;
				6244	char_u *fname;
				6245	char_u *fnamebuf = NULL;
				6246	char_u line[MAXWLEN * 2];
				6247	long fpos, fpos_next = 0;
				6248	int i;
				6249	char_u *spf;
				6250
				6251	if (idx == 0) /* use internal wordlist */
				6252	{
				6253	if (int_wordlist == NULL)
				6254	{
				6255	int_wordlist = vim_tempname('s', FALSE);
				6256	if (int_wordlist == NULL)
				6257	return;
				6258	}
				6259	fname = int_wordlist;
				6260	}
				6261	else
				6262	{
				6263	/* If 'spellfile' isn't set figure out a good default value. */
				6264	if (*curwin->w_s->b_p_spf == NUL)
				6265	{
				6266	init_spellfile();
				6267	new_spf = TRUE;
				6268	}
				6269
				6270	if (*curwin->w_s->b_p_spf == NUL)
				6271	{
				6272	EMSG2(_(e_notset), "spellfile");
				6273	return;
				6274	}
				6275	fnamebuf = alloc(MAXPATHL);
				6276	if (fnamebuf == NULL)
				6277	return;
				6278
				6279	for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
				6280	{
				6281	copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
				6282	if (i == idx)
				6283	break;
				6284	if (*spf == NUL)
				6285	{
				6286	EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
				6287	vim_free(fnamebuf);
				6288	return;
				6289	}
				6290	}
				6291
				6292	/* Check that the user isn't editing the .add file somewhere. */
				6293	buf = buflist_findname_exp(fnamebuf);
				6294	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
				6295	buf = NULL;
				6296	if (buf != NULL && bufIsChanged(buf))
				6297	{
				6298	EMSG(_(e_bufloaded));
				6299	vim_free(fnamebuf);
				6300	return;
				6301	}
				6302
				6303	fname = fnamebuf;
				6304	}
				6305
				6306	if (bad \|\| undo)
				6307	{
				6308	/* When the word appears as good word we need to remove that one,
				6309	* since its flags sort before the one with WF_BANNED. */
				6310	fd = mch_fopen((char *)fname, "r");
				6311	if (fd != NULL)
				6312	{
				6313	while (!vim_fgets(line, MAXWLEN * 2, fd))
				6314	{
				6315	fpos = fpos_next;
				6316	fpos_next = ftell(fd);
				6317	if (STRNCMP(word, line, len) == 0
				6318	&& (line[len] == '/' \|\| line[len] < ' '))
				6319	{
				6320	/* Found duplicate word. Remove it by writing a '#' at
				6321	* the start of the line. Mixing reading and writing
				6322	* doesn't work for all systems, close the file first. */
				6323	fclose(fd);
				6324	fd = mch_fopen((char *)fname, "r+");
				6325	if (fd == NULL)
				6326	break;
				6327	if (fseek(fd, fpos, SEEK_SET) == 0)
				6328	{
				6329	fputc('#', fd);
				6330	if (undo)
				6331	{
				6332	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6333	smsg((char_u )_("Word '%.s' removed from %s"),
				6334	len, word, NameBuff);
				6335	}
				6336	}
				6337	fseek(fd, fpos_next, SEEK_SET);
				6338	}
				6339	}
				6340	if (fd != NULL)
				6341	fclose(fd);
				6342	}
				6343	}
				6344
				6345	if (!undo)
				6346	{
				6347	fd = mch_fopen((char *)fname, "a");
				6348	if (fd == NULL && new_spf)
				6349	{
				6350	char_u *p;
				6351
				6352	/* We just initialized the 'spellfile' option and can't open the
				6353	* file. We may need to create the "spell" directory first. We
				6354	* already checked the runtime directory is writable in
				6355	* init_spellfile(). */
				6356	if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
				6357	{
				6358	int c = *p;
				6359
				6360	/* The directory doesn't exist. Try creating it and opening
				6361	* the file again. */
				6362	*p = NUL;
				6363	vim_mkdir(fname, 0755);
				6364	*p = c;
				6365	fd = mch_fopen((char *)fname, "a");
				6366	}
				6367	}
				6368
				6369	if (fd == NULL)
				6370	EMSG2(_(e_notopen), fname);
				6371	else
				6372	{
				6373	if (bad)
				6374	fprintf(fd, "%.*s/!\n", len, word);
				6375	else
				6376	fprintf(fd, "%.*s\n", len, word);
				6377	fclose(fd);
				6378
				6379	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6380	smsg((char_u )_("Word '%.s' added to %s"), len, word, NameBuff);
				6381	}
				6382	}
				6383
				6384	if (fd != NULL)
				6385	{
				6386	/* Update the .add.spl file. */
				6387	mkspell(1, &fname, FALSE, TRUE, TRUE);
				6388
				6389	/* If the .add file is edited somewhere, reload it. */
				6390	if (buf != NULL)
				6391	buf_reload(buf, buf->b_orig_mode);
				6392
				6393	redraw_all_later(SOME_VALID);
				6394	}
				6395	vim_free(fnamebuf);
				6396	}
				6397
				6398	/*
				6399	* Initialize 'spellfile' for the current buffer.
				6400	*/
				6401	static void
				6402	init_spellfile(void)
				6403	{
				6404	char_u *buf;
				6405	int l;
				6406	char_u *fname;
				6407	char_u *rtp;
				6408	char_u *lend;
				6409	int aspath = FALSE;
				6410	char_u *lstart = curbuf->b_s.b_p_spl;
				6411
				6412	if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
				6413	{
				6414	buf = alloc(MAXPATHL);
				6415	if (buf == NULL)
				6416	return;
				6417
				6418	/* Find the end of the language name. Exclude the region. If there
				6419	* is a path separator remember the start of the tail. */
				6420	for (lend = curwin->w_s->b_p_spl; *lend != NUL
				6421	&& vim_strchr((char_u )",._", lend) == NULL; ++lend)
				6422	if (vim_ispathsep(*lend))
				6423	{
				6424	aspath = TRUE;
				6425	lstart = lend + 1;
				6426	}
				6427
				6428	/* Loop over all entries in 'runtimepath'. Use the first one where we
				6429	* are allowed to write. */
				6430	rtp = p_rtp;
				6431	while (*rtp != NUL)
				6432	{
				6433	if (aspath)
				6434	/* Use directory of an entry with path, e.g., for
				6435	* "/dir/lg.utf-8.spl" use "/dir". */
				6436	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6437	lstart - curbuf->b_s.b_p_spl - 1);
				6438	else
				6439	/* Copy the path from 'runtimepath' to buf[]. */
				6440	copy_option_part(&rtp, buf, MAXPATHL, ",");
				6441	if (filewritable(buf) == 2)
				6442	{
				6443	/* Use the first language name from 'spelllang' and the
				6444	* encoding used in the first loaded .spl file. */
				6445	if (aspath)
				6446	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6447	lend - curbuf->b_s.b_p_spl);
				6448	else
				6449	{
				6450	/* Create the "spell" directory if it doesn't exist yet. */
				6451	l = (int)STRLEN(buf);
				6452	vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
				6453	if (filewritable(buf) != 2)
				6454	vim_mkdir(buf, 0755);
				6455
				6456	l = (int)STRLEN(buf);
				6457	vim_snprintf((char *)buf + l, MAXPATHL - l,
				6458	"/%.*s", (int)(lend - lstart), lstart);
				6459	}
				6460	l = (int)STRLEN(buf);
				6461	fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
				6462	->lp_slang->sl_fname;
				6463	vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
				6464	fname != NULL
				6465	&& strstr((char *)gettail(fname), ".ascii.") != NULL
				6466	? (char_u *)"ascii" : spell_enc());
				6467	set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
				6468	break;
				6469	}
				6470	aspath = FALSE;
				6471	}
				6472
				6473	vim_free(buf);
				6474	}
				6475	}
				6476
				6477
				6478
				6479	/*
				6480	* Set the spell character tables from strings in the affix file.
				6481	*/
				6482	static int
				6483	set_spell_chartab(char_u fol, char_u low, char_u *upp)
				6484	{
				6485	/* We build the new tables here first, so that we can compare with the
				6486	* previous one. */
				6487	spelltab_T new_st;
				6488	char_u pf = fol, pl = low, *pu = upp;
				6489	int f, l, u;
				6490
				6491	clear_spell_chartab(&new_st);
				6492
				6493	while (*pf != NUL)
				6494	{
				6495	if (pl == NUL \|\| pu == NUL)
				6496	{
				6497	EMSG(_(e_affform));
				6498	return FAIL;
				6499	}
				6500	#ifdef FEAT_MBYTE
				6501	f = mb_ptr2char_adv(&pf);
				6502	l = mb_ptr2char_adv(&pl);
				6503	u = mb_ptr2char_adv(&pu);
				6504	#else
				6505	f = *pf++;
				6506	l = *pl++;
				6507	u = *pu++;
				6508	#endif
				6509	/* Every character that appears is a word character. */
				6510	if (f < 256)
				6511	new_st.st_isw[f] = TRUE;
				6512	if (l < 256)
				6513	new_st.st_isw[l] = TRUE;
				6514	if (u < 256)
				6515	new_st.st_isw[u] = TRUE;
				6516
				6517	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				6518	* case-folding */
				6519	if (l < 256 && l != f)
				6520	{
				6521	if (f >= 256)
				6522	{
				6523	EMSG(_(e_affrange));
				6524	return FAIL;
				6525	}
				6526	new_st.st_fold[l] = f;
				6527	}
				6528
				6529	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				6530	* case-folding, it's upper case and the "UPP" is the upper case of
				6531	* "FOL" . */
				6532	if (u < 256 && u != f)
				6533	{
				6534	if (f >= 256)
				6535	{
				6536	EMSG(_(e_affrange));
				6537	return FAIL;
				6538	}
				6539	new_st.st_fold[u] = f;
				6540	new_st.st_isu[u] = TRUE;
				6541	new_st.st_upper[f] = u;
				6542	}
				6543	}
				6544
				6545	if (pl != NUL \|\| pu != NUL)
				6546	{
				6547	EMSG(_(e_affform));
				6548	return FAIL;
				6549	}
				6550
				6551	return set_spell_finish(&new_st);
				6552	}
				6553
				6554	/*
				6555	* Set the spell character tables from strings in the .spl file.
				6556	*/
				6557	static void
				6558	set_spell_charflags(
				6559	char_u *flags,
				6560	int cnt, /* length of "flags" */
				6561	char_u *fol)
				6562	{
				6563	/* We build the new tables here first, so that we can compare with the
				6564	* previous one. */
				6565	spelltab_T new_st;
				6566	int i;
				6567	char_u *p = fol;
				6568	int c;
				6569
				6570	clear_spell_chartab(&new_st);
				6571
				6572	for (i = 0; i < 128; ++i)
				6573	{
				6574	if (i < cnt)
				6575	{
				6576	new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
				6577	new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
				6578	}
				6579
				6580	if (*p != NUL)
				6581	{
				6582	#ifdef FEAT_MBYTE
				6583	c = mb_ptr2char_adv(&p);
				6584	#else
				6585	c = *p++;
				6586	#endif
				6587	new_st.st_fold[i + 128] = c;
				6588	if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
				6589	new_st.st_upper[c] = i + 128;
				6590	}
				6591	}
				6592
				6593	(void)set_spell_finish(&new_st);
				6594	}
				6595
				6596	static int
				6597	set_spell_finish(spelltab_T *new_st)
				6598	{
				6599	int i;
				6600
				6601	if (did_set_spelltab)
				6602	{
				6603	/* check that it's the same table */
				6604	for (i = 0; i < 256; ++i)
				6605	{
				6606	if (spelltab.st_isw[i] != new_st->st_isw[i]
				6607	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				6608	\|\| spelltab.st_fold[i] != new_st->st_fold[i]
				6609	\|\| spelltab.st_upper[i] != new_st->st_upper[i])
				6610	{
				6611	EMSG(_("E763: Word characters differ between spell files"));
				6612	return FAIL;
				6613	}
				6614	}
				6615	}
				6616	else
				6617	{
				6618	/* copy the new spelltab into the one being used */
				6619	spelltab = *new_st;
				6620	did_set_spelltab = TRUE;
				6621	}
				6622
				6623	return OK;
				6624	}
				6625
				6626	/*
				6627	* Write the table with prefix conditions to the .spl file.
				6628	* When "fd" is NULL only count the length of what is written.
				6629	*/
				6630	static int
				6631	write_spell_prefcond(FILE fd, garray_T gap)
				6632	{
				6633	int i;
				6634	char_u *p;
				6635	int len;
				6636	int totlen;
				6637	size_t x = 1; /* collect return value of fwrite() */
				6638
				6639	if (fd != NULL)
				6640	put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
				6641
				6642	totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
				6643
				6644	for (i = 0; i < gap->ga_len; ++i)
				6645	{
				6646	/* <prefcond> : <condlen> <condstr> */
				6647	p = ((char_u **)gap->ga_data)[i];
				6648	if (p != NULL)
				6649	{
				6650	len = (int)STRLEN(p);
				6651	if (fd != NULL)
				6652	{
				6653	fputc(len, fd);
				6654	x &= fwrite(p, (size_t)len, (size_t)1, fd);
				6655	}
				6656	totlen += len;
				6657	}
				6658	else if (fd != NULL)
				6659	fputc(0, fd);
				6660	}
				6661
				6662	return totlen;
				6663	}
				6664
				6665
				6666	/*
				6667	* Use map string "map" for languages "lp".
				6668	*/
				6669	static void
				6670	set_map_str(slang_T lp, char_u map)
				6671	{
				6672	char_u *p;
				6673	int headc = 0;
				6674	int c;
				6675	int i;
				6676
				6677	if (*map == NUL)
				6678	{
				6679	lp->sl_has_map = FALSE;
				6680	return;
				6681	}
				6682	lp->sl_has_map = TRUE;
				6683
				6684	/* Init the array and hash tables empty. */
				6685	for (i = 0; i < 256; ++i)
				6686	lp->sl_map_array[i] = 0;
				6687	#ifdef FEAT_MBYTE
				6688	hash_init(&lp->sl_map_hash);
				6689	#endif
				6690
				6691	/*
				6692	* The similar characters are stored separated with slashes:
				6693	* "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
				6694	* before the same slash. For characters above 255 sl_map_hash is used.
				6695	*/
				6696	for (p = map; *p != NUL; )
				6697	{
				6698	#ifdef FEAT_MBYTE
				6699	c = mb_cptr2char_adv(&p);
				6700	#else
				6701	c = *p++;
				6702	#endif
				6703	if (c == '/')
				6704	headc = 0;
				6705	else
				6706	{
				6707	if (headc == 0)
				6708	headc = c;
				6709
				6710	#ifdef FEAT_MBYTE
				6711	/* Characters above 255 don't fit in sl_map_array[], put them in
				6712	* the hash table. Each entry is the char, a NUL the headchar and
				6713	* a NUL. */
				6714	if (c >= 256)
				6715	{
				6716	int cl = mb_char2len(c);
				6717	int headcl = mb_char2len(headc);
				6718	char_u *b;
				6719	hash_T hash;
				6720	hashitem_T *hi;
				6721
				6722	b = alloc((unsigned)(cl + headcl + 2));
				6723	if (b == NULL)
				6724	return;
				6725	mb_char2bytes(c, b);
				6726	b[cl] = NUL;
				6727	mb_char2bytes(headc, b + cl + 1);
				6728	b[cl + 1 + headcl] = NUL;
				6729	hash = hash_hash(b);
				6730	hi = hash_lookup(&lp->sl_map_hash, b, hash);
				6731	if (HASHITEM_EMPTY(hi))
				6732	hash_add_item(&lp->sl_map_hash, hi, b, hash);
				6733	else
				6734	{
				6735	/* This should have been checked when generating the .spl
				6736	* file. */
				6737	EMSG(_("E783: duplicate char in MAP entry"));
				6738	vim_free(b);
				6739	}
				6740	}
				6741	else
				6742	#endif
				6743	lp->sl_map_array[c] = headc;
				6744	}
				6745	}
				6746	}
				6747
				6748
				6749	#endif /* FEAT_SPELL */