Blame - src/spellfile.c - android_external_vim

blob: 00ef019a6a6ffbdc3ba920bbd3adf25b43005d41 [file] [log] [blame]

Bram Moolenaar	edf3f97	2016-08-29 22:49:24 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* spellfile.c: code for reading and writing spell files.
				12	*
				13	* See spell.c for information about spell checking.
				14	*/
				15
				16	/*
				17	* Vim spell file format: <HEADER>
				18	* <SECTIONS>
				19	* <LWORDTREE>
				20	* <KWORDTREE>
				21	* <PREFIXTREE>
				22	*
				23	* <HEADER>: <fileID> <versionnr>
				24	*
				25	* <fileID> 8 bytes "VIMspell"
				26	* <versionnr> 1 byte VIMSPELLVERSION
				27	*
				28	*
				29	* Sections make it possible to add information to the .spl file without
				30	* making it incompatible with previous versions. There are two kinds of
				31	* sections:
				32	* 1. Not essential for correct spell checking. E.g. for making suggestions.
				33	* These are skipped when not supported.
				34	* 2. Optional information, but essential for spell checking when present.
				35	* E.g. conditions for affixes. When this section is present but not
				36	* supported an error message is given.
				37	*
				38	* <SECTIONS>: <section> ... <sectionend>
				39	*
				40	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				41	*
				42	* <sectionID> 1 byte number from 0 to 254 identifying the section
				43	*
				44	* <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
				45	* spell checking
				46	*
				47	* <sectionlen> 4 bytes length of section contents, MSB first
				48	*
				49	* <sectionend> 1 byte SN_END
				50	*
				51	*
				52	* sectionID == SN_INFO: <infotext>
				53	* <infotext> N bytes free format text with spell file info (version,
				54	* website, etc)
				55	*
				56	* sectionID == SN_REGION: <regionname> ...
				57	* <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case.
				58	* First <regionname> is region 1.
				59	*
				60	* sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
				61	* <folcharslen> <folchars>
				62	* <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
				63	* <charflags> N bytes List of flags (first one is for character 128):
				64	* 0x01 word character CF_WORD
				65	* 0x02 upper-case character CF_UPPER
				66	* <folcharslen> 2 bytes Number of bytes in <folchars>.
				67	* <folchars> N bytes Folded characters, first one is for character 128.
				68	*
				69	* sectionID == SN_MIDWORD: <midword>
				70	* <midword> N bytes Characters that are word characters only when used
				71	* in the middle of a word.
				72	*
				73	* sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
				74	* <prefcondcnt> 2 bytes Number of <prefcond> items following.
				75	* <prefcond> : <condlen> <condstr>
				76	* <condlen> 1 byte Length of <condstr>.
				77	* <condstr> N bytes Condition for the prefix.
				78	*
				79	* sectionID == SN_REP: <repcount> <rep> ...
				80	* <repcount> 2 bytes number of <rep> items, MSB first.
				81	* <rep> : <repfromlen> <repfrom> <reptolen> <repto>
				82	* <repfromlen> 1 byte length of <repfrom>
				83	* <repfrom> N bytes "from" part of replacement
				84	* <reptolen> 1 byte length of <repto>
				85	* <repto> N bytes "to" part of replacement
				86	*
				87	* sectionID == SN_REPSAL: <repcount> <rep> ...
				88	* just like SN_REP but for soundfolded words
				89	*
				90	* sectionID == SN_SAL: <salflags> <salcount> <sal> ...
				91	* <salflags> 1 byte flags for soundsalike conversion:
				92	* SAL_F0LLOWUP
				93	* SAL_COLLAPSE
				94	* SAL_REM_ACCENTS
				95	* <salcount> 2 bytes number of <sal> items following
				96	* <sal> : <salfromlen> <salfrom> <saltolen> <salto>
				97	* <salfromlen> 1 byte length of <salfrom>
				98	* <salfrom> N bytes "from" part of soundsalike
				99	* <saltolen> 1 byte length of <salto>
				100	* <salto> N bytes "to" part of soundsalike
				101	*
				102	* sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				103	* <sofofromlen> 2 bytes length of <sofofrom>
				104	* <sofofrom> N bytes "from" part of soundfold
				105	* <sofotolen> 2 bytes length of <sofoto>
				106	* <sofoto> N bytes "to" part of soundfold
				107	*
				108	* sectionID == SN_SUGFILE: <timestamp>
				109	* <timestamp> 8 bytes time in seconds that must match with .sug file
				110	*
				111	* sectionID == SN_NOSPLITSUGS: nothing
				112	*
				113	* sectionID == SN_NOCOMPOUNDSUGS: nothing
				114	*
				115	* sectionID == SN_WORDS: <word> ...
				116	* <word> N bytes NUL terminated common word
				117	*
				118	* sectionID == SN_MAP: <mapstr>
				119	* <mapstr> N bytes String with sequences of similar characters,
				120	* separated by slashes.
				121	*
				122	* sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
				123	* <comppatcount> <comppattern> ... <compflags>
				124	* <compmax> 1 byte Maximum nr of words in compound word.
				125	* <compminlen> 1 byte Minimal word length for compounding.
				126	* <compsylmax> 1 byte Maximum nr of syllables in compound word.
				127	* <compoptions> 2 bytes COMP_ flags.
				128	* <comppatcount> 2 bytes number of <comppattern> following
				129	* <compflags> N bytes Flags from COMPOUNDRULE items, separated by
				130	* slashes.
				131	*
				132	* <comppattern>: <comppatlen> <comppattext>
				133	* <comppatlen> 1 byte length of <comppattext>
				134	* <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
				135	*
				136	* sectionID == SN_NOBREAK: (empty, its presence is what matters)
				137	*
				138	* sectionID == SN_SYLLABLE: <syllable>
				139	* <syllable> N bytes String from SYLLABLE item.
				140	*
				141	* <LWORDTREE>: <wordtree>
				142	*
				143	* <KWORDTREE>: <wordtree>
				144	*
				145	* <PREFIXTREE>: <wordtree>
				146	*
				147	*
				148	* <wordtree>: <nodecount> <nodedata> ...
				149	*
				150	* <nodecount> 4 bytes Number of nodes following. MSB first.
				151	*
				152	* <nodedata>: <siblingcount> <sibling> ...
				153	*
				154	* <siblingcount> 1 byte Number of siblings in this node. The siblings
				155	* follow in sorted order.
				156	*
				157	* <sibling>: <byte> [ <nodeidx> <xbyte>
				158	* | <flags> [<flags2>] [<region>] [<affixID>]
				159	* | [<pflags>] <affixID> <prefcondnr> ]
				160	*
				161	* <byte> 1 byte Byte value of the sibling. Special cases:
				162	* BY_NOFLAGS: End of word without flags and for all
				163	* regions.
				164	* For PREFIXTREE <affixID> and
				165	* <prefcondnr> follow.
				166	* BY_FLAGS: End of word, <flags> follow.
				167	* For PREFIXTREE <pflags>, <affixID>
				168	* and <prefcondnr> follow.
				169	* BY_FLAGS2: End of word, <flags> and <flags2>
				170	* follow. Not used in PREFIXTREE.
				171	* BY_INDEX: Child of sibling is shared, <nodeidx>
				172	* and <xbyte> follow.
				173	*
				174	* <nodeidx> 3 bytes Index of child for this sibling, MSB first.
				175	*
				176	* <xbyte> 1 byte byte value of the sibling.
				177	*
				178	* <flags> 1 byte bitmask of:
				179	* WF_ALLCAP word must have only capitals
				180	* WF_ONECAP first char of word must be capital
				181	* WF_KEEPCAP keep-case word
				182	* WF_FIXCAP keep-case word, all caps not allowed
				183	* WF_RARE rare word
				184	* WF_BANNED bad word
				185	* WF_REGION <region> follows
				186	* WF_AFX <affixID> follows
				187	*
				188	* <flags2> 1 byte Bitmask of:
				189	* WF_HAS_AFF >> 8 word includes affix
				190	* WF_NEEDCOMP >> 8 word only valid in compound
				191	* WF_NOSUGGEST >> 8 word not used for suggestions
				192	* WF_COMPROOT >> 8 word already a compound
				193	* WF_NOCOMPBEF >> 8 no compounding before this word
				194	* WF_NOCOMPAFT >> 8 no compounding after this word
				195	*
				196	* <pflags> 1 byte bitmask of:
				197	* WFP_RARE rare prefix
				198	* WFP_NC non-combining prefix
				199	* WFP_UP letter after prefix made upper case
				200	*
				201	* <region> 1 byte Bitmask for regions in which word is valid. When
				202	* omitted it's valid in all regions.
				203	* Lowest bit is for region 1.
				204	*
				205	* <affixID> 1 byte ID of affix that can be used with this word. In
				206	* PREFIXTREE used for the required prefix ID.
				207	*
				208	* <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
				209	* from HEADER.
				210	*
				211	* All text characters are in 'encoding', but stored as single bytes.
				212	*/
				213
				214	/*
				215	* Vim .sug file format: <SUGHEADER>
				216	* <SUGWORDTREE>
				217	* <SUGTABLE>
				218	*
				219	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				220	*
				221	* <fileID> 6 bytes "VIMsug"
				222	* <versionnr> 1 byte VIMSUGVERSION
				223	* <timestamp> 8 bytes timestamp that must match with .spl file
				224	*
				225	*
				226	* <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
				227	*
				228	*
				229	* <SUGTABLE>: <sugwcount> <sugline> ...
				230	*
				231	* <sugwcount> 4 bytes number of <sugline> following
				232	*
				233	* <sugline>: <sugnr> ... NUL
				234	*
				235	* <sugnr>: X bytes word number that results in this soundfolded word,
				236	* stored as an offset to the previous number in as
				237	* few bytes as possible, see offset2bytes())
				238	*/
				239
				240	#include "vim.h"
				241
				242	#if defined(FEAT_SPELL) \|\| defined(PROTO)
				243
				244	#ifndef UNIX /* it's in os_unix.h for Unix */
				245	# include <time.h> /* for time_t */
				246	#endif
				247
				248	#ifndef UNIX /* it's in os_unix.h for Unix */
				249	# include <time.h> /* for time_t */
				250	#endif
				251
				252	/* Special byte values for <byte>. Some are only used in the tree for
				253	* postponed prefixes, some only in the other trees. This is a bit messy... */
				254	#define BY_NOFLAGS 0 /* end of word without flags or region; for
				255	* postponed prefix: no <pflags> */
				256	#define BY_INDEX 1 /* child is shared, index follows */
				257	#define BY_FLAGS 2 /* end of word, <flags> byte follows; for
				258	* postponed prefix: <pflags> follows */
				259	#define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
				260	* follow; never used in prefix tree */
				261	#define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
				262
				263	/* Flags used in .spl file for soundsalike flags. */
				264	#define SAL_F0LLOWUP 1
				265	#define SAL_COLLAPSE 2
				266	#define SAL_REM_ACCENTS 4
				267
				268	#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
				269	#define VIMSPELLMAGICL 8
				270	#define VIMSPELLVERSION 50
				271
				272	/* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
				273	#define SN_REGION 0 /* <regionname> section */
				274	#define SN_CHARFLAGS 1 /* charflags section */
				275	#define SN_MIDWORD 2 /* <midword> section */
				276	#define SN_PREFCOND 3 /* <prefcond> section */
				277	#define SN_REP 4 /* REP items section */
				278	#define SN_SAL 5 /* SAL items section */
				279	#define SN_SOFO 6 /* soundfolding section */
				280	#define SN_MAP 7 /* MAP items section */
				281	#define SN_COMPOUND 8 /* compound words section */
				282	#define SN_SYLLABLE 9 /* syllable section */
				283	#define SN_NOBREAK 10 /* NOBREAK section */
				284	#define SN_SUGFILE 11 /* timestamp for .sug file */
				285	#define SN_REPSAL 12 /* REPSAL items section */
				286	#define SN_WORDS 13 /* common words */
				287	#define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
				288	#define SN_INFO 15 /* info section */
				289	#define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
				290	#define SN_END 255 /* end of sections */
				291
				292	#define SNF_REQUIRED 1 /* <sectionflags>: required section */
				293
				294	#define CF_WORD 0x01
				295	#define CF_UPPER 0x02
				296
				297	static int set_spell_finish(spelltab_T *new_st);
				298	static int write_spell_prefcond(FILE fd, garray_T gap);
				299	static char_u read_cnt_string(FILE fd, int cnt_bytes, int *lenp);
				300	static int read_region_section(FILE fd, slang_T slang, int len);
				301	static int read_charflags_section(FILE *fd);
				302	static int read_prefcond_section(FILE fd, slang_T lp);
				303	static int read_rep_section(FILE fd, garray_T gap, short *first);
				304	static int read_sal_section(FILE fd, slang_T slang);
				305	static int read_words_section(FILE fd, slang_T lp, int len);
				306	static int read_sofo_section(FILE fd, slang_T slang);
				307	static int read_compound(FILE fd, slang_T slang, int len);
				308	static int set_sofo(slang_T lp, char_u from, char_u *to);
				309	static void set_sal_first(slang_T *lp);
				310	#ifdef FEAT_MBYTE
				311	static int mb_str2wide(char_u s);
				312	#endif
				313	static int spell_read_tree(FILE fd, char_u bytsp, idx_T *idxsp, int prefixtree, int prefixcnt);
				314	static idx_T read_tree_node(FILE fd, char_u byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
				315	static void spell_reload_one(char_u *fname, int added_word);
				316	static void set_spell_charflags(char_u flags, int cnt, char_u upp);
				317	static int set_spell_chartab(char_u fol, char_u low, char_u *upp);
				318	static void set_map_str(slang_T lp, char_u map);
				319
				320
				321	static char *e_spell_trunc = N_("E758: Truncated spell file");
				322	static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
				323	static char *e_affname = N_("Affix name too long in %s line %d: %s");
				324	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				325	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				326	static char *msg_compressing = N_("Compressing word tree...");
				327
				328	/*
				329	* Load one spell file and store the info into a slang_T.
				330	*
				331	* This is invoked in three ways:
				332	* - From spell_load_cb() to load a spell file for the first time. "lang" is
				333	* the language name, "old_lp" is NULL. Will allocate an slang_T.
				334	* - To reload a spell file that was changed. "lang" is NULL and "old_lp"
				335	* points to the existing slang_T.
				336	* - Just after writing a .spl file; it's read back to produce the .sug file.
				337	* "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
				338	*
				339	* Returns the slang_T the spell file was loaded into. NULL for error.
				340	*/
				341	slang_T *
				342	spell_load_file(
				343	char_u *fname,
				344	char_u *lang,
				345	slang_T *old_lp,
				346	int silent) /* no error if file doesn't exist */
				347	{
				348	FILE *fd;
				349	char_u buf[VIMSPELLMAGICL];
				350	char_u *p;
				351	int i;
				352	int n;
				353	int len;
				354	char_u *save_sourcing_name = sourcing_name;
				355	linenr_T save_sourcing_lnum = sourcing_lnum;
				356	slang_T *lp = NULL;
				357	int c = 0;
				358	int res;
				359
				360	fd = mch_fopen((char *)fname, "r");
				361	if (fd == NULL)
				362	{
				363	if (!silent)
				364	EMSG2(_(e_notopen), fname);
				365	else if (p_verbose > 2)
				366	{
				367	verbose_enter();
				368	smsg((char_u *)e_notopen, fname);
				369	verbose_leave();
				370	}
				371	goto endFAIL;
				372	}
				373	if (p_verbose > 2)
				374	{
				375	verbose_enter();
				376	smsg((char_u *)_("Reading spell file \"%s\""), fname);
				377	verbose_leave();
				378	}
				379
				380	if (old_lp == NULL)
				381	{
				382	lp = slang_alloc(lang);
				383	if (lp == NULL)
				384	goto endFAIL;
				385
				386	/* Remember the file name, used to reload the file when it's updated. */
				387	lp->sl_fname = vim_strsave(fname);
				388	if (lp->sl_fname == NULL)
				389	goto endFAIL;
				390
				391	/* Check for .add.spl (_add.spl for VMS). */
				392	lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
				393	}
				394	else
				395	lp = old_lp;
				396
				397	/* Set sourcing_name, so that error messages mention the file name. */
				398	sourcing_name = fname;
				399	sourcing_lnum = 0;
				400
				401	/*
				402	* <HEADER>: <fileID>
				403	*/
				404	for (i = 0; i < VIMSPELLMAGICL; ++i)
				405	buf[i] = getc(fd); /* <fileID> */
				406	if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
				407	{
				408	EMSG(_("E757: This does not look like a spell file"));
				409	goto endFAIL;
				410	}
				411	c = getc(fd); /* <versionnr> */
				412	if (c < VIMSPELLVERSION)
				413	{
				414	EMSG(_("E771: Old spell file, needs to be updated"));
				415	goto endFAIL;
				416	}
				417	else if (c > VIMSPELLVERSION)
				418	{
				419	EMSG(_("E772: Spell file is for newer version of Vim"));
				420	goto endFAIL;
				421	}
				422
				423
				424	/*
				425	* <SECTIONS>: <section> ... <sectionend>
				426	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				427	*/
				428	for (;;)
				429	{
				430	n = getc(fd); /* <sectionID> or <sectionend> */
				431	if (n == SN_END)
				432	break;
				433	c = getc(fd); /* <sectionflags> */
				434	len = get4c(fd); /* <sectionlen> */
				435	if (len < 0)
				436	goto truncerr;
				437
				438	res = 0;
				439	switch (n)
				440	{
				441	case SN_INFO:
				442	lp->sl_info = read_string(fd, len); /* <infotext> */
				443	if (lp->sl_info == NULL)
				444	goto endFAIL;
				445	break;
				446
				447	case SN_REGION:
				448	res = read_region_section(fd, lp, len);
				449	break;
				450
				451	case SN_CHARFLAGS:
				452	res = read_charflags_section(fd);
				453	break;
				454
				455	case SN_MIDWORD:
				456	lp->sl_midword = read_string(fd, len); /* <midword> */
				457	if (lp->sl_midword == NULL)
				458	goto endFAIL;
				459	break;
				460
				461	case SN_PREFCOND:
				462	res = read_prefcond_section(fd, lp);
				463	break;
				464
				465	case SN_REP:
				466	res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
				467	break;
				468
				469	case SN_REPSAL:
				470	res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
				471	break;
				472
				473	case SN_SAL:
				474	res = read_sal_section(fd, lp);
				475	break;
				476
				477	case SN_SOFO:
				478	res = read_sofo_section(fd, lp);
				479	break;
				480
				481	case SN_MAP:
				482	p = read_string(fd, len); /* <mapstr> */
				483	if (p == NULL)
				484	goto endFAIL;
				485	set_map_str(lp, p);
				486	vim_free(p);
				487	break;
				488
				489	case SN_WORDS:
				490	res = read_words_section(fd, lp, len);
				491	break;
				492
				493	case SN_SUGFILE:
				494	lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
				495	break;
				496
				497	case SN_NOSPLITSUGS:
				498	lp->sl_nosplitsugs = TRUE;
				499	break;
				500
				501	case SN_NOCOMPOUNDSUGS:
				502	lp->sl_nocompoundsugs = TRUE;
				503	break;
				504
				505	case SN_COMPOUND:
				506	res = read_compound(fd, lp, len);
				507	break;
				508
				509	case SN_NOBREAK:
				510	lp->sl_nobreak = TRUE;
				511	break;
				512
				513	case SN_SYLLABLE:
				514	lp->sl_syllable = read_string(fd, len); /* <syllable> */
				515	if (lp->sl_syllable == NULL)
				516	goto endFAIL;
				517	if (init_syl_tab(lp) == FAIL)
				518	goto endFAIL;
				519	break;
				520
				521	default:
				522	/* Unsupported section. When it's required give an error
				523	* message. When it's not required skip the contents. */
				524	if (c & SNF_REQUIRED)
				525	{
				526	EMSG(_("E770: Unsupported section in spell file"));
				527	goto endFAIL;
				528	}
				529	while (--len >= 0)
				530	if (getc(fd) < 0)
				531	goto truncerr;
				532	break;
				533	}
				534	someerror:
				535	if (res == SP_FORMERROR)
				536	{
				537	EMSG(_(e_format));
				538	goto endFAIL;
				539	}
				540	if (res == SP_TRUNCERROR)
				541	{
				542	truncerr:
				543	EMSG(_(e_spell_trunc));
				544	goto endFAIL;
				545	}
				546	if (res == SP_OTHERERROR)
				547	goto endFAIL;
				548	}
				549
				550	/* <LWORDTREE> */
				551	res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
				552	if (res != 0)
				553	goto someerror;
				554
				555	/* <KWORDTREE> */
				556	res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
				557	if (res != 0)
				558	goto someerror;
				559
				560	/* <PREFIXTREE> */
				561	res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
				562	lp->sl_prefixcnt);
				563	if (res != 0)
				564	goto someerror;
				565
				566	/* For a new file link it in the list of spell files. */
				567	if (old_lp == NULL && lang != NULL)
				568	{
				569	lp->sl_next = first_lang;
				570	first_lang = lp;
				571	}
				572
				573	goto endOK;
				574
				575	endFAIL:
				576	if (lang != NULL)
				577	/* truncating the name signals the error to spell_load_lang() */
				578	*lang = NUL;
				579	if (lp != NULL && old_lp == NULL)
				580	slang_free(lp);
				581	lp = NULL;
				582
				583	endOK:
				584	if (fd != NULL)
				585	fclose(fd);
				586	sourcing_name = save_sourcing_name;
				587	sourcing_lnum = save_sourcing_lnum;
				588
				589	return lp;
				590	}
				591
				592	/*
				593	* Fill in the wordcount fields for a trie.
				594	* Returns the total number of words.
				595	*/
				596	static void
				597	tree_count_words(char_u byts, idx_T idxs)
				598	{
				599	int depth;
				600	idx_T arridx[MAXWLEN];
				601	int curi[MAXWLEN];
				602	int c;
				603	idx_T n;
				604	int wordcount[MAXWLEN];
				605
				606	arridx[0] = 0;
				607	curi[0] = 1;
				608	wordcount[0] = 0;
				609	depth = 0;
				610	while (depth >= 0 && !got_int)
				611	{
				612	if (curi[depth] > byts[arridx[depth]])
				613	{
				614	/* Done all bytes at this node, go up one level. */
				615	idxs[arridx[depth]] = wordcount[depth];
				616	if (depth > 0)
				617	wordcount[depth - 1] += wordcount[depth];
				618
				619	--depth;
				620	fast_breakcheck();
				621	}
				622	else
				623	{
				624	/* Do one more byte at this node. */
				625	n = arridx[depth] + curi[depth];
				626	++curi[depth];
				627
				628	c = byts[n];
				629	if (c == 0)
				630	{
				631	/* End of word, count it. */
				632	++wordcount[depth];
				633
				634	/* Skip over any other NUL bytes (same word with different
				635	* flags). */
				636	while (byts[n + 1] == 0)
				637	{
				638	++n;
				639	++curi[depth];
				640	}
				641	}
				642	else
				643	{
				644	/* Normal char, go one level deeper to count the words. */
				645	++depth;
				646	arridx[depth] = idxs[n];
				647	curi[depth] = 1;
				648	wordcount[depth] = 0;
				649	}
				650	}
				651	}
				652	}
				653
				654	/*
				655	* Load the .sug files for languages that have one and weren't loaded yet.
				656	*/
				657	void
				658	suggest_load_files(void)
				659	{
				660	langp_T *lp;
				661	int lpi;
				662	slang_T *slang;
				663	char_u *dotp;
				664	FILE *fd;
				665	char_u buf[MAXWLEN];
				666	int i;
				667	time_t timestamp;
				668	int wcount;
				669	int wordnr;
				670	garray_T ga;
				671	int c;
				672
				673	/* Do this for all languages that support sound folding. */
				674	for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
				675	{
				676	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
				677	slang = lp->lp_slang;
				678	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
				679	{
				680	/* Change ".spl" to ".sug" and open the file. When the file isn't
				681	* found silently skip it. Do set "sl_sugloaded" so that we
				682	* don't try again and again. */
				683	slang->sl_sugloaded = TRUE;
				684
				685	dotp = vim_strrchr(slang->sl_fname, '.');
				686	if (dotp == NULL \|\| fnamecmp(dotp, ".spl") != 0)
				687	continue;
				688	STRCPY(dotp, ".sug");
				689	fd = mch_fopen((char *)slang->sl_fname, "r");
				690	if (fd == NULL)
				691	goto nextone;
				692
				693	/*
				694	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				695	*/
				696	for (i = 0; i < VIMSUGMAGICL; ++i)
				697	buf[i] = getc(fd); /* <fileID> */
				698	if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
				699	{
				700	EMSG2(_("E778: This does not look like a .sug file: %s"),
				701	slang->sl_fname);
				702	goto nextone;
				703	}
				704	c = getc(fd); /* <versionnr> */
				705	if (c < VIMSUGVERSION)
				706	{
				707	EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
				708	slang->sl_fname);
				709	goto nextone;
				710	}
				711	else if (c > VIMSUGVERSION)
				712	{
				713	EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
				714	slang->sl_fname);
				715	goto nextone;
				716	}
				717
				718	/* Check the timestamp, it must be exactly the same as the one in
				719	* the .spl file. Otherwise the word numbers won't match. */
				720	timestamp = get8ctime(fd); /* <timestamp> */
				721	if (timestamp != slang->sl_sugtime)
				722	{
				723	EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
				724	slang->sl_fname);
				725	goto nextone;
				726	}
				727
				728	/*
				729	* <SUGWORDTREE>: <wordtree>
				730	* Read the trie with the soundfolded words.
				731	*/
				732	if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
				733	FALSE, 0) != 0)
				734	{
				735	someerror:
				736	EMSG2(_("E782: error while reading .sug file: %s"),
				737	slang->sl_fname);
				738	slang_clear_sug(slang);
				739	goto nextone;
				740	}
				741
				742	/*
				743	* <SUGTABLE>: <sugwcount> <sugline> ...
				744	*
				745	* Read the table with word numbers. We use a file buffer for
				746	* this, because it's so much like a file with lines. Makes it
				747	* possible to swap the info and save on memory use.
				748	*/
				749	slang->sl_sugbuf = open_spellbuf();
				750	if (slang->sl_sugbuf == NULL)
				751	goto someerror;
				752	/* <sugwcount> */
				753	wcount = get4c(fd);
				754	if (wcount < 0)
				755	goto someerror;
				756
				757	/* Read all the wordnr lists into the buffer, one NUL terminated
				758	* list per line. */
				759	ga_init2(&ga, 1, 100);
				760	for (wordnr = 0; wordnr < wcount; ++wordnr)
				761	{
				762	ga.ga_len = 0;
				763	for (;;)
				764	{
				765	c = getc(fd); /* <sugline> */
				766	if (c < 0 \|\| ga_grow(&ga, 1) == FAIL)
				767	goto someerror;
				768	((char_u *)ga.ga_data)[ga.ga_len++] = c;
				769	if (c == NUL)
				770	break;
				771	}
				772	if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
				773	ga.ga_data, ga.ga_len, TRUE) == FAIL)
				774	goto someerror;
				775	}
				776	ga_clear(&ga);
				777
				778	/*
				779	* Need to put word counts in the word tries, so that we can find
				780	* a word by its number.
				781	*/
				782	tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
				783	tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
				784
				785	nextone:
				786	if (fd != NULL)
				787	fclose(fd);
				788	STRCPY(dotp, ".spl");
				789	}
				790	}
				791	}
				792
				793
				794	/*
				795	* Read a length field from "fd" in "cnt_bytes" bytes.
				796	* Allocate memory, read the string into it and add a NUL at the end.
				797	* Returns NULL when the count is zero.
				798	* Sets "cntp" to SP_ERROR when there is an error, length of the result
				799	* otherwise.
				800	*/
				801	static char_u *
				802	read_cnt_string(FILE fd, int cnt_bytes, int cntp)
				803	{
				804	int cnt = 0;
				805	int i;
				806	char_u *str;
				807
				808	/* read the length bytes, MSB first */
				809	for (i = 0; i < cnt_bytes; ++i)
				810	cnt = (cnt << 8) + getc(fd);
				811	if (cnt < 0)
				812	{
				813	*cntp = SP_TRUNCERROR;
				814	return NULL;
				815	}
				816	*cntp = cnt;
				817	if (cnt == 0)
				818	return NULL; /* nothing to read, return NULL */
				819
				820	str = read_string(fd, cnt);
				821	if (str == NULL)
				822	*cntp = SP_OTHERERROR;
				823	return str;
				824	}
				825
				826	/*
				827	* Read SN_REGION: <regionname> ...
				828	* Return SP_*ERROR flags.
				829	*/
				830	static int
				831	read_region_section(FILE fd, slang_T lp, int len)
				832	{
				833	int i;
				834
				835	if (len > 16)
				836	return SP_FORMERROR;
				837	for (i = 0; i < len; ++i)
				838	lp->sl_regions[i] = getc(fd); /* <regionname> */
				839	lp->sl_regions[len] = NUL;
				840	return 0;
				841	}
				842
				843	/*
				844	* Read SN_CHARFLAGS section: <charflagslen> <charflags>
				845	* <folcharslen> <folchars>
				846	* Return SP_*ERROR flags.
				847	*/
				848	static int
				849	read_charflags_section(FILE *fd)
				850	{
				851	char_u *flags;
				852	char_u *fol;
				853	int flagslen, follen;
				854
				855	/* <charflagslen> <charflags> */
				856	flags = read_cnt_string(fd, 1, &flagslen);
				857	if (flagslen < 0)
				858	return flagslen;
				859
				860	/* <folcharslen> <folchars> */
				861	fol = read_cnt_string(fd, 2, &follen);
				862	if (follen < 0)
				863	{
				864	vim_free(flags);
				865	return follen;
				866	}
				867
				868	/* Set the word-char flags and fill SPELL_ISUPPER() table. */
				869	if (flags != NULL && fol != NULL)
				870	set_spell_charflags(flags, flagslen, fol);
				871
				872	vim_free(flags);
				873	vim_free(fol);
				874
				875	/* When <charflagslen> is zero then <fcharlen> must also be zero. */
				876	if ((flags == NULL) != (fol == NULL))
				877	return SP_FORMERROR;
				878	return 0;
				879	}
				880
				881	/*
				882	* Read SN_PREFCOND section.
				883	* Return SP_*ERROR flags.
				884	*/
				885	static int
				886	read_prefcond_section(FILE fd, slang_T lp)
				887	{
				888	int cnt;
				889	int i;
				890	int n;
				891	char_u *p;
				892	char_u buf[MAXWLEN + 1];
				893
				894	/* <prefcondcnt> <prefcond> ... */
				895	cnt = get2c(fd); /* <prefcondcnt> */
				896	if (cnt <= 0)
				897	return SP_FORMERROR;
				898
				899	lp->sl_prefprog = (regprog_T **)alloc_clear(
				900	(unsigned)sizeof(regprog_T ) cnt);
				901	if (lp->sl_prefprog == NULL)
				902	return SP_OTHERERROR;
				903	lp->sl_prefixcnt = cnt;
				904
				905	for (i = 0; i < cnt; ++i)
				906	{
				907	/* <prefcond> : <condlen> <condstr> */
				908	n = getc(fd); /* <condlen> */
				909	if (n < 0 \|\| n >= MAXWLEN)
				910	return SP_FORMERROR;
				911
				912	/* When <condlen> is zero we have an empty condition. Otherwise
				913	* compile the regexp program used to check for the condition. */
				914	if (n > 0)
				915	{
				916	buf[0] = '^'; /* always match at one position only */
				917	p = buf + 1;
				918	while (n-- > 0)
				919	p++ = getc(fd); / <condstr> */
				920	*p = NUL;
				921	lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
				922	}
				923	}
				924	return 0;
				925	}
				926
				927	/*
				928	* Read REP or REPSAL items section from "fd": <repcount> <rep> ...
				929	* Return SP_*ERROR flags.
				930	*/
				931	static int
				932	read_rep_section(FILE fd, garray_T gap, short *first)
				933	{
				934	int cnt;
				935	fromto_T *ftp;
				936	int i;
				937
				938	cnt = get2c(fd); /* <repcount> */
				939	if (cnt < 0)
				940	return SP_TRUNCERROR;
				941
				942	if (ga_grow(gap, cnt) == FAIL)
				943	return SP_OTHERERROR;
				944
				945	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				946	for (; gap->ga_len < cnt; ++gap->ga_len)
				947	{
				948	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
				949	ftp->ft_from = read_cnt_string(fd, 1, &i);
				950	if (i < 0)
				951	return i;
				952	if (i == 0)
				953	return SP_FORMERROR;
				954	ftp->ft_to = read_cnt_string(fd, 1, &i);
				955	if (i <= 0)
				956	{
				957	vim_free(ftp->ft_from);
				958	if (i < 0)
				959	return i;
				960	return SP_FORMERROR;
				961	}
				962	}
				963
				964	/* Fill the first-index table. */
				965	for (i = 0; i < 256; ++i)
				966	first[i] = -1;
				967	for (i = 0; i < gap->ga_len; ++i)
				968	{
				969	ftp = &((fromto_T *)gap->ga_data)[i];
				970	if (first[*ftp->ft_from] == -1)
				971	first[*ftp->ft_from] = i;
				972	}
				973	return 0;
				974	}
				975
				976	/*
				977	* Read SN_SAL section: <salflags> <salcount> <sal> ...
				978	* Return SP_*ERROR flags.
				979	*/
				980	static int
				981	read_sal_section(FILE fd, slang_T slang)
				982	{
				983	int i;
				984	int cnt;
				985	garray_T *gap;
				986	salitem_T *smp;
				987	int ccnt;
				988	char_u *p;
				989	int c = NUL;
				990
				991	slang->sl_sofo = FALSE;
				992
				993	i = getc(fd); /* <salflags> */
				994	if (i & SAL_F0LLOWUP)
				995	slang->sl_followup = TRUE;
				996	if (i & SAL_COLLAPSE)
				997	slang->sl_collapse = TRUE;
				998	if (i & SAL_REM_ACCENTS)
				999	slang->sl_rem_accents = TRUE;
				1000
				1001	cnt = get2c(fd); /* <salcount> */
				1002	if (cnt < 0)
				1003	return SP_TRUNCERROR;
				1004
				1005	gap = &slang->sl_sal;
				1006	ga_init2(gap, sizeof(salitem_T), 10);
				1007	if (ga_grow(gap, cnt + 1) == FAIL)
				1008	return SP_OTHERERROR;
				1009
				1010	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				1011	for (; gap->ga_len < cnt; ++gap->ga_len)
				1012	{
				1013	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1014	ccnt = getc(fd); /* <salfromlen> */
				1015	if (ccnt < 0)
				1016	return SP_TRUNCERROR;
				1017	if ((p = alloc(ccnt + 2)) == NULL)
				1018	return SP_OTHERERROR;
				1019	smp->sm_lead = p;
				1020
				1021	/* Read up to the first special char into sm_lead. */
				1022	for (i = 0; i < ccnt; ++i)
				1023	{
				1024	c = getc(fd); /* <salfrom> */
				1025	if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
				1026	break;
				1027	*p++ = c;
				1028	}
				1029	smp->sm_leadlen = (int)(p - smp->sm_lead);
				1030	*p++ = NUL;
				1031
				1032	/* Put (abc) chars in sm_oneof, if any. */
				1033	if (c == '(')
				1034	{
				1035	smp->sm_oneof = p;
				1036	for (++i; i < ccnt; ++i)
				1037	{
				1038	c = getc(fd); /* <salfrom> */
				1039	if (c == ')')
				1040	break;
				1041	*p++ = c;
				1042	}
				1043	*p++ = NUL;
				1044	if (++i < ccnt)
				1045	c = getc(fd);
				1046	}
				1047	else
				1048	smp->sm_oneof = NULL;
				1049
				1050	/* Any following chars go in sm_rules. */
				1051	smp->sm_rules = p;
				1052	if (i < ccnt)
				1053	/* store the char we got while checking for end of sm_lead */
				1054	*p++ = c;
				1055	for (++i; i < ccnt; ++i)
				1056	p++ = getc(fd); / <salfrom> */
				1057	*p++ = NUL;
				1058
				1059	/* <saltolen> <salto> */
				1060	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
				1061	if (ccnt < 0)
				1062	{
				1063	vim_free(smp->sm_lead);
				1064	return ccnt;
				1065	}
				1066
				1067	#ifdef FEAT_MBYTE
				1068	if (has_mbyte)
				1069	{
				1070	/* convert the multi-byte strings to wide char strings */
				1071	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1072	smp->sm_leadlen = mb_charlen(smp->sm_lead);
				1073	if (smp->sm_oneof == NULL)
				1074	smp->sm_oneof_w = NULL;
				1075	else
				1076	smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
				1077	if (smp->sm_to == NULL)
				1078	smp->sm_to_w = NULL;
				1079	else
				1080	smp->sm_to_w = mb_str2wide(smp->sm_to);
				1081	if (smp->sm_lead_w == NULL
				1082	\|\| (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
				1083	\|\| (smp->sm_to_w == NULL && smp->sm_to != NULL))
				1084	{
				1085	vim_free(smp->sm_lead);
				1086	vim_free(smp->sm_to);
				1087	vim_free(smp->sm_lead_w);
				1088	vim_free(smp->sm_oneof_w);
				1089	vim_free(smp->sm_to_w);
				1090	return SP_OTHERERROR;
				1091	}
				1092	}
				1093	#endif
				1094	}
				1095
				1096	if (gap->ga_len > 0)
				1097	{
				1098	/* Add one extra entry to mark the end with an empty sm_lead. Avoids
				1099	* that we need to check the index every time. */
				1100	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1101	if ((p = alloc(1)) == NULL)
				1102	return SP_OTHERERROR;
				1103	p[0] = NUL;
				1104	smp->sm_lead = p;
				1105	smp->sm_leadlen = 0;
				1106	smp->sm_oneof = NULL;
				1107	smp->sm_rules = p;
				1108	smp->sm_to = NULL;
				1109	#ifdef FEAT_MBYTE
				1110	if (has_mbyte)
				1111	{
				1112	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1113	smp->sm_leadlen = 0;
				1114	smp->sm_oneof_w = NULL;
				1115	smp->sm_to_w = NULL;
				1116	}
				1117	#endif
				1118	++gap->ga_len;
				1119	}
				1120
				1121	/* Fill the first-index table. */
				1122	set_sal_first(slang);
				1123
				1124	return 0;
				1125	}
				1126
				1127	/*
				1128	* Read SN_WORDS: <word> ...
				1129	* Return SP_*ERROR flags.
				1130	*/
				1131	static int
				1132	read_words_section(FILE fd, slang_T lp, int len)
				1133	{
				1134	int done = 0;
				1135	int i;
				1136	int c;
				1137	char_u word[MAXWLEN];
				1138
				1139	while (done < len)
				1140	{
				1141	/* Read one word at a time. */
				1142	for (i = 0; ; ++i)
				1143	{
				1144	c = getc(fd);
				1145	if (c == EOF)
				1146	return SP_TRUNCERROR;
				1147	word[i] = c;
				1148	if (word[i] == NUL)
				1149	break;
				1150	if (i == MAXWLEN - 1)
				1151	return SP_FORMERROR;
				1152	}
				1153
				1154	/* Init the count to 10. */
				1155	count_common_word(lp, word, -1, 10);
				1156	done += i + 1;
				1157	}
				1158	return 0;
				1159	}
				1160
				1161	/*
				1162	* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				1163	* Return SP_*ERROR flags.
				1164	*/
				1165	static int
				1166	read_sofo_section(FILE fd, slang_T slang)
				1167	{
				1168	int cnt;
				1169	char_u from, to;
				1170	int res;
				1171
				1172	slang->sl_sofo = TRUE;
				1173
				1174	/* <sofofromlen> <sofofrom> */
				1175	from = read_cnt_string(fd, 2, &cnt);
				1176	if (cnt < 0)
				1177	return cnt;
				1178
				1179	/* <sofotolen> <sofoto> */
				1180	to = read_cnt_string(fd, 2, &cnt);
				1181	if (cnt < 0)
				1182	{
				1183	vim_free(from);
				1184	return cnt;
				1185	}
				1186
				1187	/* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
				1188	if (from != NULL && to != NULL)
				1189	res = set_sofo(slang, from, to);
				1190	else if (from != NULL \|\| to != NULL)
				1191	res = SP_FORMERROR; /* only one of two strings is an error */
				1192	else
				1193	res = 0;
				1194
				1195	vim_free(from);
				1196	vim_free(to);
				1197	return res;
				1198	}
				1199
				1200	/*
				1201	* Read the compound section from the .spl file:
				1202	* <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
				1203	* Returns SP_*ERROR flags.
				1204	*/
				1205	static int
				1206	read_compound(FILE fd, slang_T slang, int len)
				1207	{
				1208	int todo = len;
				1209	int c;
				1210	int atstart;
				1211	char_u *pat;
				1212	char_u *pp;
				1213	char_u *cp;
				1214	char_u *ap;
				1215	char_u *crp;
				1216	int cnt;
				1217	garray_T *gap;
				1218
				1219	if (todo < 2)
				1220	return SP_FORMERROR; /* need at least two bytes */
				1221
				1222	--todo;
				1223	c = getc(fd); /* <compmax> */
				1224	if (c < 2)
				1225	c = MAXWLEN;
				1226	slang->sl_compmax = c;
				1227
				1228	--todo;
				1229	c = getc(fd); /* <compminlen> */
				1230	if (c < 1)
				1231	c = 0;
				1232	slang->sl_compminlen = c;
				1233
				1234	--todo;
				1235	c = getc(fd); /* <compsylmax> */
				1236	if (c < 1)
				1237	c = MAXWLEN;
				1238	slang->sl_compsylmax = c;
				1239
				1240	c = getc(fd); /* <compoptions> */
				1241	if (c != 0)
				1242	ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
				1243	else
				1244	{
				1245	--todo;
				1246	c = getc(fd); /* only use the lower byte for now */
				1247	--todo;
				1248	slang->sl_compoptions = c;
				1249
				1250	gap = &slang->sl_comppat;
				1251	c = get2c(fd); /* <comppatcount> */
				1252	todo -= 2;
				1253	ga_init2(gap, sizeof(char_u *), c);
				1254	if (ga_grow(gap, c) == OK)
				1255	while (--c >= 0)
				1256	{
				1257	((char_u **)(gap->ga_data))[gap->ga_len++] =
				1258	read_cnt_string(fd, 1, &cnt);
				1259	/* <comppatlen> <comppattext> */
				1260	if (cnt < 0)
				1261	return cnt;
				1262	todo -= cnt + 1;
				1263	}
				1264	}
				1265	if (todo < 0)
				1266	return SP_FORMERROR;
				1267
				1268	/* Turn the COMPOUNDRULE items into a regexp pattern:
				1269	* "a[bc]/ab+" -> "^$a[bc]\\|ab\+$$".
				1270	* Inserting backslashes may double the length, "^$<Nul>" is 7 bytes.
				1271	* Conversion to utf-8 may double the size. */
				1272	c = todo * 2 + 7;
				1273	#ifdef FEAT_MBYTE
				1274	if (enc_utf8)
				1275	c += todo * 2;
				1276	#endif
				1277	pat = alloc((unsigned)c);
				1278	if (pat == NULL)
				1279	return SP_OTHERERROR;
				1280
				1281	/* We also need a list of all flags that can appear at the start and one
				1282	* for all flags. */
				1283	cp = alloc(todo + 1);
				1284	if (cp == NULL)
				1285	{
				1286	vim_free(pat);
				1287	return SP_OTHERERROR;
				1288	}
				1289	slang->sl_compstartflags = cp;
				1290	*cp = NUL;
				1291
				1292	ap = alloc(todo + 1);
				1293	if (ap == NULL)
				1294	{
				1295	vim_free(pat);
				1296	return SP_OTHERERROR;
				1297	}
				1298	slang->sl_compallflags = ap;
				1299	*ap = NUL;
				1300
				1301	/* And a list of all patterns in their original form, for checking whether
				1302	* compounding may work in match_compoundrule(). This is freed when we
				1303	* encounter a wildcard, the check doesn't work then. */
				1304	crp = alloc(todo + 1);
				1305	slang->sl_comprules = crp;
				1306
				1307	pp = pat;
				1308	*pp++ = '^';
				1309	*pp++ = '\\';
				1310	*pp++ = '(';
				1311
				1312	atstart = 1;
				1313	while (todo-- > 0)
				1314	{
				1315	c = getc(fd); /* <compflags> */
				1316	if (c == EOF)
				1317	{
				1318	vim_free(pat);
				1319	return SP_TRUNCERROR;
				1320	}
				1321
				1322	/* Add all flags to "sl_compallflags". */
				1323	if (vim_strchr((char_u )"?+[]/", c) == NULL
				1324	&& !byte_in_str(slang->sl_compallflags, c))
				1325	{
				1326	*ap++ = c;
				1327	*ap = NUL;
				1328	}
				1329
				1330	if (atstart != 0)
				1331	{
				1332	/* At start of item: copy flags to "sl_compstartflags". For a
				1333	* [abc] item set "atstart" to 2 and copy up to the ']'. */
				1334	if (c == '[')
				1335	atstart = 2;
				1336	else if (c == ']')
				1337	atstart = 0;
				1338	else
				1339	{
				1340	if (!byte_in_str(slang->sl_compstartflags, c))
				1341	{
				1342	*cp++ = c;
				1343	*cp = NUL;
				1344	}
				1345	if (atstart == 1)
				1346	atstart = 0;
				1347	}
				1348	}
				1349
				1350	/* Copy flag to "sl_comprules", unless we run into a wildcard. */
				1351	if (crp != NULL)
				1352	{
				1353	if (c == '?' \|\| c == '+' \|\| c == '*')
				1354	{
				1355	vim_free(slang->sl_comprules);
				1356	slang->sl_comprules = NULL;
				1357	crp = NULL;
				1358	}
				1359	else
				1360	*crp++ = c;
				1361	}
				1362
				1363	if (c == '/') /* slash separates two items */
				1364	{
				1365	*pp++ = '\\';
				1366	*pp++ = '\|';
				1367	atstart = 1;
				1368	}
				1369	else /* normal char, "[abc]" and '' are copied as-is /
				1370	{
				1371	if (c == '?' \|\| c == '+' \|\| c == '~')
				1372	pp++ = '\\'; / "a?" becomes "a\?", "a+" becomes "a\+" */
				1373	#ifdef FEAT_MBYTE
				1374	if (enc_utf8)
				1375	pp += mb_char2bytes(c, pp);
				1376	else
				1377	#endif
				1378	*pp++ = c;
				1379	}
				1380	}
				1381
				1382	*pp++ = '\\';
				1383	*pp++ = ')';
				1384	*pp++ = '$';
				1385	*pp = NUL;
				1386
				1387	if (crp != NULL)
				1388	*crp = NUL;
				1389
				1390	slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
				1391	vim_free(pat);
				1392	if (slang->sl_compprog == NULL)
				1393	return SP_FORMERROR;
				1394
				1395	return 0;
				1396	}
				1397
				1398	/*
				1399	* Set the SOFOFROM and SOFOTO items in language "lp".
				1400	* Returns SP_*ERROR flags when there is something wrong.
				1401	*/
				1402	static int
				1403	set_sofo(slang_T lp, char_u from, char_u *to)
				1404	{
				1405	int i;
				1406
				1407	#ifdef FEAT_MBYTE
				1408	garray_T *gap;
				1409	char_u *s;
				1410	char_u *p;
				1411	int c;
				1412	int *inp;
				1413
				1414	if (has_mbyte)
				1415	{
				1416	/* Use "sl_sal" as an array with 256 pointers to a list of wide
				1417	* characters. The index is the low byte of the character.
				1418	* The list contains from-to pairs with a terminating NUL.
				1419	* sl_sal_first[] is used for latin1 "from" characters. */
				1420	gap = &lp->sl_sal;
				1421	ga_init2(gap, sizeof(int *), 1);
				1422	if (ga_grow(gap, 256) == FAIL)
				1423	return SP_OTHERERROR;
				1424	vim_memset(gap->ga_data, 0, sizeof(int ) 256);
				1425	gap->ga_len = 256;
				1426
				1427	/* First count the number of items for each list. Temporarily use
				1428	* sl_sal_first[] for this. */
				1429	for (p = from, s = to; p != NUL && s != NUL; )
				1430	{
				1431	c = mb_cptr2char_adv(&p);
				1432	mb_cptr_adv(s);
				1433	if (c >= 256)
				1434	++lp->sl_sal_first[c & 0xff];
				1435	}
				1436	if (p != NUL \|\| s != NUL) /* lengths differ */
				1437	return SP_FORMERROR;
				1438
				1439	/* Allocate the lists. */
				1440	for (i = 0; i < 256; ++i)
				1441	if (lp->sl_sal_first[i] > 0)
				1442	{
				1443	p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
				1444	if (p == NULL)
				1445	return SP_OTHERERROR;
				1446	((int *)gap->ga_data)[i] = (int )p;
				1447	(int )p = 0;
				1448	}
				1449
				1450	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
				1451	* list. */
				1452	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
				1453	for (p = from, s = to; p != NUL && s != NUL; )
				1454	{
				1455	c = mb_cptr2char_adv(&p);
				1456	i = mb_cptr2char_adv(&s);
				1457	if (c >= 256)
				1458	{
				1459	/* Append the from-to chars at the end of the list with
				1460	* the low byte. */
				1461	inp = ((int **)gap->ga_data)[c & 0xff];
				1462	while (*inp != 0)
				1463	++inp;
				1464	inp++ = c; / from char */
				1465	inp++ = i; / to char */
				1466	inp++ = NUL; / NUL at the end */
				1467	}
				1468	else
				1469	/* mapping byte to char is done in sl_sal_first[] */
				1470	lp->sl_sal_first[c] = i;
				1471	}
				1472	}
				1473	else
				1474	#endif
				1475	{
				1476	/* mapping bytes to bytes is done in sl_sal_first[] */
				1477	if (STRLEN(from) != STRLEN(to))
				1478	return SP_FORMERROR;
				1479
				1480	for (i = 0; to[i] != NUL; ++i)
				1481	lp->sl_sal_first[from[i]] = to[i];
				1482	lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
				1483	}
				1484
				1485	return 0;
				1486	}
				1487
				1488	/*
				1489	* Fill the first-index table for "lp".
				1490	*/
				1491	static void
				1492	set_sal_first(slang_T *lp)
				1493	{
				1494	salfirst_T *sfirst;
				1495	int i;
				1496	salitem_T *smp;
				1497	int c;
				1498	garray_T *gap = &lp->sl_sal;
				1499
				1500	sfirst = lp->sl_sal_first;
				1501	for (i = 0; i < 256; ++i)
				1502	sfirst[i] = -1;
				1503	smp = (salitem_T *)gap->ga_data;
				1504	for (i = 0; i < gap->ga_len; ++i)
				1505	{
				1506	#ifdef FEAT_MBYTE
				1507	if (has_mbyte)
				1508	/* Use the lowest byte of the first character. For latin1 it's
				1509	* the character, for other encodings it should differ for most
				1510	* characters. */
				1511	c = *smp[i].sm_lead_w & 0xff;
				1512	else
				1513	#endif
				1514	c = *smp[i].sm_lead;
				1515	if (sfirst[c] == -1)
				1516	{
				1517	sfirst[c] = i;
				1518	#ifdef FEAT_MBYTE
				1519	if (has_mbyte)
				1520	{
				1521	int n;
				1522
				1523	/* Make sure all entries with this byte are following each
				1524	* other. Move the ones that are in the wrong position. Do
				1525	* keep the same ordering! */
				1526	while (i + 1 < gap->ga_len
				1527	&& (*smp[i + 1].sm_lead_w & 0xff) == c)
				1528	/* Skip over entry with same index byte. */
				1529	++i;
				1530
				1531	for (n = 1; i + n < gap->ga_len; ++n)
				1532	if ((*smp[i + n].sm_lead_w & 0xff) == c)
				1533	{
				1534	salitem_T tsal;
				1535
				1536	/* Move entry with same index byte after the entries
				1537	* we already found. */
				1538	++i;
				1539	--n;
				1540	tsal = smp[i + n];
				1541	mch_memmove(smp + i + 1, smp + i,
				1542	sizeof(salitem_T) * n);
				1543	smp[i] = tsal;
				1544	}
				1545	}
				1546	#endif
				1547	}
				1548	}
				1549	}
				1550
				#ifdef FEAT_MBYTE
				/*
				 * Turn a multi-byte string into a wide character string.
				 * The result holds one int per character of "s" plus a terminating NUL.
				 * Return it in allocated memory (NULL for out-of-memory)
				 */
				static int *
				mb_str2wide(char_u *s)
				{
				    int		*res;
				    char_u	*p;
				    int		i = 0;

				    res = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
				    if (res != NULL)
				    {
				        for (p = s; *p != NUL; )
				            res[i++] = mb_ptr2char_adv(&p);
				        res[i] = NUL;
				    }
				    return res;
				}
				#endif
				1573
				1574	/*
				1575	* Read a tree from the .spl or .sug file.
				1576	* Allocates the memory and stores pointers in "bytsp" and "idxsp".
				1577	* This is skipped when the tree has zero length.
				1578	* Returns zero when OK, SP_ value for an error.
				1579	*/
				1580	static int
				1581	spell_read_tree(
				1582	FILE *fd,
				1583	char_u **bytsp,
				1584	idx_T **idxsp,
				1585	int prefixtree, /* TRUE for the prefix tree */
				1586	int prefixcnt) /* when "prefixtree" is TRUE: prefix count */
				1587	{
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1588	long len;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1589	int idx;
				1590	char_u *bp;
				1591	idx_T *ip;
				1592
				1593	/* The tree size was computed when writing the file, so that we can
				1594	* allocate it as one long block. <nodecount> */
				1595	len = get4c(fd);
				1596	if (len < 0)
				1597	return SP_TRUNCERROR;
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1598	if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar	399c297	2017-02-09 21:07:12 +0100	[diff] [blame]	1599	/* Invalid length, multiply with sizeof(int) would overflow. */
				1600	return SP_FORMERROR;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1601	if (len > 0)
				1602	{
				1603	/* Allocate the byte array. */
				1604	bp = lalloc((long_u)len, TRUE);
				1605	if (bp == NULL)
				1606	return SP_OTHERERROR;
				1607	*bytsp = bp;
				1608
				1609	/* Allocate the index array. */
				1610	ip = (idx_T )lalloc_clear((long_u)(len sizeof(int)), TRUE);
				1611	if (ip == NULL)
				1612	return SP_OTHERERROR;
				1613	*idxsp = ip;
				1614
				1615	/* Recursively read the tree and store it in the array. */
				1616	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
				1617	if (idx < 0)
				1618	return idx;
				1619	}
				1620	return 0;
				1621	}
				1622
				1623	/*
				1624	* Read one row of siblings from the spell file and store it in the byte array
				1625	* "byts" and index array "idxs". Recursively read the children.
				1626	*
				1627	* NOTE: The code here must match put_node()!
				1628	*
				1629	* Returns the index (>= 0) following the siblings.
				1630	* Returns SP_TRUNCERROR if the file is shorter than expected.
				1631	* Returns SP_FORMERROR if there is a format error.
				1632	*/
				1633	static idx_T
				1634	read_tree_node(
				1635	FILE *fd,
				1636	char_u *byts,
				1637	idx_T *idxs,
				1638	int maxidx, /* size of arrays */
				1639	idx_T startidx, /* current index in "byts" and "idxs" */
				1640	int prefixtree, /* TRUE for reading PREFIXTREE */
				1641	int maxprefcondnr) /* maximum for <prefcondnr> */
				1642	{
				1643	int len;
				1644	int i;
				1645	int n;
				1646	idx_T idx = startidx;
				1647	int c;
				1648	int c2;
				1649	#define SHARED_MASK 0x8000000
				1650
				1651	len = getc(fd); /* <siblingcount> */
				1652	if (len <= 0)
				1653	return SP_TRUNCERROR;
				1654
				1655	if (startidx + len >= maxidx)
				1656	return SP_FORMERROR;
				1657	byts[idx++] = len;
				1658
				1659	/* Read the byte values, flag/region bytes and shared indexes. */
				1660	for (i = 1; i <= len; ++i)
				1661	{
				1662	c = getc(fd); /* <byte> */
				1663	if (c < 0)
				1664	return SP_TRUNCERROR;
				1665	if (c <= BY_SPECIAL)
				1666	{
				1667	if (c == BY_NOFLAGS && !prefixtree)
				1668	{
				1669	/* No flags, all regions. */
				1670	idxs[idx] = 0;
				1671	c = 0;
				1672	}
				1673	else if (c != BY_INDEX)
				1674	{
				1675	if (prefixtree)
				1676	{
				1677	/* Read the optional pflags byte, the prefix ID and the
				1678	* condition nr. In idxs[] store the prefix ID in the low
				1679	* byte, the condition index shifted up 8 bits, the flags
				1680	* shifted up 24 bits. */
				1681	if (c == BY_FLAGS)
				1682	c = getc(fd) << 24; /* <pflags> */
				1683	else
				1684	c = 0;
				1685
				1686	c \|= getc(fd); /* <affixID> */
				1687
				1688	n = get2c(fd); /* <prefcondnr> */
				1689	if (n >= maxprefcondnr)
				1690	return SP_FORMERROR;
				1691	c \|= (n << 8);
				1692	}
				1693	else /* c must be BY_FLAGS or BY_FLAGS2 */
				1694	{
				1695	/* Read flags and optional region and prefix ID. In
				1696	* idxs[] the flags go in the low two bytes, region above
				1697	* that and prefix ID above the region. */
				1698	c2 = c;
				1699	c = getc(fd); /* <flags> */
				1700	if (c2 == BY_FLAGS2)
				1701	c = (getc(fd) << 8) + c; /* <flags2> */
				1702	if (c & WF_REGION)
				1703	c = (getc(fd) << 16) + c; /* <region> */
				1704	if (c & WF_AFX)
				1705	c = (getc(fd) << 24) + c; /* <affixID> */
				1706	}
				1707
				1708	idxs[idx] = c;
				1709	c = 0;
				1710	}
				1711	else /* c == BY_INDEX */
				1712	{
				1713	/* <nodeidx> */
				1714	n = get3c(fd);
				1715	if (n < 0 \|\| n >= maxidx)
				1716	return SP_FORMERROR;
				1717	idxs[idx] = n + SHARED_MASK;
				1718	c = getc(fd); /* <xbyte> */
				1719	}
				1720	}
				1721	byts[idx++] = c;
				1722	}
				1723
				1724	/* Recursively read the children for non-shared siblings.
				1725	* Skip the end-of-word ones (zero byte value) and the shared ones (and
				1726	* remove SHARED_MASK) */
				1727	for (i = 1; i <= len; ++i)
				1728	if (byts[startidx + i] != 0)
				1729	{
				1730	if (idxs[startidx + i] & SHARED_MASK)
				1731	idxs[startidx + i] &= ~SHARED_MASK;
				1732	else
				1733	{
				1734	idxs[startidx + i] = idx;
				1735	idx = read_tree_node(fd, byts, idxs, maxidx, idx,
				1736	prefixtree, maxprefcondnr);
				1737	if (idx < 0)
				1738	break;
				1739	}
				1740	}
				1741
				1742	return idx;
				1743	}
				1744
				1745	/*
				1746	* Reload the spell file "fname" if it's loaded.
				1747	*/
				1748	static void
				1749	spell_reload_one(
				1750	char_u *fname,
				1751	int added_word) /* invoked through "zg" */
				1752	{
				1753	slang_T *slang;
				1754	int didit = FALSE;
				1755
				1756	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				1757	{
				1758	if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
				1759	{
				1760	slang_clear(slang);
				1761	if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
				1762	/* reloading failed, clear the language */
				1763	slang_clear(slang);
				1764	redraw_all_later(SOME_VALID);
				1765	didit = TRUE;
				1766	}
				1767	}
				1768
				1769	/* When "zg" was used and the file wasn't loaded yet, should redo
				1770	* 'spelllang' to load it now. */
				1771	if (added_word && !didit)
				1772	did_set_spelllang(curwin);
				1773	}
				1774
				1775
				1776	/*
				1777	* Functions for ":mkspell".
				1778	*/
				1779
				1780	#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
				1781	and .dic file. */
				1782	/*
				1783	* Main structure to store the contents of a ".aff" file.
				1784	*/
				1785	typedef struct afffile_S
				1786	{
				1787	char_u af_enc; / "SET", normalized, alloc'ed string or NULL */
				1788	int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
				1789	unsigned af_rare; /* RARE ID for rare word */
				1790	unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
				1791	unsigned af_bad; /* BAD ID for banned word */
				1792	unsigned af_needaffix; /* NEEDAFFIX ID */
				1793	unsigned af_circumfix; /* CIRCUMFIX ID */
				1794	unsigned af_needcomp; /* NEEDCOMPOUND ID */
				1795	unsigned af_comproot; /* COMPOUNDROOT ID */
				1796	unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */
				1797	unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */
				1798	unsigned af_nosuggest; /* NOSUGGEST ID */
				1799	int af_pfxpostpone; /* postpone prefixes without chop string and
				1800	without flags */
				1801	int af_ignoreextra; /* IGNOREEXTRA present */
				1802	hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
				1803	hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
				1804	hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
				1805	} afffile_T;
				1806
				1807	#define AFT_CHAR 0 /* flags are one character */
				1808	#define AFT_LONG 1 /* flags are two characters */
				1809	#define AFT_CAPLONG 2 /* flags are one or two characters */
				1810	#define AFT_NUM 3 /* flags are numbers, comma separated */
				1811
				1812	typedef struct affentry_S affentry_T;
				1813	/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
				1814	struct affentry_S
				1815	{
				1816	affentry_T ae_next; / next affix with same name/number */
				1817	char_u ae_chop; / text to chop off basic word (can be NULL) */
				1818	char_u ae_add; / text to add to basic word (can be NULL) */
				1819	char_u ae_flags; / flags on the affix (can be NULL) */
				1820	char_u ae_cond; / condition (NULL for ".") */
				1821	regprog_T ae_prog; / regexp program for ae_cond or NULL */
				1822	char ae_compforbid; /* COMPOUNDFORBIDFLAG found */
				1823	char ae_comppermit; /* COMPOUNDPERMITFLAG found */
				1824	};
				1825
				1826	#ifdef FEAT_MBYTE
				1827	# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
				1828	#else
				1829	# define AH_KEY_LEN 7 /* 6 digits + NUL */
				1830	#endif
				1831
				1832	/* Affix header from ".aff" file. Used for af_pref and af_suff. */
				1833	typedef struct affheader_S
				1834	{
				1835	char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
				1836	unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
				1837	int ah_newID; /* prefix ID after renumbering; 0 if not used */
				1838	int ah_combine; /* suffix may combine with prefix */
				1839	int ah_follows; /* another affix block should be following */
				1840	affentry_T ah_first; / first affix entry */
				1841	} affheader_T;
				1842
				1843	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
				1844
				1845	/* Flag used in compound items. */
				1846	typedef struct compitem_S
				1847	{
				1848	char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
				1849	unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
				1850	int ci_newID; /* affix ID after renumbering. */
				1851	} compitem_T;
				1852
				1853	#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
				1854
				1855	/*
				1856	* Structure that is used to store the items in the word tree. This avoids
				1857	* the need to keep track of each allocated thing, everything is freed all at
				1858	* once after ":mkspell" is done.
				1859	* Note: "sb_next" must be just before "sb_data" to make sure the alignment of
				1860	* "sb_data" is correct for systems where pointers must be aligned on
				1861	* pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
				1862	*/
				1863	#define SBLOCKSIZE 16000 /* size of sb_data */
				1864	typedef struct sblock_S sblock_T;
				1865	struct sblock_S
				1866	{
				1867	int sb_used; /* nr of bytes already in use */
				1868	sblock_T sb_next; / next block in list */
				1869	char_u sb_data[1]; /* data, actually longer */
				1870	};
				1871
				1872	/*
				1873	* A node in the tree.
				1874	*/
				1875	typedef struct wordnode_S wordnode_T;
				1876	struct wordnode_S
				1877	{
				1878	union /* shared to save space */
				1879	{
				1880	char_u hashkey[6]; /* the hash key, only used while compressing */
				1881	int index; /* index in written nodes (valid after first
				1882	round) */
				1883	} wn_u1;
				1884	union /* shared to save space */
				1885	{
				1886	wordnode_T next; / next node with same hash key */
				1887	wordnode_T wnode; / parent node that will write this node */
				1888	} wn_u2;
				1889	wordnode_T wn_child; / child (next byte in word) */
				1890	wordnode_T wn_sibling; / next sibling (alternate byte in word,
				1891	always sorted) */
				1892	int wn_refs; /* Nr. of references to this node. Only
				1893	relevant for first node in a list of
				1894	siblings, in following siblings it is
				1895	always one. */
				1896	char_u wn_byte; /* Byte for this node. NUL for word end */
				1897
				1898	/* Info for when "wn_byte" is NUL.
				1899	* In PREFIXTREE "wn_region" is used for the prefcondnr.
				1900	* In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
				1901	* "wn_region" the LSW of the wordnr. */
				1902	char_u wn_affixID; /* supported/required prefix ID or 0 */
				1903	short_u wn_flags; /* WF_ flags */
				1904	short wn_region; /* region mask */
				1905
				1906	#ifdef SPELL_PRINTTREE
				1907	int wn_nr; /* sequence nr for printing */
				1908	#endif
				1909	};
				1910
				1911	#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
				1912
				1913	#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
				1914
				1915	/*
				1916	* Info used while reading the spell files.
				1917	*/
				1918	typedef struct spellinfo_S
				1919	{
				1920	wordnode_T si_foldroot; / tree with case-folded words */
				1921	long si_foldwcount; /* nr of words in si_foldroot */
				1922
				1923	wordnode_T si_keeproot; / tree with keep-case words */
				1924	long si_keepwcount; /* nr of words in si_keeproot */
				1925
				1926	wordnode_T si_prefroot; / tree with postponed prefixes */
				1927
				1928	long si_sugtree; /* creating the soundfolding trie */
				1929
				1930	sblock_T si_blocks; / memory blocks used */
				1931	long si_blocks_cnt; /* memory blocks allocated */
				1932	int si_did_emsg; /* TRUE when ran out of memory */
				1933
				1934	long si_compress_cnt; /* words to add before lowering
				1935	compression limit */
				1936	wordnode_T si_first_free; / List of nodes that have been freed during
				1937	compression, linked by "wn_child" field. */
				1938	long si_free_count; /* number of nodes in si_first_free */
				1939	#ifdef SPELL_PRINTTREE
				1940	int si_wordnode_nr; /* sequence nr for nodes */
				1941	#endif
				1942	buf_T si_spellbuf; / buffer used to store soundfold word table */
				1943
				1944	int si_ascii; /* handling only ASCII words */
				1945	int si_add; /* addition file */
				1946	int si_clear_chartab; /* when TRUE clear char tables */
				1947	int si_region; /* region mask */
				1948	vimconv_T si_conv; /* for conversion to 'encoding' */
				1949	int si_memtot; /* runtime memory used */
				1950	int si_verbose; /* verbose messages */
				1951	int si_msg_count; /* number of words added since last message */
				1952	char_u si_info; / info text chars or NULL */
				1953	int si_region_count; /* number of regions supported (1 when there
				1954	are no regions) */
				1955	char_u si_region_name[17]; /* region names; used only if
				1956	* si_region_count > 1) */
				1957
				1958	garray_T si_rep; /* list of fromto_T entries from REP lines */
				1959	garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
				1960	garray_T si_sal; /* list of fromto_T entries from SAL lines */
				1961	char_u si_sofofr; / SOFOFROM text */
				1962	char_u si_sofoto; / SOFOTO text */
				1963	int si_nosugfile; /* NOSUGFILE item found */
				1964	int si_nosplitsugs; /* NOSPLITSUGS item found */
				1965	int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
				1966	int si_followup; /* soundsalike: ? */
				1967	int si_collapse; /* soundsalike: ? */
				1968	hashtab_T si_commonwords; /* hashtable for common words */
				1969	time_t si_sugtime; /* timestamp for .sug file */
				1970	int si_rem_accents; /* soundsalike: remove accents */
				1971	garray_T si_map; /* MAP info concatenated */
				1972	char_u si_midword; / MIDWORD chars or NULL */
				1973	int si_compmax; /* max nr of words for compounding */
				1974	int si_compminlen; /* minimal length for compounding */
				1975	int si_compsylmax; /* max nr of syllables for compounding */
				1976	int si_compoptions; /* COMP_ flags */
				1977	garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as
				1978	a string */
				1979	char_u si_compflags; / flags used for compounding */
				1980	char_u si_nobreak; /* NOBREAK */
				1981	char_u si_syllable; / syllable string */
				1982	garray_T si_prefcond; /* table with conditions for postponed
				1983	* prefixes, each stored as a string */
				1984	int si_newprefID; /* current value for ah_newID */
				1985	int si_newcompID; /* current value for compound ID */
				1986	} spellinfo_T;
				1987
				1988	static afffile_T spell_read_aff(spellinfo_T spin, char_u *fname);
				1989	static int is_aff_rule(char_u *items, int itemcnt, char rulename, int mincount);
				1990	static void aff_process_flags(afffile_T affile, affentry_T entry);
				1991	static int spell_info_item(char_u *s);
				1992	static unsigned affitem2flag(int flagtype, char_u item, char_u fname, int lnum);
				1993	static unsigned get_affitem(int flagtype, char_u **pp);
				1994	static void process_compflags(spellinfo_T spin, afffile_T aff, char_u *compflags);
				1995	static void check_renumber(spellinfo_T *spin);
				1996	static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag);
				1997	static void aff_check_number(int spinval, int affval, char *name);
				1998	static void aff_check_string(char_u spinval, char_u affval, char *name);
				1999	static int str_equal(char_u s1, char_u s2);
				2000	static void add_fromto(spellinfo_T spin, garray_T gap, char_u from, char_u to);
				2001	static int sal_to_bool(char_u *s);
				2002	static void spell_free_aff(afffile_T *aff);
				2003	static int spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile);
				2004	static int get_affix_flags(afffile_T affile, char_u afflist);
				2005	static int get_pfxlist(afffile_T affile, char_u afflist, char_u *store_afflist);
				2006	static void get_compflags(afffile_T affile, char_u afflist, char_u *store_afflist);
				2007	static int store_aff_word(spellinfo_T spin, char_u word, char_u afflist, afffile_T affile, hashtab_T ht, hashtab_T xht, int condit, int flags, char_u *pfxlist, int pfxlen);
				2008	static int spell_read_wordfile(spellinfo_T spin, char_u fname);
				2009	static void getroom(spellinfo_T spin, size_t len, int align);
				2010	static char_u getroom_save(spellinfo_T spin, char_u *s);
				2011	static void free_blocks(sblock_T *bl);
				2012	static wordnode_T wordtree_alloc(spellinfo_T spin);
				2013	static int store_word(spellinfo_T spin, char_u word, int flags, int region, char_u *pfxlist, int need_affix);
				2014	static int tree_add_word(spellinfo_T spin, char_u word, wordnode_T *tree, int flags, int region, int affixID);
				2015	static wordnode_T get_wordnode(spellinfo_T spin);
				2016	static int deref_wordnode(spellinfo_T spin, wordnode_T node);
				2017	static void free_wordnode(spellinfo_T spin, wordnode_T n);
				2018	static void wordtree_compress(spellinfo_T spin, wordnode_T root);
				2019	static int node_compress(spellinfo_T spin, wordnode_T node, hashtab_T ht, int tot);
				2020	static int node_equal(wordnode_T n1, wordnode_T n2);
				2021	static int write_vim_spell(spellinfo_T spin, char_u fname);
				2022	static void clear_node(wordnode_T *node);
				2023	static int put_node(FILE fd, wordnode_T node, int idx, int regionmask, int prefixtree);
				2024	static void spell_make_sugfile(spellinfo_T spin, char_u wfname);
				2025	static int sug_filltree(spellinfo_T spin, slang_T slang);
				2026	static int sug_maketable(spellinfo_T *spin);
				2027	static int sug_filltable(spellinfo_T spin, wordnode_T node, int startwordnr, garray_T *gap);
				2028	static int offset2bytes(int nr, char_u *buf);
				2029	static void sug_write(spellinfo_T spin, char_u fname);
				2030	static void spell_message(spellinfo_T spin, char_u str);
				2031	static void init_spellfile(void);
				2032
				/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
				 * but it must be negative to indicate the prefix tree to tree_add_word().
				 * Use a negative number with the lower 8 bits zero.
				 * The value is parenthesized so that the minus sign cannot combine with a
				 * neighbouring operator when the macro is used inside an expression. */
				#define PFX_FLAGS (-256)

				/* flags for "condit" argument of store_aff_word(); these are bit values
				 * that can be combined */
				#define CONDIT_COMB 1 /* affix must combine */
				#define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */
				#define CONDIT_SUF 4 /* add a suffix for matching flags */
				#define CONDIT_AFF 8 /* word already has an affix */

				/*
				 * Tunable parameters for when the tree is compressed. See 'mkspellmem'.
				 * These are the defaults; spell_check_msm() replaces them with the values
				 * parsed from the option.
				 */
				static long compress_start = 30000; /* memory / SBLOCKSIZE */
				static long compress_inc = 100; /* memory / SBLOCKSIZE */
				static long compress_added = 500000; /* word count */
				2050
				2051	/*
				2052	* Check the 'mkspellmem' option. Return FAIL if it's wrong.
				2053	* Sets "sps_flags".
				2054	*/
				2055	int
				2056	spell_check_msm(void)
				2057	{
				2058	char_u *p = p_msm;
				2059	long start = 0;
				2060	long incr = 0;
				2061	long added = 0;
				2062
				2063	if (!VIM_ISDIGIT(*p))
				2064	return FAIL;
				2065	/* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
				2066	start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
				2067	if (*p != ',')
				2068	return FAIL;
				2069	++p;
				2070	if (!VIM_ISDIGIT(*p))
				2071	return FAIL;
				2072	incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
				2073	if (*p != ',')
				2074	return FAIL;
				2075	++p;
				2076	if (!VIM_ISDIGIT(*p))
				2077	return FAIL;
				2078	added = getdigits(&p) * 1024;
				2079	if (*p != NUL)
				2080	return FAIL;
				2081
				2082	if (start == 0 \|\| incr == 0 \|\| added == 0 \|\| incr > start)
				2083	return FAIL;
				2084
				2085	compress_start = start;
				2086	compress_inc = incr;
				2087	compress_added = added;
				2088	return OK;
				2089	}
				2090
				2091	#ifdef SPELL_PRINTTREE
				2092	/*
				2093	* For debugging the tree code: print the current tree in a (more or less)
				2094	* readable format, so that we can see what happens when adding a word and/or
				2095	* compressing the tree.
				2096	* Based on code from Olaf Seibert.
				2097	*/
				#define PRINTLINESIZE 1000 /* size of each of the three output rows */
				#define PRINTWIDTH 6 /* number of characters used per tree level */

				/*
				 * Format "fmt" with the two arguments "a1" and "a2" into row "l", starting
				 * at the column that belongs to tree level "depth" and limited to the space
				 * that remains in the row.
				 * Every parameter is parenthesized so that an expression argument (e.g.
				 * "depth + 1") yields the intended offset and size.
				 */
				#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf((l) + (depth) * PRINTWIDTH, \
				        PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2))

				/* The three text rows that are filled by PRINTSOME() and shown with msg(). */
				static char line1[PRINTLINESIZE];
				static char line2[PRINTLINESIZE];
				static char line3[PRINTLINESIZE];
				2107
				2108	static void
				2109	spell_clear_flags(wordnode_T *node)
				2110	{
				2111	wordnode_T *np;
				2112
				2113	for (np = node; np != NULL; np = np->wn_sibling)
				2114	{
				2115	np->wn_u1.index = FALSE;
				2116	spell_clear_flags(np->wn_child);
				2117	}
				2118	}
				2119
					/*
					 * Print "node" and, recursively, its child and its siblings.
					 * The output is built up in the three rows line1, line2 and line3; each
					 * tree level writes at the column selected by "depth" (see PRINTSOME).
					 * "wn_u1.index" is used as a "seen" flag: a node that was handled before
					 * is shown only as a "(nr)" reference.
					 * The rows are shown with msg() when a NUL-byte node or an already seen
					 * node is reached.
					 * NOTE(review): the padding inside the format strings and the "\|"
					 * sequence look altered in this listing - confirm against the real file.
					 */
				2120	static void
				2121	spell_print_node(wordnode_T *node, int depth)
				2122	{
				2123	if (node->wn_u1.index)
				2124	{
				2125	/* Done this node before, print the reference. */
				2126	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
				2127	PRINTSOME(line2, depth, " ", 0, 0);
				2128	PRINTSOME(line3, depth, " ", 0, 0);
				2129	msg((char_u *)line1);
				2130	msg((char_u *)line2);
				2131	msg((char_u *)line3);
				2132	}
				2133	else
				2134	{
					/* Mark the node as seen before descending, so that a later visit
					 * takes the branch above. */
				2135	node->wn_u1.index = TRUE;
				2136
					/* Row 1: the byte of this node, or "$" for a NUL byte. */
				2137	if (node->wn_byte != NUL)
				2138	{
				2139	if (node->wn_child != NULL)
				2140	PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
				2141	else
				2142	/* Cannot happen? */
				2143	PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
				2144	}
				2145	else
				2146	PRINTSOME(line1, depth, " $ ", 0, 0);
				2147
					/* Row 2: node number and reference count. */
				2148	PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
				2149
					/* Row 3: a connector when a sibling follows. */
				2150	if (node->wn_sibling != NULL)
				2151	PRINTSOME(line3, depth, " \| ", 0, 0);
				2152	else
				2153	PRINTSOME(line3, depth, " ", 0, 0);
				2154
					/* A NUL byte node is not descended into: show the rows now. */
				2155	if (node->wn_byte == NUL)
				2156	{
				2157	msg((char_u *)line1);
				2158	msg((char_u *)line2);
				2159	msg((char_u *)line3);
				2160	}
				2161
				2162	/* do the children */
				2163	if (node->wn_byte != NUL && node->wn_child != NULL)
				2164	spell_print_node(node->wn_child, depth + 1);
				2165
				2166	/* do the siblings */
				2167	if (node->wn_sibling != NULL)
				2168	{
				2169	/* get rid of all parent details except \| */
				2170	STRCPY(line1, line3);
				2171	STRCPY(line2, line3);
				2172	spell_print_node(node->wn_sibling, depth);
				2173	}
				2174	}
				2175	}
				2176
				2177	static void
				2178	spell_print_tree(wordnode_T *root)
				2179	{
				2180	if (root != NULL)
				2181	{
				2182	/* Clear the "wn_u1.index" fields, used to remember what has been
				2183	* done. */
				2184	spell_clear_flags(root);
				2185
				2186	/* Recursively print the tree. */
				2187	spell_print_node(root, 0);
				2188	}
				2189	}
				2190	#endif /* SPELL_PRINTTREE */
				2191
				2192	/*
				2193	* Read the affix file "fname".
				2194	* Returns an afffile_T, NULL for complete failure.
				2195	*/
				2196	static afffile_T *
				2197	spell_read_aff(spellinfo_T spin, char_u fname)
				2198	{
				2199	FILE *fd;
				2200	afffile_T *aff;
				2201	char_u rline[MAXLINELEN];
				2202	char_u *line;
				2203	char_u *pc = NULL;
				2204	#define MAXITEMCNT 30
				2205	char_u *(items[MAXITEMCNT]);
				2206	int itemcnt;
				2207	char_u *p;
				2208	int lnum = 0;
				2209	affheader_T *cur_aff = NULL;
				2210	int did_postpone_prefix = FALSE;
				2211	int aff_todo = 0;
				2212	hashtab_T *tp;
				2213	char_u *low = NULL;
				2214	char_u *fol = NULL;
				2215	char_u *upp = NULL;
				2216	int do_rep;
				2217	int do_repsal;
				2218	int do_sal;
				2219	int do_mapline;
				2220	int found_map = FALSE;
				2221	hashitem_T *hi;
				2222	int l;
				2223	int compminlen = 0; /* COMPOUNDMIN value */
				2224	int compsylmax = 0; /* COMPOUNDSYLMAX value */
				2225	int compoptions = 0; /* COMP_ flags */
				2226	int compmax = 0; /* COMPOUNDWORDMAX value */
				2227	char_u compflags = NULL; / COMPOUNDFLAG and COMPOUNDRULE
				2228	concatenated */
				2229	char_u midword = NULL; / MIDWORD value */
				2230	char_u syllable = NULL; / SYLLABLE value */
				2231	char_u sofofrom = NULL; / SOFOFROM value */
				2232	char_u sofoto = NULL; / SOFOTO value */
				2233
				2234	/*
				2235	* Open the file.
				2236	*/
				2237	fd = mch_fopen((char *)fname, "r");
				2238	if (fd == NULL)
				2239	{
				2240	EMSG2(_(e_notopen), fname);
				2241	return NULL;
				2242	}
				2243
				2244	vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
				2245	spell_message(spin, IObuff);
				2246
				2247	/* Only do REP lines when not done in another .aff file already. */
				2248	do_rep = spin->si_rep.ga_len == 0;
				2249
				2250	/* Only do REPSAL lines when not done in another .aff file already. */
				2251	do_repsal = spin->si_repsal.ga_len == 0;
				2252
				2253	/* Only do SAL lines when not done in another .aff file already. */
				2254	do_sal = spin->si_sal.ga_len == 0;
				2255
				2256	/* Only do MAP lines when not done in another .aff file already. */
				2257	do_mapline = spin->si_map.ga_len == 0;
				2258
				2259	/*
				2260	* Allocate and init the afffile_T structure.
				2261	*/
				2262	aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
				2263	if (aff == NULL)
				2264	{
				2265	fclose(fd);
				2266	return NULL;
				2267	}
				2268	hash_init(&aff->af_pref);
				2269	hash_init(&aff->af_suff);
				2270	hash_init(&aff->af_comp);
				2271
				2272	/*
				2273	* Read all the lines in the file one by one.
				2274	*/
				2275	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				2276	{
				2277	line_breakcheck();
				2278	++lnum;
				2279
				2280	/* Skip comment lines. */
				2281	if (*rline == '#')
				2282	continue;
				2283
				2284	/* Convert from "SET" to 'encoding' when needed. */
				2285	vim_free(pc);
				2286	#ifdef FEAT_MBYTE
				2287	if (spin->si_conv.vc_type != CONV_NONE)
				2288	{
				2289	pc = string_convert(&spin->si_conv, rline, NULL);
				2290	if (pc == NULL)
				2291	{
				2292	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				2293	fname, lnum, rline);
				2294	continue;
				2295	}
				2296	line = pc;
				2297	}
				2298	else
				2299	#endif
				2300	{
				2301	pc = NULL;
				2302	line = rline;
				2303	}
				2304
				2305	/* Split the line up in white separated items. Put a NUL after each
				2306	* item. */
				2307	itemcnt = 0;
				2308	for (p = line; ; )
				2309	{
				2310	while (p != NUL && p <= ' ') /* skip white space and CR/NL */
				2311	++p;
				2312	if (*p == NUL)
				2313	break;
				2314	if (itemcnt == MAXITEMCNT) /* too many items */
				2315	break;
				2316	items[itemcnt++] = p;
				2317	/* A few items have arbitrary text argument, don't split them. */
				2318	if (itemcnt == 2 && spell_info_item(items[0]))
				2319	while (p >= ' ' \|\| p == TAB) /* skip until CR/NL */
				2320	++p;
				2321	else
				2322	while (p > ' ') / skip until white space or CR/NL */
				2323	++p;
				2324	if (*p == NUL)
				2325	break;
				2326	*p++ = NUL;
				2327	}
				2328
				2329	/* Handle non-empty lines. */
				2330	if (itemcnt > 0)
				2331	{
				2332	if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
				2333	{
				2334	#ifdef FEAT_MBYTE
				2335	/* Setup for conversion from "ENC" to 'encoding'. */
				2336	aff->af_enc = enc_canonize(items[1]);
				2337	if (aff->af_enc != NULL && !spin->si_ascii
				2338	&& convert_setup(&spin->si_conv, aff->af_enc,
				2339	p_enc) == FAIL)
				2340	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				2341	fname, aff->af_enc, p_enc);
				2342	spin->si_conv.vc_fail = TRUE;
				2343	#else
				2344	smsg((char_u *)_("Conversion in %s not supported"), fname);
				2345	#endif
				2346	}
				2347	else if (is_aff_rule(items, itemcnt, "FLAG", 2)
				2348	&& aff->af_flagtype == AFT_CHAR)
				2349	{
				2350	if (STRCMP(items[1], "long") == 0)
				2351	aff->af_flagtype = AFT_LONG;
				2352	else if (STRCMP(items[1], "num") == 0)
				2353	aff->af_flagtype = AFT_NUM;
				2354	else if (STRCMP(items[1], "caplong") == 0)
				2355	aff->af_flagtype = AFT_CAPLONG;
				2356	else
				2357	smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
				2358	fname, lnum, items[1]);
				2359	if (aff->af_rare != 0
				2360	\|\| aff->af_keepcase != 0
				2361	\|\| aff->af_bad != 0
				2362	\|\| aff->af_needaffix != 0
				2363	\|\| aff->af_circumfix != 0
				2364	\|\| aff->af_needcomp != 0
				2365	\|\| aff->af_comproot != 0
				2366	\|\| aff->af_nosuggest != 0
				2367	\|\| compflags != NULL
				2368	\|\| aff->af_suff.ht_used > 0
				2369	\|\| aff->af_pref.ht_used > 0)
				2370	smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
				2371	fname, lnum, items[1]);
				2372	}
				2373	else if (spell_info_item(items[0]))
				2374	{
				2375	p = (char_u *)getroom(spin,
				2376	(spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
				2377	+ STRLEN(items[0])
				2378	+ STRLEN(items[1]) + 3, FALSE);
				2379	if (p != NULL)
				2380	{
				2381	if (spin->si_info != NULL)
				2382	{
				2383	STRCPY(p, spin->si_info);
				2384	STRCAT(p, "\n");
				2385	}
				2386	STRCAT(p, items[0]);
				2387	STRCAT(p, " ");
				2388	STRCAT(p, items[1]);
				2389	spin->si_info = p;
				2390	}
				2391	}
				2392	else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
				2393	&& midword == NULL)
				2394	{
				2395	midword = getroom_save(spin, items[1]);
				2396	}
				2397	else if (is_aff_rule(items, itemcnt, "TRY", 2))
				2398	{
				2399	/* ignored, we look in the tree for what chars may appear */
				2400	}
				2401	/* TODO: remove "RAR" later */
				2402	else if ((is_aff_rule(items, itemcnt, "RAR", 2)
				2403	\|\| is_aff_rule(items, itemcnt, "RARE", 2))
				2404	&& aff->af_rare == 0)
				2405	{
				2406	aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
				2407	fname, lnum);
				2408	}
				2409	/* TODO: remove "KEP" later */
				2410	else if ((is_aff_rule(items, itemcnt, "KEP", 2)
				2411	\|\| is_aff_rule(items, itemcnt, "KEEPCASE", 2))
				2412	&& aff->af_keepcase == 0)
				2413	{
				2414	aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
				2415	fname, lnum);
				2416	}
				2417	else if ((is_aff_rule(items, itemcnt, "BAD", 2)
				2418	\|\| is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
				2419	&& aff->af_bad == 0)
				2420	{
				2421	aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
				2422	fname, lnum);
				2423	}
				2424	else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
				2425	&& aff->af_needaffix == 0)
				2426	{
				2427	aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
				2428	fname, lnum);
				2429	}
				2430	else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
				2431	&& aff->af_circumfix == 0)
				2432	{
				2433	aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
				2434	fname, lnum);
				2435	}
				2436	else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
				2437	&& aff->af_nosuggest == 0)
				2438	{
				2439	aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
				2440	fname, lnum);
				2441	}
				2442	else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
				2443	\|\| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
				2444	&& aff->af_needcomp == 0)
				2445	{
				2446	aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
				2447	fname, lnum);
				2448	}
				2449	else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
				2450	&& aff->af_comproot == 0)
				2451	{
				2452	aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
				2453	fname, lnum);
				2454	}
				2455	else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
				2456	&& aff->af_compforbid == 0)
				2457	{
				2458	aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
				2459	fname, lnum);
				2460	if (aff->af_pref.ht_used > 0)
				2461	smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
				2462	fname, lnum);
				2463	}
				2464	else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
				2465	&& aff->af_comppermit == 0)
				2466	{
				2467	aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
				2468	fname, lnum);
				2469	if (aff->af_pref.ht_used > 0)
				2470	smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
				2471	fname, lnum);
				2472	}
				2473	else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
				2474	&& compflags == NULL)
				2475	{
				2476	/* Turn flag "c" into COMPOUNDRULE compatible string "c+",
				2477	* "Na" into "Na+", "1234" into "1234+". */
				2478	p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
				2479	if (p != NULL)
				2480	{
				2481	STRCPY(p, items[1]);
				2482	STRCAT(p, "+");
				2483	compflags = p;
				2484	}
				2485	}
				2486	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
				2487	{
				2488	/* We don't use the count, but do check that it's a number and
				2489	* not COMPOUNDRULE mistyped. */
				2490	if (atoi((char *)items[1]) == 0)
				2491	smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"),
				2492	fname, lnum, items[1]);
				2493	}
				2494	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
				2495	{
				2496	/* Don't use the first rule if it is a number. */
				2497	if (compflags != NULL \|\| *skipdigits(items[1]) != NUL)
				2498	{
				2499	/* Concatenate this string to previously defined ones,
				2500	* using a slash to separate them. */
				2501	l = (int)STRLEN(items[1]) + 1;
				2502	if (compflags != NULL)
				2503	l += (int)STRLEN(compflags) + 1;
				2504	p = getroom(spin, l, FALSE);
				2505	if (p != NULL)
				2506	{
				2507	if (compflags != NULL)
				2508	{
				2509	STRCPY(p, compflags);
				2510	STRCAT(p, "/");
				2511	}
				2512	STRCAT(p, items[1]);
				2513	compflags = p;
				2514	}
				2515	}
				2516	}
				2517	else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
				2518	&& compmax == 0)
				2519	{
				2520	compmax = atoi((char *)items[1]);
				2521	if (compmax == 0)
				2522	smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
				2523	fname, lnum, items[1]);
				2524	}
				2525	else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
				2526	&& compminlen == 0)
				2527	{
				2528	compminlen = atoi((char *)items[1]);
				2529	if (compminlen == 0)
				2530	smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
				2531	fname, lnum, items[1]);
				2532	}
				2533	else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
				2534	&& compsylmax == 0)
				2535	{
				2536	compsylmax = atoi((char *)items[1]);
				2537	if (compsylmax == 0)
				2538	smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
				2539	fname, lnum, items[1]);
				2540	}
				2541	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
				2542	{
				2543	compoptions \|= COMP_CHECKDUP;
				2544	}
				2545	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
				2546	{
				2547	compoptions \|= COMP_CHECKREP;
				2548	}
				2549	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
				2550	{
				2551	compoptions \|= COMP_CHECKCASE;
				2552	}
				2553	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
				2554	{
				2555	compoptions \|= COMP_CHECKTRIPLE;
				2556	}
				2557	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
				2558	{
				2559	if (atoi((char *)items[1]) == 0)
				2560	smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
				2561	fname, lnum, items[1]);
				2562	}
				2563	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
				2564	{
				2565	garray_T *gap = &spin->si_comppat;
				2566	int i;
				2567
				2568	/* Only add the couple if it isn't already there. */
				2569	for (i = 0; i < gap->ga_len - 1; i += 2)
				2570	if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
				2571	&& STRCMP(((char_u **)(gap->ga_data))[i + 1],
				2572	items[2]) == 0)
				2573	break;
				2574	if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
				2575	{
				2576	((char_u **)(gap->ga_data))[gap->ga_len++]
				2577	= getroom_save(spin, items[1]);
				2578	((char_u **)(gap->ga_data))[gap->ga_len++]
				2579	= getroom_save(spin, items[2]);
				2580	}
				2581	}
				2582	else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
				2583	&& syllable == NULL)
				2584	{
				2585	syllable = getroom_save(spin, items[1]);
				2586	}
				2587	else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
				2588	{
				2589	spin->si_nobreak = TRUE;
				2590	}
				2591	else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
				2592	{
				2593	spin->si_nosplitsugs = TRUE;
				2594	}
				2595	else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
				2596	{
				2597	spin->si_nocompoundsugs = TRUE;
				2598	}
				2599	else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
				2600	{
				2601	spin->si_nosugfile = TRUE;
				2602	}
				2603	else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
				2604	{
				2605	aff->af_pfxpostpone = TRUE;
				2606	}
				2607	else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
				2608	{
				2609	aff->af_ignoreextra = TRUE;
				2610	}
				2611	else if ((STRCMP(items[0], "PFX") == 0
				2612	\|\| STRCMP(items[0], "SFX") == 0)
				2613	&& aff_todo == 0
				2614	&& itemcnt >= 4)
				2615	{
				2616	int lasti = 4;
				2617	char_u key[AH_KEY_LEN];
				2618
				2619	if (*items[0] == 'P')
				2620	tp = &aff->af_pref;
				2621	else
				2622	tp = &aff->af_suff;
				2623
				2624	/* Myspell allows the same affix name to be used multiple
				2625	* times. The affix files that do this have an undocumented
				2626	* "S" flag on all but the last block, thus we check for that
				2627	* and store it in ah_follows. */
				2628	vim_strncpy(key, items[1], AH_KEY_LEN - 1);
				2629	hi = hash_find(tp, key);
				2630	if (!HASHITEM_EMPTY(hi))
				2631	{
				2632	cur_aff = HI2AH(hi);
				2633	if (cur_aff->ah_combine != (*items[2] == 'Y'))
				2634	smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
				2635	fname, lnum, items[1]);
				2636	if (!cur_aff->ah_follows)
				2637	smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
				2638	fname, lnum, items[1]);
				2639	}
				2640	else
				2641	{
				2642	/* New affix letter. */
				2643	cur_aff = (affheader_T *)getroom(spin,
				2644	sizeof(affheader_T), TRUE);
				2645	if (cur_aff == NULL)
				2646	break;
				2647	cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
				2648	fname, lnum);
				2649	if (cur_aff->ah_flag == 0 \|\| STRLEN(items[1]) >= AH_KEY_LEN)
				2650	break;
				2651	if (cur_aff->ah_flag == aff->af_bad
				2652	\|\| cur_aff->ah_flag == aff->af_rare
				2653	\|\| cur_aff->ah_flag == aff->af_keepcase
				2654	\|\| cur_aff->ah_flag == aff->af_needaffix
				2655	\|\| cur_aff->ah_flag == aff->af_circumfix
				2656	\|\| cur_aff->ah_flag == aff->af_nosuggest
				2657	\|\| cur_aff->ah_flag == aff->af_needcomp
				2658	\|\| cur_aff->ah_flag == aff->af_comproot)
				2659	smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
				2660	fname, lnum, items[1]);
				2661	STRCPY(cur_aff->ah_key, items[1]);
				2662	hash_add(tp, cur_aff->ah_key);
				2663
				2664	cur_aff->ah_combine = (*items[2] == 'Y');
				2665	}
				2666
				2667	/* Check for the "S" flag, which apparently means that another
				2668	* block with the same affix name is following. */
				2669	if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
				2670	{
				2671	++lasti;
				2672	cur_aff->ah_follows = TRUE;
				2673	}
				2674	else
				2675	cur_aff->ah_follows = FALSE;
				2676
				2677	/* Myspell allows extra text after the item, but that might
				2678	* mean mistakes go unnoticed. Require a comment-starter. */
				2679	if (itemcnt > lasti && *items[lasti] != '#')
				2680	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2681
				2682	if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
				2683	smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
				2684	fname, lnum, items[2]);
				2685
				2686	if (*items[0] == 'P' && aff->af_pfxpostpone)
				2687	{
				2688	if (cur_aff->ah_newID == 0)
				2689	{
				2690	/* Use a new number in the .spl file later, to be able
				2691	* to handle multiple .aff files. */
				2692	check_renumber(spin);
				2693	cur_aff->ah_newID = ++spin->si_newprefID;
				2694
				2695	/* We only really use ah_newID if the prefix is
				2696	* postponed. We know that only after handling all
				2697	* the items. */
				2698	did_postpone_prefix = FALSE;
				2699	}
				2700	else
				2701	/* Did use the ID in a previous block. */
				2702	did_postpone_prefix = TRUE;
				2703	}
				2704
				2705	aff_todo = atoi((char *)items[3]);
				2706	}
				2707	else if ((STRCMP(items[0], "PFX") == 0
				2708	\|\| STRCMP(items[0], "SFX") == 0)
				2709	&& aff_todo > 0
				2710	&& STRCMP(cur_aff->ah_key, items[1]) == 0
				2711	&& itemcnt >= 5)
				2712	{
				2713	affentry_T *aff_entry;
				2714	int upper = FALSE;
				2715	int lasti = 5;
				2716
				2717	/* Myspell allows extra text after the item, but that might
				2718	* mean mistakes go unnoticed. Require a comment-starter,
				2719	* unless IGNOREEXTRA is used. Hunspell uses a "-" item. */
				2720	if (itemcnt > lasti
				2721	&& !aff->af_ignoreextra
				2722	&& *items[lasti] != '#'
				2723	&& (STRCMP(items[lasti], "-") != 0
				2724	\|\| itemcnt != lasti + 1))
				2725	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2726
				2727	/* New item for an affix letter. */
				2728	--aff_todo;
				2729	aff_entry = (affentry_T *)getroom(spin,
				2730	sizeof(affentry_T), TRUE);
				2731	if (aff_entry == NULL)
				2732	break;
				2733
				2734	if (STRCMP(items[2], "0") != 0)
				2735	aff_entry->ae_chop = getroom_save(spin, items[2]);
				2736	if (STRCMP(items[3], "0") != 0)
				2737	{
				2738	aff_entry->ae_add = getroom_save(spin, items[3]);
				2739
				2740	/* Recognize flags on the affix: abcd/XYZ */
				2741	aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
				2742	if (aff_entry->ae_flags != NULL)
				2743	{
				2744	*aff_entry->ae_flags++ = NUL;
				2745	aff_process_flags(aff, aff_entry);
				2746	}
				2747	}
				2748
				2749	/* Don't use an affix entry with non-ASCII characters when
				2750	* "spin->si_ascii" is TRUE. */
				2751	if (!spin->si_ascii \|\| !(has_non_ascii(aff_entry->ae_chop)
				2752	\|\| has_non_ascii(aff_entry->ae_add)))
				2753	{
				2754	aff_entry->ae_next = cur_aff->ah_first;
				2755	cur_aff->ah_first = aff_entry;
				2756
				2757	if (STRCMP(items[4], ".") != 0)
				2758	{
				2759	char_u buf[MAXLINELEN];
				2760
				2761	aff_entry->ae_cond = getroom_save(spin, items[4]);
				2762	if (*items[0] == 'P')
				2763	sprintf((char *)buf, "^%s", items[4]);
				2764	else
				2765	sprintf((char *)buf, "%s$", items[4]);
				2766	aff_entry->ae_prog = vim_regcomp(buf,
				2767	RE_MAGIC + RE_STRING + RE_STRICT);
				2768	if (aff_entry->ae_prog == NULL)
				2769	smsg((char_u *)_("Broken condition in %s line %d: %s"),
				2770	fname, lnum, items[4]);
				2771	}
				2772
				2773	/* For postponed prefixes we need an entry in si_prefcond
				2774	* for the condition. Use an existing one if possible.
				2775	* Can't be done for an affix with flags, ignoring
				2776	* COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
				2777	if (*items[0] == 'P' && aff->af_pfxpostpone
				2778	&& aff_entry->ae_flags == NULL)
				2779	{
				2780	/* When the chop string is one lower-case letter and
				2781	* the add string ends in the upper-case letter we set
				2782	* the "upper" flag, clear "ae_chop" and remove the
				2783	* letters from "ae_add". The condition must either
				2784	* be empty or start with the same letter. */
				2785	if (aff_entry->ae_chop != NULL
				2786	&& aff_entry->ae_add != NULL
				2787	#ifdef FEAT_MBYTE
				2788	&& aff_entry->ae_chop[(*mb_ptr2len)(
				2789	aff_entry->ae_chop)] == NUL
				2790	#else
				2791	&& aff_entry->ae_chop[1] == NUL
				2792	#endif
				2793	)
				2794	{
				2795	int c, c_up;
				2796
				2797	c = PTR2CHAR(aff_entry->ae_chop);
				2798	c_up = SPELL_TOUPPER(c);
				2799	if (c_up != c
				2800	&& (aff_entry->ae_cond == NULL
				2801	\|\| PTR2CHAR(aff_entry->ae_cond) == c))
				2802	{
				2803	p = aff_entry->ae_add
				2804	+ STRLEN(aff_entry->ae_add);
				2805	mb_ptr_back(aff_entry->ae_add, p);
				2806	if (PTR2CHAR(p) == c_up)
				2807	{
				2808	upper = TRUE;
				2809	aff_entry->ae_chop = NULL;
				2810	*p = NUL;
				2811
				2812	/* The condition is matched with the
				2813	* actual word, thus must check for the
				2814	* upper-case letter. */
				2815	if (aff_entry->ae_cond != NULL)
				2816	{
				2817	char_u buf[MAXLINELEN];
				2818	#ifdef FEAT_MBYTE
				2819	if (has_mbyte)
				2820	{
				2821	onecap_copy(items[4], buf, TRUE);
				2822	aff_entry->ae_cond = getroom_save(
				2823	spin, buf);
				2824	}
				2825	else
				2826	#endif
				2827	*aff_entry->ae_cond = c_up;
				2828	if (aff_entry->ae_cond != NULL)
				2829	{
				2830	sprintf((char *)buf, "^%s",
				2831	aff_entry->ae_cond);
				2832	vim_regfree(aff_entry->ae_prog);
				2833	aff_entry->ae_prog = vim_regcomp(
				2834	buf, RE_MAGIC + RE_STRING);
				2835	}
				2836	}
				2837	}
				2838	}
				2839	}
				2840
				2841	if (aff_entry->ae_chop == NULL
				2842	&& aff_entry->ae_flags == NULL)
				2843	{
				2844	int idx;
				2845	char_u **pp;
				2846	int n;
				2847
				2848	/* Find a previously used condition. */
				2849	for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
				2850	--idx)
				2851	{
				2852	p = ((char_u **)spin->si_prefcond.ga_data)[idx];
				2853	if (str_equal(p, aff_entry->ae_cond))
				2854	break;
				2855	}
				2856	if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
				2857	{
				2858	/* Not found, add a new condition. */
				2859	idx = spin->si_prefcond.ga_len++;
				2860	pp = ((char_u **)spin->si_prefcond.ga_data)
				2861	+ idx;
				2862	if (aff_entry->ae_cond == NULL)
				2863	*pp = NULL;
				2864	else
				2865	*pp = getroom_save(spin,
				2866	aff_entry->ae_cond);
				2867	}
				2868
				2869	/* Add the prefix to the prefix tree. */
				2870	if (aff_entry->ae_add == NULL)
				2871	p = (char_u *)"";
				2872	else
				2873	p = aff_entry->ae_add;
				2874
				2875	/* PFX_FLAGS is a negative number, so that
				2876	* tree_add_word() knows this is the prefix tree. */
				2877	n = PFX_FLAGS;
				2878	if (!cur_aff->ah_combine)
				2879	n \|= WFP_NC;
				2880	if (upper)
				2881	n \|= WFP_UP;
				2882	if (aff_entry->ae_comppermit)
				2883	n \|= WFP_COMPPERMIT;
				2884	if (aff_entry->ae_compforbid)
				2885	n \|= WFP_COMPFORBID;
				2886	tree_add_word(spin, p, spin->si_prefroot, n,
				2887	idx, cur_aff->ah_newID);
				2888	did_postpone_prefix = TRUE;
				2889	}
				2890
				2891	/* Didn't actually use ah_newID, backup si_newprefID. */
				2892	if (aff_todo == 0 && !did_postpone_prefix)
				2893	{
				2894	--spin->si_newprefID;
				2895	cur_aff->ah_newID = 0;
				2896	}
				2897	}
				2898	}
				2899	}
				2900	else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
				2901	{
				2902	fol = vim_strsave(items[1]);
				2903	}
				2904	else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
				2905	{
				2906	low = vim_strsave(items[1]);
				2907	}
				2908	else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
				2909	{
				2910	upp = vim_strsave(items[1]);
				2911	}
				2912	else if (is_aff_rule(items, itemcnt, "REP", 2)
				2913	\|\| is_aff_rule(items, itemcnt, "REPSAL", 2))
				2914	{
				2915	/* Ignore REP/REPSAL count */;
				2916	if (!isdigit(*items[1]))
				2917	smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
				2918	fname, lnum);
				2919	}
				2920	else if ((STRCMP(items[0], "REP") == 0
				2921	\|\| STRCMP(items[0], "REPSAL") == 0)
				2922	&& itemcnt >= 3)
				2923	{
				2924	/* REP/REPSAL item */
				2925	/* Myspell ignores extra arguments, we require it starts with
				2926	* # to detect mistakes. */
				2927	if (itemcnt > 3 && items[3][0] != '#')
				2928	smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
				2929	if (items[0][3] == 'S' ? do_repsal : do_rep)
				2930	{
				2931	/* Replace underscore with space (can't include a space
				2932	* directly). */
				2933	for (p = items[1]; *p != NUL; mb_ptr_adv(p))
				2934	if (*p == '_')
				2935	*p = ' ';
				2936	for (p = items[2]; *p != NUL; mb_ptr_adv(p))
				2937	if (*p == '_')
				2938	*p = ' ';
				2939	add_fromto(spin, items[0][3] == 'S'
				2940	? &spin->si_repsal
				2941	: &spin->si_rep, items[1], items[2]);
				2942	}
				2943	}
				2944	else if (is_aff_rule(items, itemcnt, "MAP", 2))
				2945	{
				2946	/* MAP item or count */
				2947	if (!found_map)
				2948	{
				2949	/* First line contains the count. */
				2950	found_map = TRUE;
				2951	if (!isdigit(*items[1]))
				2952	smsg((char_u *)_("Expected MAP count in %s line %d"),
				2953	fname, lnum);
				2954	}
				2955	else if (do_mapline)
				2956	{
				2957	int c;
				2958
				2959	/* Check that every character appears only once. */
				2960	for (p = items[1]; *p != NUL; )
				2961	{
				2962	#ifdef FEAT_MBYTE
				2963	c = mb_ptr2char_adv(&p);
				2964	#else
				2965	c = *p++;
				2966	#endif
				2967	if ((spin->si_map.ga_len > 0
				2968	&& vim_strchr(spin->si_map.ga_data, c)
				2969	!= NULL)
				2970	\|\| vim_strchr(p, c) != NULL)
				2971	smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
				2972	fname, lnum);
				2973	}
				2974
				2975	/* We simply concatenate all the MAP strings, separated by
				2976	* slashes. */
				2977	ga_concat(&spin->si_map, items[1]);
				2978	ga_append(&spin->si_map, '/');
				2979	}
				2980	}
				2981	/* Accept "SAL from to" and "SAL from to #comment". */
				2982	else if (is_aff_rule(items, itemcnt, "SAL", 3))
				2983	{
				2984	if (do_sal)
				2985	{
				2986	/* SAL item (sounds-a-like)
				2987	* Either one of the known keys or a from-to pair. */
				2988	if (STRCMP(items[1], "followup") == 0)
				2989	spin->si_followup = sal_to_bool(items[2]);
				2990	else if (STRCMP(items[1], "collapse_result") == 0)
				2991	spin->si_collapse = sal_to_bool(items[2]);
				2992	else if (STRCMP(items[1], "remove_accents") == 0)
				2993	spin->si_rem_accents = sal_to_bool(items[2]);
				2994	else
				2995	/* when "to" is "_" it means empty */
				2996	add_fromto(spin, &spin->si_sal, items[1],
				2997	STRCMP(items[2], "_") == 0 ? (char_u *)""
				2998	: items[2]);
				2999	}
				3000	}
				3001	else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
				3002	&& sofofrom == NULL)
				3003	{
				3004	sofofrom = getroom_save(spin, items[1]);
				3005	}
				3006	else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
				3007	&& sofoto == NULL)
				3008	{
				3009	sofoto = getroom_save(spin, items[1]);
				3010	}
				3011	else if (STRCMP(items[0], "COMMON") == 0)
				3012	{
				3013	int i;
				3014
				3015	for (i = 1; i < itemcnt; ++i)
				3016	{
				3017	if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
				3018	items[i])))
				3019	{
				3020	p = vim_strsave(items[i]);
				3021	if (p == NULL)
				3022	break;
				3023	hash_add(&spin->si_commonwords, p);
				3024	}
				3025	}
				3026	}
				3027	else
				3028	smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
				3029	fname, lnum, items[0]);
				3030	}
				3031	}
				3032
				3033	if (fol != NULL \|\| low != NULL \|\| upp != NULL)
				3034	{
				3035	if (spin->si_clear_chartab)
				3036	{
				3037	/* Clear the char type tables, don't want to use any of the
				3038	* currently used spell properties. */
				3039	init_spell_chartab();
				3040	spin->si_clear_chartab = FALSE;
				3041	}
				3042
				3043	/*
				3044	* Don't write a word table for an ASCII file, so that we don't check
				3045	* for conflicts with a word table that matches 'encoding'.
				3046	* Don't write one for utf-8 either, we use utf_*() and
				3047	* mb_get_class(), the list of chars in the file will be incomplete.
				3048	*/
				3049	if (!spin->si_ascii
				3050	#ifdef FEAT_MBYTE
				3051	&& !enc_utf8
				3052	#endif
				3053	)
				3054	{
				3055	if (fol == NULL \|\| low == NULL \|\| upp == NULL)
				3056	smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
				3057	else
				3058	(void)set_spell_chartab(fol, low, upp);
				3059	}
				3060
				3061	vim_free(fol);
				3062	vim_free(low);
				3063	vim_free(upp);
				3064	}
				3065
				3066	/* Use compound specifications of the .aff file for the spell info. */
				3067	if (compmax != 0)
				3068	{
				3069	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
				3070	spin->si_compmax = compmax;
				3071	}
				3072
				3073	if (compminlen != 0)
				3074	{
				3075	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
				3076	spin->si_compminlen = compminlen;
				3077	}
				3078
				3079	if (compsylmax != 0)
				3080	{
				3081	if (syllable == NULL)
				3082	smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
				3083	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
				3084	spin->si_compsylmax = compsylmax;
				3085	}
				3086
				3087	if (compoptions != 0)
				3088	{
				3089	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
				3090	spin->si_compoptions \|= compoptions;
				3091	}
				3092
				3093	if (compflags != NULL)
				3094	process_compflags(spin, aff, compflags);
				3095
				3096	/* Check that we didn't use too many renumbered flags. */
				3097	if (spin->si_newcompID < spin->si_newprefID)
				3098	{
				3099	if (spin->si_newcompID == 127 \|\| spin->si_newcompID == 255)
				3100	MSG(_("Too many postponed prefixes"));
				3101	else if (spin->si_newprefID == 0 \|\| spin->si_newprefID == 127)
				3102	MSG(_("Too many compound flags"));
				3103	else
				3104	MSG(_("Too many postponed prefixes and/or compound flags"));
				3105	}
				3106
				3107	if (syllable != NULL)
				3108	{
				3109	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
				3110	spin->si_syllable = syllable;
				3111	}
				3112
				3113	if (sofofrom != NULL \|\| sofoto != NULL)
				3114	{
				3115	if (sofofrom == NULL \|\| sofoto == NULL)
				3116	smsg((char_u *)_("Missing SOFO%s line in %s"),
				3117	sofofrom == NULL ? "FROM" : "TO", fname);
				3118	else if (spin->si_sal.ga_len > 0)
				3119	smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
				3120	else
				3121	{
				3122	aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
				3123	aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
				3124	spin->si_sofofr = sofofrom;
				3125	spin->si_sofoto = sofoto;
				3126	}
				3127	}
				3128
				3129	if (midword != NULL)
				3130	{
				3131	aff_check_string(spin->si_midword, midword, "MIDWORD");
				3132	spin->si_midword = midword;
				3133	}
				3134
				3135	vim_free(pc);
				3136	fclose(fd);
				3137	return aff;
				3138	}
				3139
				3140	/*
				3141	* Return TRUE when items[0] equals "rulename", there are "mincount" items or
				3142	* a comment is following after item "mincount".
				3143	*/
				3144	static int
				3145	is_aff_rule(
				3146	char_u **items,
				3147	int itemcnt,
				3148	char *rulename,
				3149	int mincount)
				3150	{
				3151	return (STRCMP(items[0], rulename) == 0
				3152	&& (itemcnt == mincount
				3153	\|\| (itemcnt > mincount && items[mincount][0] == '#')));
				3154	}
				3155
				3156	/*
				3157	* For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
				3158	* ae_flags to ae_comppermit and ae_compforbid.
				3159	*/
				3160	static void
				3161	aff_process_flags(afffile_T affile, affentry_T entry)
				3162	{
				3163	char_u *p;
				3164	char_u *prevp;
				3165	unsigned flag;
				3166
				3167	if (entry->ae_flags != NULL
				3168	&& (affile->af_compforbid != 0 \|\| affile->af_comppermit != 0))
				3169	{
				3170	for (p = entry->ae_flags; *p != NUL; )
				3171	{
				3172	prevp = p;
				3173	flag = get_affitem(affile->af_flagtype, &p);
				3174	if (flag == affile->af_comppermit \|\| flag == affile->af_compforbid)
				3175	{
				3176	STRMOVE(prevp, p);
				3177	p = prevp;
				3178	if (flag == affile->af_comppermit)
				3179	entry->ae_comppermit = TRUE;
				3180	else
				3181	entry->ae_compforbid = TRUE;
				3182	}
				3183	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3184	++p;
				3185	}
				3186	if (*entry->ae_flags == NUL)
				3187	entry->ae_flags = NULL; /* nothing left */
				3188	}
				3189	}
				3190
				3191	/*
				3192	* Return TRUE if "s" is the name of an info item in the affix file.
				3193	*/
				3194	static int
				3195	spell_info_item(char_u *s)
				3196	{
				3197	return STRCMP(s, "NAME") == 0
				3198	\|\| STRCMP(s, "HOME") == 0
				3199	\|\| STRCMP(s, "VERSION") == 0
				3200	\|\| STRCMP(s, "AUTHOR") == 0
				3201	\|\| STRCMP(s, "EMAIL") == 0
				3202	\|\| STRCMP(s, "COPYRIGHT") == 0;
				3203	}
				3204
				3205	/*
				3206	* Turn an affix flag name into a number, according to the FLAG type.
				3207	* returns zero for failure.
				3208	*/
				3209	static unsigned
				3210	affitem2flag(
				3211	int flagtype,
				3212	char_u *item,
				3213	char_u *fname,
				3214	int lnum)
				3215	{
				3216	unsigned res;
				3217	char_u *p = item;
				3218
				3219	res = get_affitem(flagtype, &p);
				3220	if (res == 0)
				3221	{
				3222	if (flagtype == AFT_NUM)
				3223	smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
				3224	fname, lnum, item);
				3225	else
				3226	smsg((char_u *)_("Illegal flag in %s line %d: %s"),
				3227	fname, lnum, item);
				3228	}
				3229	if (*p != NUL)
				3230	{
				3231	smsg((char_u *)_(e_affname), fname, lnum, item);
				3232	return 0;
				3233	}
				3234
				3235	return res;
				3236	}
				3237
				3238	/*
				3239	* Get one affix name from "*pp" and advance the pointer.
				3240	* Returns zero for an error, still advances the pointer then.
				3241	*/
				3242	static unsigned
				3243	get_affitem(int flagtype, char_u **pp)
				3244	{
				3245	int res;
				3246
				3247	if (flagtype == AFT_NUM)
				3248	{
				3249	if (!VIM_ISDIGIT(**pp))
				3250	{
				3251	++pp; / always advance, avoid getting stuck */
				3252	return 0;
				3253	}
				3254	res = getdigits(pp);
				3255	}
				3256	else
				3257	{
				3258	#ifdef FEAT_MBYTE
				3259	res = mb_ptr2char_adv(pp);
				3260	#else
				3261	res = (pp)++;
				3262	#endif
				3263	if (flagtype == AFT_LONG \|\| (flagtype == AFT_CAPLONG
				3264	&& res >= 'A' && res <= 'Z'))
				3265	{
				3266	if (**pp == NUL)
				3267	return 0;
				3268	#ifdef FEAT_MBYTE
				3269	res = mb_ptr2char_adv(pp) + (res << 16);
				3270	#else
				3271	res = (pp)++ + (res << 16);
				3272	#endif
				3273	}
				3274	}
				3275	return res;
				3276	}
				3277
				3278	/*
				3279	* Process the "compflags" string used in an affix file and append it to
				3280	* spin->si_compflags.
				3281	* The processing involves changing the affix names to ID numbers, so that
				3282	* they fit in one byte.
				3283	*/
				3284	static void
				3285	process_compflags(
				3286	spellinfo_T *spin,
				3287	afffile_T *aff,
				3288	char_u *compflags)
				3289	{
				3290	char_u *p;
				3291	char_u *prevp;
				3292	unsigned flag;
				3293	compitem_T *ci;
				3294	int id;
				3295	int len;
				3296	char_u *tp;
				3297	char_u key[AH_KEY_LEN];
				3298	hashitem_T *hi;
				3299
				3300	/* Make room for the old and the new compflags, concatenated with a / in
				3301	* between. Processing it makes it shorter, but we don't know by how
				3302	* much, thus allocate the maximum. */
				3303	len = (int)STRLEN(compflags) + 1;
				3304	if (spin->si_compflags != NULL)
				3305	len += (int)STRLEN(spin->si_compflags) + 1;
				3306	p = getroom(spin, len, FALSE);
				3307	if (p == NULL)
				3308	return;
				3309	if (spin->si_compflags != NULL)
				3310	{
				3311	STRCPY(p, spin->si_compflags);
				3312	STRCAT(p, "/");
				3313	}
				3314	spin->si_compflags = p;
				3315	tp = p + STRLEN(p);
				3316
				3317	for (p = compflags; *p != NUL; )
				3318	{
				3319	if (vim_strchr((char_u )"/?+[]", *p) != NULL)
				3320	/* Copy non-flag characters directly. */
				3321	tp++ = p++;
				3322	else
				3323	{
				3324	/* First get the flag number, also checks validity. */
				3325	prevp = p;
				3326	flag = get_affitem(aff->af_flagtype, &p);
				3327	if (flag != 0)
				3328	{
				3329	/* Find the flag in the hashtable. If it was used before, use
				3330	* the existing ID. Otherwise add a new entry. */
				3331	vim_strncpy(key, prevp, p - prevp);
				3332	hi = hash_find(&aff->af_comp, key);
				3333	if (!HASHITEM_EMPTY(hi))
				3334	id = HI2CI(hi)->ci_newID;
				3335	else
				3336	{
				3337	ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
				3338	if (ci == NULL)
				3339	break;
				3340	STRCPY(ci->ci_key, key);
				3341	ci->ci_flag = flag;
				3342	/* Avoid using a flag ID that has a special meaning in a
				3343	* regexp (also inside []). */
				3344	do
				3345	{
				3346	check_renumber(spin);
				3347	id = spin->si_newcompID--;
				3348	} while (vim_strchr((char_u )"/?+[]\\-^", id) != NULL);
				3349	ci->ci_newID = id;
				3350	hash_add(&aff->af_comp, ci->ci_key);
				3351	}
				3352	*tp++ = id;
				3353	}
				3354	if (aff->af_flagtype == AFT_NUM && *p == ',')
				3355	++p;
				3356	}
				3357	}
				3358
				3359	*tp = NUL;
				3360	}
				3361
				3362	/*
				3363	* Check that the new IDs for postponed affixes and compounding don't overrun
				3364	* each other. We have almost 255 available, but start at 0-127 to avoid
				3365	* using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
				3366	* When that is used up an error message is given.
				3367	*/
				3368	static void
				3369	check_renumber(spellinfo_T *spin)
				3370	{
				3371	if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
				3372	{
				3373	spin->si_newprefID = 127;
				3374	spin->si_newcompID = 255;
				3375	}
				3376	}
				3377
				3378	/*
				3379	* Return TRUE if flag "flag" appears in affix list "afflist".
				3380	*/
				3381	static int
				3382	flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
				3383	{
				3384	char_u *p;
				3385	unsigned n;
				3386
				3387	switch (flagtype)
				3388	{
				3389	case AFT_CHAR:
				3390	return vim_strchr(afflist, flag) != NULL;
				3391
				3392	case AFT_CAPLONG:
				3393	case AFT_LONG:
				3394	for (p = afflist; *p != NUL; )
				3395	{
				3396	#ifdef FEAT_MBYTE
				3397	n = mb_ptr2char_adv(&p);
				3398	#else
				3399	n = *p++;
				3400	#endif
				3401	if ((flagtype == AFT_LONG \|\| (n >= 'A' && n <= 'Z'))
				3402	&& *p != NUL)
				3403	#ifdef FEAT_MBYTE
				3404	n = mb_ptr2char_adv(&p) + (n << 16);
				3405	#else
				3406	n = *p++ + (n << 16);
				3407	#endif
				3408	if (n == flag)
				3409	return TRUE;
				3410	}
				3411	break;
				3412
				3413	case AFT_NUM:
				3414	for (p = afflist; *p != NUL; )
				3415	{
				3416	n = getdigits(&p);
				3417	if (n == flag)
				3418	return TRUE;
				3419	if (p != NUL) / skip over comma */
				3420	++p;
				3421	}
				3422	break;
				3423	}
				3424	return FALSE;
				3425	}
				3426
				3427	/*
				3428	* Give a warning when "spinval" and "affval" numbers are set and not the same.
				3429	*/
				3430	static void
				3431	aff_check_number(int spinval, int affval, char *name)
				3432	{
				3433	if (spinval != 0 && spinval != affval)
				3434	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3435	}
				3436
				3437	/*
				3438	* Give a warning when "spinval" and "affval" strings are set and not the same.
				3439	*/
				3440	static void
				3441	aff_check_string(char_u spinval, char_u affval, char *name)
				3442	{
				3443	if (spinval != NULL && STRCMP(spinval, affval) != 0)
				3444	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3445	}
				3446
				3447	/*
				3448	* Return TRUE if strings "s1" and "s2" are equal. Also consider both being
				3449	* NULL as equal.
				3450	*/
				3451	static int
				3452	str_equal(char_u s1, char_u s2)
				3453	{
				3454	if (s1 == NULL \|\| s2 == NULL)
				3455	return s1 == s2;
				3456	return STRCMP(s1, s2) == 0;
				3457	}
				3458
				3459	/*
				3460	* Add a from-to item to "gap". Used for REP and SAL items.
				3461	* They are stored case-folded.
				3462	*/
				3463	static void
				3464	add_fromto(
				3465	spellinfo_T *spin,
				3466	garray_T *gap,
				3467	char_u *from,
				3468	char_u *to)
				3469	{
				3470	fromto_T *ftp;
				3471	char_u word[MAXWLEN];
				3472
				3473	if (ga_grow(gap, 1) == OK)
				3474	{
				3475	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
				3476	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
				3477	ftp->ft_from = getroom_save(spin, word);
				3478	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
				3479	ftp->ft_to = getroom_save(spin, word);
				3480	++gap->ga_len;
				3481	}
				3482	}
				3483
				3484	/*
				3485	* Convert a boolean argument in a SAL line to TRUE or FALSE;
				3486	*/
				3487	static int
				3488	sal_to_bool(char_u *s)
				3489	{
				3490	return STRCMP(s, "1") == 0 \|\| STRCMP(s, "true") == 0;
				3491	}
				3492
				3493	/*
				3494	* Free the structure filled by spell_read_aff().
				3495	*/
				3496	static void
				3497	spell_free_aff(afffile_T *aff)
				3498	{
				3499	hashtab_T *ht;
				3500	hashitem_T *hi;
				3501	int todo;
				3502	affheader_T *ah;
				3503	affentry_T *ae;
				3504
				3505	vim_free(aff->af_enc);
				3506
				3507	/* All this trouble to free the "ae_prog" items... */
				3508	for (ht = &aff->af_pref; ; ht = &aff->af_suff)
				3509	{
				3510	todo = (int)ht->ht_used;
				3511	for (hi = ht->ht_array; todo > 0; ++hi)
				3512	{
				3513	if (!HASHITEM_EMPTY(hi))
				3514	{
				3515	--todo;
				3516	ah = HI2AH(hi);
				3517	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3518	vim_regfree(ae->ae_prog);
				3519	}
				3520	}
				3521	if (ht == &aff->af_suff)
				3522	break;
				3523	}
				3524
				3525	hash_clear(&aff->af_pref);
				3526	hash_clear(&aff->af_suff);
				3527	hash_clear(&aff->af_comp);
				3528	}
				3529
				3530	/*
				3531	* Read dictionary file "fname".
				3532	* Returns OK or FAIL;
				3533	*/
				3534	static int
				3535	spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile)
				3536	{
				3537	hashtab_T ht;
				3538	char_u line[MAXLINELEN];
				3539	char_u *p;
				3540	char_u *afflist;
				3541	char_u store_afflist[MAXWLEN];
				3542	int pfxlen;
				3543	int need_affix;
				3544	char_u *dw;
				3545	char_u *pc;
				3546	char_u *w;
				3547	int l;
				3548	hash_T hash;
				3549	hashitem_T *hi;
				3550	FILE *fd;
				3551	int lnum = 1;
				3552	int non_ascii = 0;
				3553	int retval = OK;
				3554	char_u message[MAXLINELEN + MAXWLEN];
				3555	int flags;
				3556	int duplicate = 0;
				3557
				3558	/*
				3559	* Open the file.
				3560	*/
				3561	fd = mch_fopen((char *)fname, "r");
				3562	if (fd == NULL)
				3563	{
				3564	EMSG2(_(e_notopen), fname);
				3565	return FAIL;
				3566	}
				3567
				3568	/* The hashtable is only used to detect duplicated words. */
				3569	hash_init(&ht);
				3570
				3571	vim_snprintf((char *)IObuff, IOSIZE,
				3572	_("Reading dictionary file %s ..."), fname);
				3573	spell_message(spin, IObuff);
				3574
				3575	/* start with a message for the first line */
				3576	spin->si_msg_count = 999999;
				3577
				3578	/* Read and ignore the first line: word count. */
				3579	(void)vim_fgets(line, MAXLINELEN, fd);
				3580	if (!vim_isdigit(*skipwhite(line)))
				3581	EMSG2(_("E760: No word count in %s"), fname);
				3582
				3583	/*
				3584	* Read all the lines in the file one by one.
				3585	* The words are converted to 'encoding' here, before being added to
				3586	* the hashtable.
				3587	*/
				3588	while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
				3589	{
				3590	line_breakcheck();
				3591	++lnum;
				3592	if (line[0] == '#' \|\| line[0] == '/')
				3593	continue; /* comment line */
				3594
				3595	/* Remove CR, LF and white space from the end. White space halfway
				3596	* the word is kept to allow e.g., "et al.". */
				3597	l = (int)STRLEN(line);
				3598	while (l > 0 && line[l - 1] <= ' ')
				3599	--l;
				3600	if (l == 0)
				3601	continue; /* empty line */
				3602	line[l] = NUL;
				3603
				3604	#ifdef FEAT_MBYTE
				3605	/* Convert from "SET" to 'encoding' when needed. */
				3606	if (spin->si_conv.vc_type != CONV_NONE)
				3607	{
				3608	pc = string_convert(&spin->si_conv, line, NULL);
				3609	if (pc == NULL)
				3610	{
				3611	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				3612	fname, lnum, line);
				3613	continue;
				3614	}
				3615	w = pc;
				3616	}
				3617	else
				3618	#endif
				3619	{
				3620	pc = NULL;
				3621	w = line;
				3622	}
				3623
				3624	/* Truncate the word at the "/", set "afflist" to what follows.
				3625	* Replace "\/" by "/" and "\\" by "\". */
				3626	afflist = NULL;
				3627	for (p = w; *p != NUL; mb_ptr_adv(p))
				3628	{
				3629	if (*p == '\\' && (p[1] == '\\' \|\| p[1] == '/'))
				3630	STRMOVE(p, p + 1);
				3631	else if (*p == '/')
				3632	{
				3633	*p = NUL;
				3634	afflist = p + 1;
				3635	break;
				3636	}
				3637	}
				3638
				3639	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				3640	if (spin->si_ascii && has_non_ascii(w))
				3641	{
				3642	++non_ascii;
				3643	vim_free(pc);
				3644	continue;
				3645	}
				3646
				3647	/* This takes time, print a message every 10000 words. */
				3648	if (spin->si_verbose && spin->si_msg_count > 10000)
				3649	{
				3650	spin->si_msg_count = 0;
				3651	vim_snprintf((char *)message, sizeof(message),
				3652	_("line %6d, word %6d - %s"),
				3653	lnum, spin->si_foldwcount + spin->si_keepwcount, w);
				3654	msg_start();
				3655	msg_puts_long_attr(message, 0);
				3656	msg_clr_eos();
				3657	msg_didout = FALSE;
				3658	msg_col = 0;
				3659	out_flush();
				3660	}
				3661
				3662	/* Store the word in the hashtable to be able to find duplicates. */
				3663	dw = (char_u *)getroom_save(spin, w);
				3664	if (dw == NULL)
				3665	{
				3666	retval = FAIL;
				3667	vim_free(pc);
				3668	break;
				3669	}
				3670
				3671	hash = hash_hash(dw);
				3672	hi = hash_lookup(&ht, dw, hash);
				3673	if (!HASHITEM_EMPTY(hi))
				3674	{
				3675	if (p_verbose > 0)
				3676	smsg((char_u *)_("Duplicate word in %s line %d: %s"),
				3677	fname, lnum, dw);
				3678	else if (duplicate == 0)
				3679	smsg((char_u *)_("First duplicate word in %s line %d: %s"),
				3680	fname, lnum, dw);
				3681	++duplicate;
				3682	}
				3683	else
				3684	hash_add_item(&ht, hi, dw, hash);
				3685
				3686	flags = 0;
				3687	store_afflist[0] = NUL;
				3688	pfxlen = 0;
				3689	need_affix = FALSE;
				3690	if (afflist != NULL)
				3691	{
				3692	/* Extract flags from the affix list. */
				3693	flags \|= get_affix_flags(affile, afflist);
				3694
				3695	if (affile->af_needaffix != 0 && flag_in_afflist(
				3696	affile->af_flagtype, afflist, affile->af_needaffix))
				3697	need_affix = TRUE;
				3698
				3699	if (affile->af_pfxpostpone)
				3700	/* Need to store the list of prefix IDs with the word. */
				3701	pfxlen = get_pfxlist(affile, afflist, store_afflist);
				3702
				3703	if (spin->si_compflags != NULL)
				3704	/* Need to store the list of compound flags with the word.
				3705	* Concatenate them to the list of prefix IDs. */
				3706	get_compflags(affile, afflist, store_afflist + pfxlen);
				3707	}
				3708
				3709	/* Add the word to the word tree(s). */
				3710	if (store_word(spin, dw, flags, spin->si_region,
				3711	store_afflist, need_affix) == FAIL)
				3712	retval = FAIL;
				3713
				3714	if (afflist != NULL)
				3715	{
				3716	/* Find all matching suffixes and add the resulting words.
				3717	* Additionally do matching prefixes that combine. */
				3718	if (store_aff_word(spin, dw, afflist, affile,
				3719	&affile->af_suff, &affile->af_pref,
				3720	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3721	retval = FAIL;
				3722
				3723	/* Find all matching prefixes and add the resulting words. */
				3724	if (store_aff_word(spin, dw, afflist, affile,
				3725	&affile->af_pref, NULL,
				3726	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3727	retval = FAIL;
				3728	}
				3729
				3730	vim_free(pc);
				3731	}
				3732
				3733	if (duplicate > 0)
				3734	smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
				3735	if (spin->si_ascii && non_ascii > 0)
				3736	smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
				3737	non_ascii, fname);
				3738	hash_clear(&ht);
				3739
				3740	fclose(fd);
				3741	return retval;
				3742	}
				3743
				3744	/*
				3745	* Check for affix flags in "afflist" that are turned into word flags.
				3746	* Return WF_ flags.
				3747	*/
				3748	static int
				3749	get_affix_flags(afffile_T affile, char_u afflist)
				3750	{
				3751	int flags = 0;
				3752
				3753	if (affile->af_keepcase != 0 && flag_in_afflist(
				3754	affile->af_flagtype, afflist, affile->af_keepcase))
				3755	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				3756	if (affile->af_rare != 0 && flag_in_afflist(
				3757	affile->af_flagtype, afflist, affile->af_rare))
				3758	flags \|= WF_RARE;
				3759	if (affile->af_bad != 0 && flag_in_afflist(
				3760	affile->af_flagtype, afflist, affile->af_bad))
				3761	flags \|= WF_BANNED;
				3762	if (affile->af_needcomp != 0 && flag_in_afflist(
				3763	affile->af_flagtype, afflist, affile->af_needcomp))
				3764	flags \|= WF_NEEDCOMP;
				3765	if (affile->af_comproot != 0 && flag_in_afflist(
				3766	affile->af_flagtype, afflist, affile->af_comproot))
				3767	flags \|= WF_COMPROOT;
				3768	if (affile->af_nosuggest != 0 && flag_in_afflist(
				3769	affile->af_flagtype, afflist, affile->af_nosuggest))
				3770	flags \|= WF_NOSUGGEST;
				3771	return flags;
				3772	}
				3773
				3774	/*
				3775	* Get the list of prefix IDs from the affix list "afflist".
				3776	* Used for PFXPOSTPONE.
				3777	* Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
				3778	* and return the number of affixes.
				3779	*/
				3780	static int
				3781	get_pfxlist(
				3782	afffile_T *affile,
				3783	char_u *afflist,
				3784	char_u *store_afflist)
				3785	{
				3786	char_u *p;
				3787	char_u *prevp;
				3788	int cnt = 0;
				3789	int id;
				3790	char_u key[AH_KEY_LEN];
				3791	hashitem_T *hi;
				3792
				3793	for (p = afflist; *p != NUL; )
				3794	{
				3795	prevp = p;
				3796	if (get_affitem(affile->af_flagtype, &p) != 0)
				3797	{
				3798	/* A flag is a postponed prefix flag if it appears in "af_pref"
				3799	* and it's ID is not zero. */
				3800	vim_strncpy(key, prevp, p - prevp);
				3801	hi = hash_find(&affile->af_pref, key);
				3802	if (!HASHITEM_EMPTY(hi))
				3803	{
				3804	id = HI2AH(hi)->ah_newID;
				3805	if (id != 0)
				3806	store_afflist[cnt++] = id;
				3807	}
				3808	}
				3809	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3810	++p;
				3811	}
				3812
				3813	store_afflist[cnt] = NUL;
				3814	return cnt;
				3815	}
				3816
				3817	/*
				3818	* Get the list of compound IDs from the affix list "afflist" that are used
				3819	* for compound words.
				3820	* Puts the flags in "store_afflist[]".
				3821	*/
				3822	static void
				3823	get_compflags(
				3824	afffile_T *affile,
				3825	char_u *afflist,
				3826	char_u *store_afflist)
				3827	{
				3828	char_u *p;
				3829	char_u *prevp;
				3830	int cnt = 0;
				3831	char_u key[AH_KEY_LEN];
				3832	hashitem_T *hi;
				3833
				3834	for (p = afflist; *p != NUL; )
				3835	{
				3836	prevp = p;
				3837	if (get_affitem(affile->af_flagtype, &p) != 0)
				3838	{
				3839	/* A flag is a compound flag if it appears in "af_comp". */
				3840	vim_strncpy(key, prevp, p - prevp);
				3841	hi = hash_find(&affile->af_comp, key);
				3842	if (!HASHITEM_EMPTY(hi))
				3843	store_afflist[cnt++] = HI2CI(hi)->ci_newID;
				3844	}
				3845	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3846	++p;
				3847	}
				3848
				3849	store_afflist[cnt] = NUL;
				3850	}
				3851
				3852	/*
				3853	* Apply affixes to a word and store the resulting words.
				3854	* "ht" is the hashtable with affentry_T that need to be applied, either
				3855	* prefixes or suffixes.
				3856	* "xht", when not NULL, is the prefix hashtable, to be used additionally on
				3857	* the resulting words for combining affixes.
				3858	*
				3859	* Returns FAIL when out of memory.
				3860	*/
				3861	static int
				3862	store_aff_word(
				3863	spellinfo_T spin, / spell info */
				3864	char_u word, / basic word start */
				3865	char_u afflist, / list of names of supported affixes */
				3866	afffile_T *affile,
				3867	hashtab_T *ht,
				3868	hashtab_T *xht,
				3869	int condit, /* CONDIT_SUF et al. */
				3870	int flags, /* flags for the word */
				3871	char_u pfxlist, / list of prefix IDs */
				3872	int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest
				3873	* is compound flags */
				3874	{
				3875	int todo;
				3876	hashitem_T *hi;
				3877	affheader_T *ah;
				3878	affentry_T *ae;
				3879	char_u newword[MAXWLEN];
				3880	int retval = OK;
				3881	int i, j;
				3882	char_u *p;
				3883	int use_flags;
				3884	char_u *use_pfxlist;
				3885	int use_pfxlen;
				3886	int need_affix;
				3887	char_u store_afflist[MAXWLEN];
				3888	char_u pfx_pfxlist[MAXWLEN];
				3889	size_t wordlen = STRLEN(word);
				3890	int use_condit;
				3891
				3892	todo = (int)ht->ht_used;
				3893	for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
				3894	{
				3895	if (!HASHITEM_EMPTY(hi))
				3896	{
				3897	--todo;
				3898	ah = HI2AH(hi);
				3899
				3900	/* Check that the affix combines, if required, and that the word
				3901	* supports this affix. */
				3902	if (((condit & CONDIT_COMB) == 0 \|\| ah->ah_combine)
				3903	&& flag_in_afflist(affile->af_flagtype, afflist,
				3904	ah->ah_flag))
				3905	{
				3906	/* Loop over all affix entries with this name. */
				3907	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3908	{
				3909	/* Check the condition. It's not logical to match case
				3910	* here, but it is required for compatibility with
				3911	* Myspell.
				3912	* Another requirement from Myspell is that the chop
				3913	* string is shorter than the word itself.
				3914	* For prefixes, when "PFXPOSTPONE" was used, only do
				3915	* prefixes with a chop string and/or flags.
				3916	* When a previously added affix had CIRCUMFIX this one
				3917	* must have it too, if it had not then this one must not
				3918	* have one either. */
				3919	if ((xht != NULL \|\| !affile->af_pfxpostpone
				3920	\|\| ae->ae_chop != NULL
				3921	\|\| ae->ae_flags != NULL)
				3922	&& (ae->ae_chop == NULL
				3923	\|\| STRLEN(ae->ae_chop) < wordlen)
				3924	&& (ae->ae_prog == NULL
				3925	\|\| vim_regexec_prog(&ae->ae_prog, FALSE,
				3926	word, (colnr_T)0))
				3927	&& (((condit & CONDIT_CFIX) == 0)
				3928	== ((condit & CONDIT_AFF) == 0
				3929	\|\| ae->ae_flags == NULL
				3930	\|\| !flag_in_afflist(affile->af_flagtype,
				3931	ae->ae_flags, affile->af_circumfix))))
				3932	{
				3933	/* Match. Remove the chop and add the affix. */
				3934	if (xht == NULL)
				3935	{
				3936	/* prefix: chop/add at the start of the word */
				3937	if (ae->ae_add == NULL)
				3938	*newword = NUL;
				3939	else
				3940	vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
				3941	p = word;
				3942	if (ae->ae_chop != NULL)
				3943	{
				3944	/* Skip chop string. */
				3945	#ifdef FEAT_MBYTE
				3946	if (has_mbyte)
				3947	{
				3948	i = mb_charlen(ae->ae_chop);
				3949	for ( ; i > 0; --i)
				3950	mb_ptr_adv(p);
				3951	}
				3952	else
				3953	#endif
				3954	p += STRLEN(ae->ae_chop);
				3955	}
				3956	STRCAT(newword, p);
				3957	}
				3958	else
				3959	{
				3960	/* suffix: chop/add at the end of the word */
				3961	vim_strncpy(newword, word, MAXWLEN - 1);
				3962	if (ae->ae_chop != NULL)
				3963	{
				3964	/* Remove chop string. */
				3965	p = newword + STRLEN(newword);
				3966	i = (int)MB_CHARLEN(ae->ae_chop);
				3967	for ( ; i > 0; --i)
				3968	mb_ptr_back(newword, p);
				3969	*p = NUL;
				3970	}
				3971	if (ae->ae_add != NULL)
				3972	STRCAT(newword, ae->ae_add);
				3973	}
				3974
				3975	use_flags = flags;
				3976	use_pfxlist = pfxlist;
				3977	use_pfxlen = pfxlen;
				3978	need_affix = FALSE;
				3979	use_condit = condit \| CONDIT_COMB \| CONDIT_AFF;
				3980	if (ae->ae_flags != NULL)
				3981	{
				3982	/* Extract flags from the affix list. */
				3983	use_flags \|= get_affix_flags(affile, ae->ae_flags);
				3984
				3985	if (affile->af_needaffix != 0 && flag_in_afflist(
				3986	affile->af_flagtype, ae->ae_flags,
				3987	affile->af_needaffix))
				3988	need_affix = TRUE;
				3989
				3990	/* When there is a CIRCUMFIX flag the other affix
				3991	* must also have it and we don't add the word
				3992	* with one affix. */
				3993	if (affile->af_circumfix != 0 && flag_in_afflist(
				3994	affile->af_flagtype, ae->ae_flags,
				3995	affile->af_circumfix))
				3996	{
				3997	use_condit \|= CONDIT_CFIX;
				3998	if ((condit & CONDIT_CFIX) == 0)
				3999	need_affix = TRUE;
				4000	}
				4001
				4002	if (affile->af_pfxpostpone
				4003	\|\| spin->si_compflags != NULL)
				4004	{
				4005	if (affile->af_pfxpostpone)
				4006	/* Get prefix IDS from the affix list. */
				4007	use_pfxlen = get_pfxlist(affile,
				4008	ae->ae_flags, store_afflist);
				4009	else
				4010	use_pfxlen = 0;
				4011	use_pfxlist = store_afflist;
				4012
				4013	/* Combine the prefix IDs. Avoid adding the
				4014	* same ID twice. */
				4015	for (i = 0; i < pfxlen; ++i)
				4016	{
				4017	for (j = 0; j < use_pfxlen; ++j)
				4018	if (pfxlist[i] == use_pfxlist[j])
				4019	break;
				4020	if (j == use_pfxlen)
				4021	use_pfxlist[use_pfxlen++] = pfxlist[i];
				4022	}
				4023
				4024	if (spin->si_compflags != NULL)
				4025	/* Get compound IDS from the affix list. */
				4026	get_compflags(affile, ae->ae_flags,
				4027	use_pfxlist + use_pfxlen);
				4028
				4029	/* Combine the list of compound flags.
				4030	* Concatenate them to the prefix IDs list.
				4031	* Avoid adding the same ID twice. */
				4032	for (i = pfxlen; pfxlist[i] != NUL; ++i)
				4033	{
				4034	for (j = use_pfxlen;
				4035	use_pfxlist[j] != NUL; ++j)
				4036	if (pfxlist[i] == use_pfxlist[j])
				4037	break;
				4038	if (use_pfxlist[j] == NUL)
				4039	{
				4040	use_pfxlist[j++] = pfxlist[i];
				4041	use_pfxlist[j] = NUL;
				4042	}
				4043	}
				4044	}
				4045	}
				4046
				4047	/* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
				4048	* use the compound flags. */
				4049	if (use_pfxlist != NULL && ae->ae_compforbid)
				4050	{
				4051	vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
				4052	use_pfxlist = pfx_pfxlist;
				4053	}
				4054
				4055	/* When there are postponed prefixes... */
				4056	if (spin->si_prefroot != NULL
				4057	&& spin->si_prefroot->wn_sibling != NULL)
				4058	{
				4059	/* ... add a flag to indicate an affix was used. */
				4060	use_flags \|= WF_HAS_AFF;
				4061
				4062	/* ... don't use a prefix list if combining
				4063	* affixes is not allowed. But do use the
				4064	* compound flags after them. */
				4065	if (!ah->ah_combine && use_pfxlist != NULL)
				4066	use_pfxlist += use_pfxlen;
				4067	}
				4068
				4069	/* When compounding is supported and there is no
				4070	* "COMPOUNDPERMITFLAG" then forbid compounding on the
				4071	* side where the affix is applied. */
				4072	if (spin->si_compflags != NULL && !ae->ae_comppermit)
				4073	{
				4074	if (xht != NULL)
				4075	use_flags \|= WF_NOCOMPAFT;
				4076	else
				4077	use_flags \|= WF_NOCOMPBEF;
				4078	}
				4079
				4080	/* Store the modified word. */
				4081	if (store_word(spin, newword, use_flags,
				4082	spin->si_region, use_pfxlist,
				4083	need_affix) == FAIL)
				4084	retval = FAIL;
				4085
				4086	/* When added a prefix or a first suffix and the affix
				4087	* has flags may add a(nother) suffix. RECURSIVE! */
				4088	if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
				4089	if (store_aff_word(spin, newword, ae->ae_flags,
				4090	affile, &affile->af_suff, xht,
				4091	use_condit & (xht == NULL
				4092	? ~0 : ~CONDIT_SUF),
				4093	use_flags, use_pfxlist, pfxlen) == FAIL)
				4094	retval = FAIL;
				4095
				4096	/* When added a suffix and combining is allowed also
				4097	* try adding a prefix additionally. Both for the
				4098	* word flags and for the affix flags. RECURSIVE! */
				4099	if (xht != NULL && ah->ah_combine)
				4100	{
				4101	if (store_aff_word(spin, newword,
				4102	afflist, affile,
				4103	xht, NULL, use_condit,
				4104	use_flags, use_pfxlist,
				4105	pfxlen) == FAIL
				4106	\|\| (ae->ae_flags != NULL
				4107	&& store_aff_word(spin, newword,
				4108	ae->ae_flags, affile,
				4109	xht, NULL, use_condit,
				4110	use_flags, use_pfxlist,
				4111	pfxlen) == FAIL))
				4112	retval = FAIL;
				4113	}
				4114	}
				4115	}
				4116	}
				4117	}
				4118	}
				4119
				4120	return retval;
				4121	}
				4122
				4123	/*
				4124	* Read a file with a list of words.
				4125	*/
				4126	static int
				4127	spell_read_wordfile(spellinfo_T spin, char_u fname)
				4128	{
				4129	FILE *fd;
				4130	long lnum = 0;
				4131	char_u rline[MAXLINELEN];
				4132	char_u *line;
				4133	char_u *pc = NULL;
				4134	char_u *p;
				4135	int l;
				4136	int retval = OK;
				4137	int did_word = FALSE;
				4138	int non_ascii = 0;
				4139	int flags;
				4140	int regionmask;
				4141
				4142	/*
				4143	* Open the file.
				4144	*/
				4145	fd = mch_fopen((char *)fname, "r");
				4146	if (fd == NULL)
				4147	{
				4148	EMSG2(_(e_notopen), fname);
				4149	return FAIL;
				4150	}
				4151
				4152	vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
				4153	spell_message(spin, IObuff);
				4154
				4155	/*
				4156	* Read all the lines in the file one by one.
				4157	*/
				4158	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				4159	{
				4160	line_breakcheck();
				4161	++lnum;
				4162
				4163	/* Skip comment lines. */
				4164	if (*rline == '#')
				4165	continue;
				4166
				4167	/* Remove CR, LF and white space from the end. */
				4168	l = (int)STRLEN(rline);
				4169	while (l > 0 && rline[l - 1] <= ' ')
				4170	--l;
				4171	if (l == 0)
				4172	continue; /* empty or blank line */
				4173	rline[l] = NUL;
				4174
				4175	/* Convert from "/encoding={encoding}" to 'encoding' when needed. */
				4176	vim_free(pc);
				4177	#ifdef FEAT_MBYTE
				4178	if (spin->si_conv.vc_type != CONV_NONE)
				4179	{
				4180	pc = string_convert(&spin->si_conv, rline, NULL);
				4181	if (pc == NULL)
				4182	{
				4183	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				4184	fname, lnum, rline);
				4185	continue;
				4186	}
				4187	line = pc;
				4188	}
				4189	else
				4190	#endif
				4191	{
				4192	pc = NULL;
				4193	line = rline;
				4194	}
				4195
				4196	if (*line == '/')
				4197	{
				4198	++line;
				4199	if (STRNCMP(line, "encoding=", 9) == 0)
				4200	{
				4201	if (spin->si_conv.vc_type != CONV_NONE)
				4202	smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
				4203	fname, lnum, line - 1);
				4204	else if (did_word)
				4205	smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
				4206	fname, lnum, line - 1);
				4207	else
				4208	{
				4209	#ifdef FEAT_MBYTE
				4210	char_u *enc;
				4211
				4212	/* Setup for conversion to 'encoding'. */
				4213	line += 9;
				4214	enc = enc_canonize(line);
				4215	if (enc != NULL && !spin->si_ascii
				4216	&& convert_setup(&spin->si_conv, enc,
				4217	p_enc) == FAIL)
				4218	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				4219	fname, line, p_enc);
				4220	vim_free(enc);
				4221	spin->si_conv.vc_fail = TRUE;
				4222	#else
				4223	smsg((char_u *)_("Conversion in %s not supported"), fname);
				4224	#endif
				4225	}
				4226	continue;
				4227	}
				4228
				4229	if (STRNCMP(line, "regions=", 8) == 0)
				4230	{
				4231	if (spin->si_region_count > 1)
				4232	smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
				4233	fname, lnum, line);
				4234	else
				4235	{
				4236	line += 8;
				4237	if (STRLEN(line) > 16)
				4238	smsg((char_u *)_("Too many regions in %s line %d: %s"),
				4239	fname, lnum, line);
				4240	else
				4241	{
				4242	spin->si_region_count = (int)STRLEN(line) / 2;
				4243	STRCPY(spin->si_region_name, line);
				4244
				4245	/* Adjust the mask for a word valid in all regions. */
				4246	spin->si_region = (1 << spin->si_region_count) - 1;
				4247	}
				4248	}
				4249	continue;
				4250	}
				4251
				4252	smsg((char_u *)_("/ line ignored in %s line %d: %s"),
				4253	fname, lnum, line - 1);
				4254	continue;
				4255	}
				4256
				4257	flags = 0;
				4258	regionmask = spin->si_region;
				4259
				4260	/* Check for flags and region after a slash. */
				4261	p = vim_strchr(line, '/');
				4262	if (p != NULL)
				4263	{
				4264	*p++ = NUL;
				4265	while (*p != NUL)
				4266	{
				4267	if (p == '=') / keep-case word */
				4268	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				4269	else if (p == '!') / Bad, bad, wicked word. */
				4270	flags \|= WF_BANNED;
				4271	else if (p == '?') / Rare word. */
				4272	flags \|= WF_RARE;
				4273	else if (VIM_ISDIGIT(p)) / region number(s) */
				4274	{
				4275	if ((flags & WF_REGION) == 0) /* first one */
				4276	regionmask = 0;
				4277	flags \|= WF_REGION;
				4278
				4279	l = *p - '0';
				4280	if (l > spin->si_region_count)
				4281	{
				4282	smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
				4283	fname, lnum, p);
				4284	break;
				4285	}
				4286	regionmask \|= 1 << (l - 1);
				4287	}
				4288	else
				4289	{
				4290	smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
				4291	fname, lnum, p);
				4292	break;
				4293	}
				4294	++p;
				4295	}
				4296	}
				4297
				4298	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				4299	if (spin->si_ascii && has_non_ascii(line))
				4300	{
				4301	++non_ascii;
				4302	continue;
				4303	}
				4304
				4305	/* Normal word: store it. */
				4306	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
				4307	{
				4308	retval = FAIL;
				4309	break;
				4310	}
				4311	did_word = TRUE;
				4312	}
				4313
				4314	vim_free(pc);
				4315	fclose(fd);
				4316
				4317	if (spin->si_ascii && non_ascii > 0)
				4318	{
				4319	vim_snprintf((char *)IObuff, IOSIZE,
				4320	_("Ignored %d words with non-ASCII characters"), non_ascii);
				4321	spell_message(spin, IObuff);
				4322	}
				4323
				4324	return retval;
				4325	}
				4326
				4327	/*
				4328	* Get part of an sblock_T, "len" bytes long.
				4329	* This avoids calling free() for every little struct we use (and keeping
				4330	* track of them).
				4331	* The memory is cleared to all zeros.
				4332	* Returns NULL when out of memory.
				4333	*/
				4334	static void *
				4335	getroom(
				4336	spellinfo_T *spin,
				4337	size_t len, /* length needed */
				4338	int align) /* align for pointer */
				4339	{
				4340	char_u *p;
				4341	sblock_T *bl = spin->si_blocks;
				4342
				4343	if (align && bl != NULL)
				4344	/* Round size up for alignment. On some systems structures need to be
				4345	* aligned to the size of a pointer (e.g., SPARC). */
				4346	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
				4347	& ~(sizeof(char *) - 1);
				4348
				4349	if (bl == NULL \|\| bl->sb_used + len > SBLOCKSIZE)
				4350	{
				4351	if (len >= SBLOCKSIZE)
				4352	bl = NULL;
				4353	else
				4354	/* Allocate a block of memory. It is not freed until much later. */
				4355	bl = (sblock_T *)alloc_clear(
				4356	(unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
				4357	if (bl == NULL)
				4358	{
				4359	if (!spin->si_did_emsg)
				4360	{
				4361	EMSG(_("E845: Insufficient memory, word list will be incomplete"));
				4362	spin->si_did_emsg = TRUE;
				4363	}
				4364	return NULL;
				4365	}
				4366	bl->sb_next = spin->si_blocks;
				4367	spin->si_blocks = bl;
				4368	bl->sb_used = 0;
				4369	++spin->si_blocks_cnt;
				4370	}
				4371
				4372	p = bl->sb_data + bl->sb_used;
				4373	bl->sb_used += (int)len;
				4374
				4375	return p;
				4376	}
				4377
				4378	/*
				4379	* Make a copy of a string into memory allocated with getroom().
				4380	* Returns NULL when out of memory.
				4381	*/
				4382	static char_u *
				4383	getroom_save(spellinfo_T spin, char_u s)
				4384	{
				4385	char_u *sc;
				4386
				4387	sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
				4388	if (sc != NULL)
				4389	STRCPY(sc, s);
				4390	return sc;
				4391	}
				4392
				4393
				4394	/*
				4395	* Free the list of allocated sblock_T.
				4396	*/
				4397	static void
				4398	free_blocks(sblock_T *bl)
				4399	{
				4400	sblock_T *next;
				4401
				4402	while (bl != NULL)
				4403	{
				4404	next = bl->sb_next;
				4405	vim_free(bl);
				4406	bl = next;
				4407	}
				4408	}
				4409
				4410	/*
				4411	* Allocate the root of a word tree.
				4412	* Returns NULL when out of memory.
				4413	*/
				4414	static wordnode_T *
				4415	wordtree_alloc(spellinfo_T *spin)
				4416	{
				4417	return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4418	}
				4419
				4420	/*
				4421	* Store a word in the tree(s).
				4422	* Always store it in the case-folded tree. For a keep-case word this is
				4423	* useful when the word can also be used with all caps (no WF_FIXCAP flag) and
				4424	* used to find suggestions.
				4425	* For a keep-case word also store it in the keep-case tree.
				4426	* When "pfxlist" is not NULL store the word for each postponed prefix ID and
				4427	* compound flag.
				4428	*/
				4429	static int
				4430	store_word(
				4431	spellinfo_T *spin,
				4432	char_u *word,
				4433	int flags, /* extra flags, WF_BANNED */
				4434	int region, /* supported region(s) */
				4435	char_u pfxlist, / list of prefix IDs or NULL */
				4436	int need_affix) /* only store word with affix ID */
				4437	{
				4438	int len = (int)STRLEN(word);
				4439	int ct = captype(word, word + len);
				4440	char_u foldword[MAXWLEN];
				4441	int res = OK;
				4442	char_u *p;
				4443
				4444	(void)spell_casefold(word, len, foldword, MAXWLEN);
				4445	for (p = pfxlist; res == OK; ++p)
				4446	{
				4447	if (!need_affix \|\| (p != NULL && *p != NUL))
				4448	res = tree_add_word(spin, foldword, spin->si_foldroot, ct \| flags,
				4449	region, p == NULL ? 0 : *p);
				4450	if (p == NULL \|\| *p == NUL)
				4451	break;
				4452	}
				4453	++spin->si_foldwcount;
				4454
				4455	if (res == OK && (ct == WF_KEEPCAP \|\| (flags & WF_KEEPCAP)))
				4456	{
				4457	for (p = pfxlist; res == OK; ++p)
				4458	{
				4459	if (!need_affix \|\| (p != NULL && *p != NUL))
				4460	res = tree_add_word(spin, word, spin->si_keeproot, flags,
				4461	region, p == NULL ? 0 : *p);
				4462	if (p == NULL \|\| *p == NUL)
				4463	break;
				4464	}
				4465	++spin->si_keepwcount;
				4466	}
				4467	return res;
				4468	}
				4469
				4470	/*
				4471	* Add word "word" to a word tree at "root".
				4472	* When "flags" < 0 we are adding to the prefix tree where "flags" is used for
				4473	* "rare" and "region" is the condition nr.
				4474	* Returns FAIL when out of memory.
				4475	*/
				4476	static int
				4477	tree_add_word(
				4478	spellinfo_T *spin,
				4479	char_u *word,
				4480	wordnode_T *root,
				4481	int flags,
				4482	int region,
				4483	int affixID)
				4484	{
				4485	wordnode_T *node = root;
				4486	wordnode_T *np;
				4487	wordnode_T copyp, *copyprev;
				4488	wordnode_T **prev = NULL;
				4489	int i;
				4490
				4491	/* Add each byte of the word to the tree, including the NUL at the end. */
				4492	for (i = 0; ; ++i)
				4493	{
				4494	/* When there is more than one reference to this node we need to make
				4495	* a copy, so that we can modify it. Copy the whole list of siblings
				4496	* (we don't optimize for a partly shared list of siblings). */
				4497	if (node != NULL && node->wn_refs > 1)
				4498	{
				4499	--node->wn_refs;
				4500	copyprev = prev;
				4501	for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
				4502	{
				4503	/* Allocate a new node and copy the info. */
				4504	np = get_wordnode(spin);
				4505	if (np == NULL)
				4506	return FAIL;
				4507	np->wn_child = copyp->wn_child;
				4508	if (np->wn_child != NULL)
				4509	++np->wn_child->wn_refs; /* child gets extra ref */
				4510	np->wn_byte = copyp->wn_byte;
				4511	if (np->wn_byte == NUL)
				4512	{
				4513	np->wn_flags = copyp->wn_flags;
				4514	np->wn_region = copyp->wn_region;
				4515	np->wn_affixID = copyp->wn_affixID;
				4516	}
				4517
				4518	/* Link the new node in the list, there will be one ref. */
				4519	np->wn_refs = 1;
				4520	if (copyprev != NULL)
				4521	*copyprev = np;
				4522	copyprev = &np->wn_sibling;
				4523
				4524	/* Let "node" point to the head of the copied list. */
				4525	if (copyp == node)
				4526	node = np;
				4527	}
				4528	}
				4529
				4530	/* Look for the sibling that has the same character. They are sorted
				4531	* on byte value, thus stop searching when a sibling is found with a
				4532	* higher byte value. For zero bytes (end of word) the sorting is
				4533	* done on flags and then on affixID. */
				4534	while (node != NULL
				4535	&& (node->wn_byte < word[i]
				4536	\|\| (node->wn_byte == NUL
				4537	&& (flags < 0
				4538	? node->wn_affixID < (unsigned)affixID
				4539	: (node->wn_flags < (unsigned)(flags & WN_MASK)
				4540	\|\| (node->wn_flags == (flags & WN_MASK)
				4541	&& (spin->si_sugtree
				4542	? (node->wn_region & 0xffff) < region
				4543	: node->wn_affixID
				4544	< (unsigned)affixID)))))))
				4545	{
				4546	prev = &node->wn_sibling;
				4547	node = *prev;
				4548	}
				4549	if (node == NULL
				4550	\|\| node->wn_byte != word[i]
				4551	\|\| (word[i] == NUL
				4552	&& (flags < 0
				4553	\|\| spin->si_sugtree
				4554	\|\| node->wn_flags != (flags & WN_MASK)
				4555	\|\| node->wn_affixID != affixID)))
				4556	{
				4557	/* Allocate a new node. */
				4558	np = get_wordnode(spin);
				4559	if (np == NULL)
				4560	return FAIL;
				4561	np->wn_byte = word[i];
				4562
				4563	/* If "node" is NULL this is a new child or the end of the sibling
				4564	* list: ref count is one. Otherwise use ref count of sibling and
				4565	* make ref count of sibling one (matters when inserting in front
				4566	* of the list of siblings). */
				4567	if (node == NULL)
				4568	np->wn_refs = 1;
				4569	else
				4570	{
				4571	np->wn_refs = node->wn_refs;
				4572	node->wn_refs = 1;
				4573	}
				4574	if (prev != NULL)
				4575	*prev = np;
				4576	np->wn_sibling = node;
				4577	node = np;
				4578	}
				4579
				4580	if (word[i] == NUL)
				4581	{
				4582	node->wn_flags = flags;
				4583	node->wn_region \|= region;
				4584	node->wn_affixID = affixID;
				4585	break;
				4586	}
				4587	prev = &node->wn_child;
				4588	node = *prev;
				4589	}
				4590	#ifdef SPELL_PRINTTREE
				4591	smsg((char_u *)"Added \"%s\"", word);
				4592	spell_print_tree(root->wn_sibling);
				4593	#endif
				4594
				4595	/* count nr of words added since last message */
				4596	++spin->si_msg_count;
				4597
				4598	if (spin->si_compress_cnt > 1)
				4599	{
				4600	if (--spin->si_compress_cnt == 1)
				4601	/* Did enough words to lower the block count limit. */
				4602	spin->si_blocks_cnt += compress_inc;
				4603	}
				4604
				4605	/*
				4606	* When we have allocated lots of memory we need to compress the word tree
				4607	* to free up some room. But compression is slow, and we might actually
				4608	* need that room, thus only compress in the following situations:
				4609	* 1. When not compressed before (si_compress_cnt == 0): when using
				4610	* "compress_start" blocks.
				4611	* 2. When compressed before and used "compress_inc" blocks before
				4612	* adding "compress_added" words (si_compress_cnt > 1).
				4613	* 3. When compressed before, added "compress_added" words
				4614	* (si_compress_cnt == 1) and the number of free nodes drops below the
				4615	* maximum word length.
				4616	*/
				4617	#ifndef SPELL_COMPRESS_ALLWAYS
				4618	if (spin->si_compress_cnt == 1
				4619	? spin->si_free_count < MAXWLEN
				4620	: spin->si_blocks_cnt >= compress_start)
				4621	#endif
				4622	{
				4623	/* Decrement the block counter. The effect is that we compress again
				4624	* when the freed up room has been used and another "compress_inc"
				4625	* blocks have been allocated. Unless "compress_added" words have
				4626	* been added, then the limit is put back again. */
				4627	spin->si_blocks_cnt -= compress_inc;
				4628	spin->si_compress_cnt = compress_added;
				4629
				4630	if (spin->si_verbose)
				4631	{
				4632	msg_start();
				4633	msg_puts((char_u *)_(msg_compressing));
				4634	msg_clr_eos();
				4635	msg_didout = FALSE;
				4636	msg_col = 0;
				4637	out_flush();
				4638	}
				4639
				4640	/* Compress both trees. Either they both have many nodes, which makes
				4641	* compression useful, or one of them is small, which means
				4642	* compression goes fast. But when filling the soundfold word tree
				4643	* there is no keep-case tree. */
				4644	wordtree_compress(spin, spin->si_foldroot);
				4645	if (affixID >= 0)
				4646	wordtree_compress(spin, spin->si_keeproot);
				4647	}
				4648
				4649	return OK;
				4650	}
				4651
				4652	/*
				4653	* Get a wordnode_T, either from the list of previously freed nodes or
				4654	* allocate a new one.
				4655	* Returns NULL when out of memory.
				4656	*/
				4657	static wordnode_T *
				4658	get_wordnode(spellinfo_T *spin)
				4659	{
				4660	wordnode_T *n;
				4661
				4662	if (spin->si_first_free == NULL)
				4663	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4664	else
				4665	{
				4666	n = spin->si_first_free;
				4667	spin->si_first_free = n->wn_child;
				4668	vim_memset(n, 0, sizeof(wordnode_T));
				4669	--spin->si_free_count;
				4670	}
				4671	#ifdef SPELL_PRINTTREE
				4672	if (n != NULL)
				4673	n->wn_nr = ++spin->si_wordnode_nr;
				4674	#endif
				4675	return n;
				4676	}
				4677
				4678	/*
				4679	* Decrement the reference count on a node (which is the head of a list of
				4680	* siblings). If the reference count becomes zero free the node and its
				4681	* siblings.
				4682	* Returns the number of nodes actually freed.
				4683	*/
				4684	static int
				4685	deref_wordnode(spellinfo_T spin, wordnode_T node)
				4686	{
				4687	wordnode_T *np;
				4688	int cnt = 0;
				4689
				4690	if (--node->wn_refs == 0)
				4691	{
				4692	for (np = node; np != NULL; np = np->wn_sibling)
				4693	{
				4694	if (np->wn_child != NULL)
				4695	cnt += deref_wordnode(spin, np->wn_child);
				4696	free_wordnode(spin, np);
				4697	++cnt;
				4698	}
				4699	++cnt; /* length field */
				4700	}
				4701	return cnt;
				4702	}
				4703
				4704	/*
				4705	* Free a wordnode_T for re-use later.
				4706	* Only the "wn_child" field becomes invalid.
				4707	*/
				4708	static void
				4709	free_wordnode(spellinfo_T spin, wordnode_T n)
				4710	{
				4711	n->wn_child = spin->si_first_free;
				4712	spin->si_first_free = n;
				4713	++spin->si_free_count;
				4714	}
				4715
				4716	/*
				4717	* Compress a tree: find tails that are identical and can be shared.
				4718	*/
				4719	static void
				4720	wordtree_compress(spellinfo_T spin, wordnode_T root)
				4721	{
				4722	hashtab_T ht;
				4723	int n;
				4724	int tot = 0;
				4725	int perc;
				4726
				4727	/* Skip the root itself, it's not actually used. The first sibling is the
				4728	* start of the tree. */
				4729	if (root->wn_sibling != NULL)
				4730	{
				4731	hash_init(&ht);
				4732	n = node_compress(spin, root->wn_sibling, &ht, &tot);
				4733
				4734	#ifndef SPELL_PRINTTREE
				4735	if (spin->si_verbose \|\| p_verbose > 2)
				4736	#endif
				4737	{
				4738	if (tot > 1000000)
				4739	perc = (tot - n) / (tot / 100);
				4740	else if (tot == 0)
				4741	perc = 0;
				4742	else
				4743	perc = (tot - n) * 100 / tot;
				4744	vim_snprintf((char *)IObuff, IOSIZE,
				4745	_("Compressed %d of %d nodes; %d (%d%%) remaining"),
				4746	n, tot, tot - n, perc);
				4747	spell_message(spin, IObuff);
				4748	}
				4749	#ifdef SPELL_PRINTTREE
				4750	spell_print_tree(root->wn_sibling);
				4751	#endif
				4752	hash_clear(&ht);
				4753	}
				4754	}
				4755
				4756	/*
				4757	* Compress a node, its siblings and its children, depth first.
				4758	* Returns the number of compressed nodes.
				4759	*/
				4760	static int
				4761	node_compress(
				4762	spellinfo_T *spin,
				4763	wordnode_T *node,
				4764	hashtab_T *ht,
				4765	int tot) / total count of nodes before compressing,
				4766	incremented while going through the tree */
				4767	{
				4768	wordnode_T *np;
				4769	wordnode_T *tp;
				4770	wordnode_T *child;
				4771	hash_T hash;
				4772	hashitem_T *hi;
				4773	int len = 0;
				4774	unsigned nr, n;
				4775	int compressed = 0;
				4776
				4777	/*
				4778	* Go through the list of siblings. Compress each child and then try
				4779	* finding an identical child to replace it.
				4780	* Note that with "child" we mean not just the node that is pointed to,
				4781	* but the whole list of siblings of which the child node is the first.
				4782	*/
				4783	for (np = node; np != NULL && !got_int; np = np->wn_sibling)
				4784	{
				4785	++len;
				4786	if ((child = np->wn_child) != NULL)
				4787	{
				4788	/* Compress the child first. This fills hashkey. */
				4789	compressed += node_compress(spin, child, ht, tot);
				4790
				4791	/* Try to find an identical child. */
				4792	hash = hash_hash(child->wn_u1.hashkey);
				4793	hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
				4794	if (!HASHITEM_EMPTY(hi))
				4795	{
				4796	/* There are children we encountered before with a hash value
				4797	* identical to the current child. Now check if there is one
				4798	* that is really identical. */
				4799	for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
				4800	if (node_equal(child, tp))
				4801	{
				4802	/* Found one! Now use that child in place of the
				4803	* current one. This means the current child and all
				4804	* its siblings is unlinked from the tree. */
				4805	++tp->wn_refs;
				4806	compressed += deref_wordnode(spin, child);
				4807	np->wn_child = tp;
				4808	break;
				4809	}
				4810	if (tp == NULL)
				4811	{
				4812	/* No other child with this hash value equals the child of
				4813	* the node, add it to the linked list after the first
				4814	* item. */
				4815	tp = HI2WN(hi);
				4816	child->wn_u2.next = tp->wn_u2.next;
				4817	tp->wn_u2.next = child;
				4818	}
				4819	}
				4820	else
				4821	/* No other child has this hash value, add it to the
				4822	* hashtable. */
				4823	hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
				4824	}
				4825	}
				4826	tot += len + 1; / add one for the node that stores the length */
				4827
				4828	/*
				4829	* Make a hash key for the node and its siblings, so that we can quickly
				4830	* find a lookalike node. This must be done after compressing the sibling
				4831	* list, otherwise the hash key would become invalid by the compression.
				4832	*/
				4833	node->wn_u1.hashkey[0] = len;
				4834	nr = 0;
				4835	for (np = node; np != NULL; np = np->wn_sibling)
				4836	{
				4837	if (np->wn_byte == NUL)
				4838	/* end node: use wn_flags, wn_region and wn_affixID */
				4839	n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
				4840	else
				4841	/* byte node: use the byte value and the child pointer */
				4842	n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
				4843	nr = nr * 101 + n;
				4844	}
				4845
				4846	/* Avoid NUL bytes, it terminates the hash key. */
				4847	n = nr & 0xff;
				4848	node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
				4849	n = (nr >> 8) & 0xff;
				4850	node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
				4851	n = (nr >> 16) & 0xff;
				4852	node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
				4853	n = (nr >> 24) & 0xff;
				4854	node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
				4855	node->wn_u1.hashkey[5] = NUL;
				4856
				4857	/* Check for CTRL-C pressed now and then. */
				4858	fast_breakcheck();
				4859
				4860	return compressed;
				4861	}
				4862
				4863	/*
				4864	* Return TRUE when two nodes have identical siblings and children.
				4865	*/
				4866	static int
				4867	node_equal(wordnode_T n1, wordnode_T n2)
				4868	{
				4869	wordnode_T *p1;
				4870	wordnode_T *p2;
				4871
				4872	for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
				4873	p1 = p1->wn_sibling, p2 = p2->wn_sibling)
				4874	if (p1->wn_byte != p2->wn_byte
				4875	\|\| (p1->wn_byte == NUL
				4876	? (p1->wn_flags != p2->wn_flags
				4877	\|\| p1->wn_region != p2->wn_region
				4878	\|\| p1->wn_affixID != p2->wn_affixID)
				4879	: (p1->wn_child != p2->wn_child)))
				4880	break;
				4881
				4882	return p1 == NULL && p2 == NULL;
				4883	}
				4884
				4885	static int
				4886	#ifdef __BORLANDC__
				4887	_RTLENTRYF
				4888	#endif
				4889	rep_compare(const void s1, const void s2);
				4890
				4891	/*
				4892	* Function given to qsort() to sort the REP items on "from" string.
				4893	*/
				4894	static int
				4895	#ifdef __BORLANDC__
				4896	_RTLENTRYF
				4897	#endif
				4898	rep_compare(const void s1, const void s2)
				4899	{
				4900	fromto_T p1 = (fromto_T )s1;
				4901	fromto_T p2 = (fromto_T )s2;
				4902
				4903	return STRCMP(p1->ft_from, p2->ft_from);
				4904	}
				4905
				4906	/*
				4907	* Write the Vim .spl file "fname".
				4908	* Return FAIL or OK;
				4909	*/
				4910	static int
				4911	write_vim_spell(spellinfo_T spin, char_u fname)
				4912	{
				4913	FILE *fd;
				4914	int regionmask;
				4915	int round;
				4916	wordnode_T *tree;
				4917	int nodecount;
				4918	int i;
				4919	int l;
				4920	garray_T *gap;
				4921	fromto_T *ftp;
				4922	char_u *p;
				4923	int rr;
				4924	int retval = OK;
				4925	size_t fwv = 1; /* collect return value of fwrite() to avoid
				4926	warnings from picky compiler */
				4927
				4928	fd = mch_fopen((char *)fname, "w");
				4929	if (fd == NULL)
				4930	{
				4931	EMSG2(_(e_notopen), fname);
				4932	return FAIL;
				4933	}
				4934
				4935	/* <HEADER>: <fileID> <versionnr> */
				4936	/* <fileID> */
				4937	fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
				4938	if (fwv != (size_t)1)
				4939	/* Catch first write error, don't try writing more. */
				4940	goto theend;
				4941
				4942	putc(VIMSPELLVERSION, fd); /* <versionnr> */
				4943
				4944	/*
				4945	* <SECTIONS>: <section> ... <sectionend>
				4946	*/
				4947
				4948	/* SN_INFO: <infotext> */
				4949	if (spin->si_info != NULL)
				4950	{
				4951	putc(SN_INFO, fd); /* <sectionID> */
				4952	putc(0, fd); /* <sectionflags> */
				4953
				4954	i = (int)STRLEN(spin->si_info);
				4955	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				4956	fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
				4957	}
				4958
				4959	/* SN_REGION: <regionname> ...
				4960	* Write the region names only if there is more than one. */
				4961	if (spin->si_region_count > 1)
				4962	{
				4963	putc(SN_REGION, fd); /* <sectionID> */
				4964	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4965	l = spin->si_region_count * 2;
				4966	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				4967	fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
				4968	/* <regionname> ... */
				4969	regionmask = (1 << spin->si_region_count) - 1;
				4970	}
				4971	else
				4972	regionmask = 0;
				4973
				4974	/* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
				4975	*
				4976	* The table with character flags and the table for case folding.
				4977	* This makes sure the same characters are recognized as word characters
				4978	* when generating an when using a spell file.
				4979	* Skip this for ASCII, the table may conflict with the one used for
				4980	* 'encoding'.
				4981	* Also skip this for an .add.spl file, the main spell file must contain
				4982	* the table (avoids that it conflicts). File is shorter too.
				4983	*/
				4984	if (!spin->si_ascii && !spin->si_add)
				4985	{
				4986	char_u folchars[128 * 8];
				4987	int flags;
				4988
				4989	putc(SN_CHARFLAGS, fd); /* <sectionID> */
				4990	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4991
				4992	/* Form the <folchars> string first, we need to know its length. */
				4993	l = 0;
				4994	for (i = 128; i < 256; ++i)
				4995	{
				4996	#ifdef FEAT_MBYTE
				4997	if (has_mbyte)
				4998	l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
				4999	else
				5000	#endif
				5001	folchars[l++] = spelltab.st_fold[i];
				5002	}
				5003	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
				5004
				5005	fputc(128, fd); /* <charflagslen> */
				5006	for (i = 128; i < 256; ++i)
				5007	{
				5008	flags = 0;
				5009	if (spelltab.st_isw[i])
				5010	flags \|= CF_WORD;
				5011	if (spelltab.st_isu[i])
				5012	flags \|= CF_UPPER;
				5013	fputc(flags, fd); /* <charflags> */
				5014	}
				5015
				5016	put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
				5017	fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
				5018	}
				5019
				5020	/* SN_MIDWORD: <midword> */
				5021	if (spin->si_midword != NULL)
				5022	{
				5023	putc(SN_MIDWORD, fd); /* <sectionID> */
				5024	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5025
				5026	i = (int)STRLEN(spin->si_midword);
				5027	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				5028	fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
				5029	/* <midword> */
				5030	}
				5031
				5032	/* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
				5033	if (spin->si_prefcond.ga_len > 0)
				5034	{
				5035	putc(SN_PREFCOND, fd); /* <sectionID> */
				5036	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5037
				5038	l = write_spell_prefcond(NULL, &spin->si_prefcond);
				5039	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5040
				5041	write_spell_prefcond(fd, &spin->si_prefcond);
				5042	}
				5043
				5044	/* SN_REP: <repcount> <rep> ...
				5045	* SN_SAL: <salflags> <salcount> <sal> ...
				5046	* SN_REPSAL: <repcount> <rep> ... */
				5047
				5048	/* round 1: SN_REP section
				5049	* round 2: SN_SAL section (unless SN_SOFO is used)
				5050	* round 3: SN_REPSAL section */
				5051	for (round = 1; round <= 3; ++round)
				5052	{
				5053	if (round == 1)
				5054	gap = &spin->si_rep;
				5055	else if (round == 2)
				5056	{
				5057	/* Don't write SN_SAL when using a SN_SOFO section */
				5058	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5059	continue;
				5060	gap = &spin->si_sal;
				5061	}
				5062	else
				5063	gap = &spin->si_repsal;
				5064
				5065	/* Don't write the section if there are no items. */
				5066	if (gap->ga_len == 0)
				5067	continue;
				5068
				5069	/* Sort the REP/REPSAL items. */
				5070	if (round != 2)
				5071	qsort(gap->ga_data, (size_t)gap->ga_len,
				5072	sizeof(fromto_T), rep_compare);
				5073
				5074	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
				5075	putc(i, fd); /* <sectionID> */
				5076
				5077	/* This is for making suggestions, section is not required. */
				5078	putc(0, fd); /* <sectionflags> */
				5079
				5080	/* Compute the length of what follows. */
				5081	l = 2; /* count <repcount> or <salcount> */
				5082	for (i = 0; i < gap->ga_len; ++i)
				5083	{
				5084	ftp = &((fromto_T *)gap->ga_data)[i];
				5085	l += 1 + (int)STRLEN(ftp->ft_from); /* count <fromlen> and <from> */
				5086	l += 1 + (int)STRLEN(ftp->ft_to); /* count <tolen> and <to> */
				5087	}
				5088	if (round == 2)
				5089	++l; /* count <salflags> */
				5090	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5091
				5092	if (round == 2)
				5093	{
				5094	i = 0;
				5095	if (spin->si_followup)
				5096	i \|= SAL_F0LLOWUP;
				5097	if (spin->si_collapse)
				5098	i \|= SAL_COLLAPSE;
				5099	if (spin->si_rem_accents)
				5100	i \|= SAL_REM_ACCENTS;
				5101	putc(i, fd); /* <salflags> */
				5102	}
				5103
				5104	put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
				5105	for (i = 0; i < gap->ga_len; ++i)
				5106	{
				5107	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				5108	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				5109	ftp = &((fromto_T *)gap->ga_data)[i];
				5110	for (rr = 1; rr <= 2; ++rr)
				5111	{
				5112	p = rr == 1 ? ftp->ft_from : ftp->ft_to;
				5113	l = (int)STRLEN(p);
				5114	putc(l, fd);
				5115	if (l > 0)
				5116	fwv &= fwrite(p, l, (size_t)1, fd);
				5117	}
				5118	}
				5119
				5120	}
				5121
				5122	/* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				5123	* This is for making suggestions, section is not required. */
				5124	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5125	{
				5126	putc(SN_SOFO, fd); /* <sectionID> */
				5127	putc(0, fd); /* <sectionflags> */
				5128
				5129	l = (int)STRLEN(spin->si_sofofr);
				5130	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
				5131	/* <sectionlen> */
				5132
				5133	put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
				5134	fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
				5135
				5136	l = (int)STRLEN(spin->si_sofoto);
				5137	put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
				5138	fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
				5139	}
				5140
				5141	/* SN_WORDS: <word> ...
				5142	* This is for making suggestions, section is not required. */
				5143	if (spin->si_commonwords.ht_used > 0)
				5144	{
				5145	putc(SN_WORDS, fd); /* <sectionID> */
				5146	putc(0, fd); /* <sectionflags> */
				5147
				5148	/* round 1: count the bytes
				5149	* round 2: write the bytes */
				5150	for (round = 1; round <= 2; ++round)
				5151	{
				5152	int todo;
				5153	int len = 0;
				5154	hashitem_T *hi;
				5155
				5156	todo = (int)spin->si_commonwords.ht_used;
				5157	for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
				5158	if (!HASHITEM_EMPTY(hi))
				5159	{
				5160	l = (int)STRLEN(hi->hi_key) + 1;
				5161	len += l;
				5162	if (round == 2) /* <word> */
				5163	fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
				5164	--todo;
				5165	}
				5166	if (round == 1)
				5167	put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
				5168	}
				5169	}
				5170
				5171	/* SN_MAP: <mapstr>
				5172	* This is for making suggestions, section is not required. */
				5173	if (spin->si_map.ga_len > 0)
				5174	{
				5175	putc(SN_MAP, fd); /* <sectionID> */
				5176	putc(0, fd); /* <sectionflags> */
				5177	l = spin->si_map.ga_len;
				5178	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5179	fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
				5180	/* <mapstr> */
				5181	}
				5182
				5183	/* SN_SUGFILE: <timestamp>
				5184	* This is used to notify that a .sug file may be available and at the
				5185	* same time allows for checking that a .sug file that is found matches
				5186	* with this .spl file. That's because the word numbers must be exactly
				5187	* right. */
				5188	if (!spin->si_nosugfile
				5189	&& (spin->si_sal.ga_len > 0
				5190	\|\| (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
				5191	{
				5192	putc(SN_SUGFILE, fd); /* <sectionID> */
				5193	putc(0, fd); /* <sectionflags> */
				5194	put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
				5195
				5196	/* Set si_sugtime and write it to the file. */
				5197	spin->si_sugtime = time(NULL);
				5198	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5199	}
				5200
				5201	/* SN_NOSPLITSUGS: nothing
				5202	* This is used to notify that no suggestions with word splits are to be
				5203	* made. */
				5204	if (spin->si_nosplitsugs)
				5205	{
				5206	putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
				5207	putc(0, fd); /* <sectionflags> */
				5208	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5209	}
				5210
				5211	/* SN_NOCOMPUNDSUGS: nothing
				5212	* This is used to notify that no suggestions with compounds are to be
				5213	* made. */
				5214	if (spin->si_nocompoundsugs)
				5215	{
				5216	putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
				5217	putc(0, fd); /* <sectionflags> */
				5218	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5219	}
				5220
				5221	/* SN_COMPOUND: compound info.
				5222	* We don't mark it required, when not supported all compound words will
				5223	* be bad words. */
				5224	if (spin->si_compflags != NULL)
				5225	{
				5226	putc(SN_COMPOUND, fd); /* <sectionID> */
				5227	putc(0, fd); /* <sectionflags> */
				5228
				5229	l = (int)STRLEN(spin->si_compflags);
				5230	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5231	l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
				5232	put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
				5233
				5234	putc(spin->si_compmax, fd); /* <compmax> */
				5235	putc(spin->si_compminlen, fd); /* <compminlen> */
				5236	putc(spin->si_compsylmax, fd); /* <compsylmax> */
				5237	putc(0, fd); /* for Vim 7.0b compatibility */
				5238	putc(spin->si_compoptions, fd); /* <compoptions> */
				5239	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
				5240	/* <comppatcount> */
				5241	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5242	{
				5243	p = ((char_u **)(spin->si_comppat.ga_data))[i];
				5244	putc((int)STRLEN(p), fd); /* <comppatlen> */
				5245	fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
				5246	/* <comppattext> */
				5247	}
				5248	/* <compflags> */
				5249	fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
				5250	(size_t)1, fd);
				5251	}
				5252
				5253	/* SN_NOBREAK: NOBREAK flag */
				5254	if (spin->si_nobreak)
				5255	{
				5256	putc(SN_NOBREAK, fd); /* <sectionID> */
				5257	putc(0, fd); /* <sectionflags> */
				5258
				5259	/* It's empty, the presence of the section flags the feature. */
				5260	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5261	}
				5262
				5263	/* SN_SYLLABLE: syllable info.
				5264	* We don't mark it required, when not supported syllables will not be
				5265	* counted. */
				5266	if (spin->si_syllable != NULL)
				5267	{
				5268	putc(SN_SYLLABLE, fd); /* <sectionID> */
				5269	putc(0, fd); /* <sectionflags> */
				5270
				5271	l = (int)STRLEN(spin->si_syllable);
				5272	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5273	fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
				5274	/* <syllable> */
				5275	}
				5276
				5277	/* end of <SECTIONS> */
				5278	putc(SN_END, fd); /* <sectionend> */
				5279
				5280
				5281	/*
				5282	* <LWORDTREE> <KWORDTREE> <PREFIXTREE>
				5283	*/
				5284	spin->si_memtot = 0;
				5285	for (round = 1; round <= 3; ++round)
				5286	{
				5287	if (round == 1)
				5288	tree = spin->si_foldroot->wn_sibling;
				5289	else if (round == 2)
				5290	tree = spin->si_keeproot->wn_sibling;
				5291	else
				5292	tree = spin->si_prefroot->wn_sibling;
				5293
				5294	/* Clear the index and wnode fields in the tree. */
				5295	clear_node(tree);
				5296
				5297	/* Count the number of nodes. Needed to be able to allocate the
				5298	* memory when reading the nodes. Also fills in index for shared
				5299	* nodes. */
				5300	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
				5301
				5302	/* number of nodes in 4 bytes */
				5303	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5304	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5305
				5306	/* Write the nodes. */
				5307	(void)put_node(fd, tree, 0, regionmask, round == 3);
				5308	}
				5309
				5310	/* Write another byte to check for errors (file system full). */
				5311	if (putc(0, fd) == EOF)
				5312	retval = FAIL;
				5313	theend:
				5314	if (fclose(fd) == EOF)
				5315	retval = FAIL;
				5316
				5317	if (fwv != (size_t)1)
				5318	retval = FAIL;
				5319	if (retval == FAIL)
				5320	EMSG(_(e_write));
				5321
				5322	return retval;
				5323	}
				5324
				5325	/*
				5326	* Clear the index and wnode fields of "node", it siblings and its
				5327	* children. This is needed because they are a union with other items to save
				5328	* space.
				5329	*/
				5330	static void
				5331	clear_node(wordnode_T *node)
				5332	{
				5333	wordnode_T *np;
				5334
				5335	if (node != NULL)
				5336	for (np = node; np != NULL; np = np->wn_sibling)
				5337	{
				5338	np->wn_u1.index = 0;
				5339	np->wn_u2.wnode = NULL;
				5340
				5341	if (np->wn_byte != NUL)
				5342	clear_node(np->wn_child);
				5343	}
				5344	}
				5345
				5346
				5347	/*
				5348	* Dump a word tree at node "node".
				5349	*
				5350	* This first writes the list of possible bytes (siblings). Then for each
				5351	* byte recursively write the children.
				5352	*
				5353	* NOTE: The code here must match the code in read_tree_node(), since
				5354	* assumptions are made about the indexes (so that we don't have to write them
				5355	* in the file).
				5356	*
				5357	* Returns the number of nodes used.
				5358	*/
				5359	static int
				5360	put_node(
				5361	FILE fd, / NULL when only counting */
				5362	wordnode_T *node,
				5363	int idx,
				5364	int regionmask,
				5365	int prefixtree) /* TRUE for PREFIXTREE */
				5366	{
				5367	int newindex = idx;
				5368	int siblingcount = 0;
				5369	wordnode_T *np;
				5370	int flags;
				5371
				5372	/* If "node" is zero the tree is empty. */
				5373	if (node == NULL)
				5374	return 0;
				5375
				5376	/* Store the index where this node is written. */
				5377	node->wn_u1.index = idx;
				5378
				5379	/* Count the number of siblings. */
				5380	for (np = node; np != NULL; np = np->wn_sibling)
				5381	++siblingcount;
				5382
				5383	/* Write the sibling count. */
				5384	if (fd != NULL)
				5385	putc(siblingcount, fd); /* <siblingcount> */
				5386
				5387	/* Write each sibling byte and optionally extra info. */
				5388	for (np = node; np != NULL; np = np->wn_sibling)
				5389	{
				5390	if (np->wn_byte == 0)
				5391	{
				5392	if (fd != NULL)
				5393	{
				5394	/* For a NUL byte (end of word) write the flags etc. */
				5395	if (prefixtree)
				5396	{
				5397	/* In PREFIXTREE write the required affixID and the
				5398	* associated condition nr (stored in wn_region). The
				5399	* byte value is misused to store the "rare" and "not
				5400	* combining" flags */
				5401	if (np->wn_flags == (short_u)PFX_FLAGS)
				5402	putc(BY_NOFLAGS, fd); /* <byte> */
				5403	else
				5404	{
				5405	putc(BY_FLAGS, fd); /* <byte> */
				5406	putc(np->wn_flags, fd); /* <pflags> */
				5407	}
				5408	putc(np->wn_affixID, fd); /* <affixID> */
				5409	put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
				5410	}
				5411	else
				5412	{
				5413	/* For word trees we write the flag/region items. */
				5414	flags = np->wn_flags;
				5415	if (regionmask != 0 && np->wn_region != regionmask)
				5416	flags \|= WF_REGION;
				5417	if (np->wn_affixID != 0)
				5418	flags \|= WF_AFX;
				5419	if (flags == 0)
				5420	{
				5421	/* word without flags or region */
				5422	putc(BY_NOFLAGS, fd); /* <byte> */
				5423	}
				5424	else
				5425	{
				5426	if (np->wn_flags >= 0x100)
				5427	{
				5428	putc(BY_FLAGS2, fd); /* <byte> */
				5429	putc(flags, fd); /* <flags> */
				5430	putc((unsigned)flags >> 8, fd); /* <flags2> */
				5431	}
				5432	else
				5433	{
				5434	putc(BY_FLAGS, fd); /* <byte> */
				5435	putc(flags, fd); /* <flags> */
				5436	}
				5437	if (flags & WF_REGION)
				5438	putc(np->wn_region, fd); /* <region> */
				5439	if (flags & WF_AFX)
				5440	putc(np->wn_affixID, fd); /* <affixID> */
				5441	}
				5442	}
				5443	}
				5444	}
				5445	else
				5446	{
				5447	if (np->wn_child->wn_u1.index != 0
				5448	&& np->wn_child->wn_u2.wnode != node)
				5449	{
				5450	/* The child is written elsewhere, write the reference. */
				5451	if (fd != NULL)
				5452	{
				5453	putc(BY_INDEX, fd); /* <byte> */
				5454	/* <nodeidx> */
				5455	put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
				5456	}
				5457	}
				5458	else if (np->wn_child->wn_u2.wnode == NULL)
				5459	/* We will write the child below and give it an index. */
				5460	np->wn_child->wn_u2.wnode = node;
				5461
				5462	if (fd != NULL)
				5463	if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
				5464	{
				5465	EMSG(_(e_write));
				5466	return 0;
				5467	}
				5468	}
				5469	}
				5470
				5471	/* Space used in the array when reading: one for each sibling and one for
				5472	* the count. */
				5473	newindex += siblingcount + 1;
				5474
				5475	/* Recursively dump the children of each sibling. */
				5476	for (np = node; np != NULL; np = np->wn_sibling)
				5477	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
				5478	newindex = put_node(fd, np->wn_child, newindex, regionmask,
				5479	prefixtree);
				5480
				5481	return newindex;
				5482	}
				5483
				5484
				5485	/*
				5486	* ":mkspell [-ascii] outfile infile ..."
				5487	* ":mkspell [-ascii] addfile"
				5488	*/
				5489	void
				5490	ex_mkspell(exarg_T *eap)
				5491	{
				5492	int fcount;
				5493	char_u **fnames;
				5494	char_u *arg = eap->arg;
				5495	int ascii = FALSE;
				5496
				5497	if (STRNCMP(arg, "-ascii", 6) == 0)
				5498	{
				5499	ascii = TRUE;
				5500	arg = skipwhite(arg + 6);
				5501	}
				5502
				5503	/* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
				5504	if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
				5505	{
				5506	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
				5507	FreeWild(fcount, fnames);
				5508	}
				5509	}
				5510
				5511	/*
				5512	* Create the .sug file.
				5513	* Uses the soundfold info in "spin".
				5514	* Writes the file with the name "wfname", with ".spl" changed to ".sug".
				5515	*/
				5516	static void
				5517	spell_make_sugfile(spellinfo_T spin, char_u wfname)
				5518	{
				5519	char_u *fname = NULL;
				5520	int len;
				5521	slang_T *slang;
				5522	int free_slang = FALSE;
				5523
				5524	/*
				5525	* Read back the .spl file that was written. This fills the required
				5526	* info for soundfolding. This also uses less memory than the
				5527	* pointer-linked version of the trie. And it avoids having two versions
				5528	* of the code for the soundfolding stuff.
				5529	* It might have been done already by spell_reload_one().
				5530	*/
				5531	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				5532	if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
				5533	break;
				5534	if (slang == NULL)
				5535	{
				5536	spell_message(spin, (char_u *)_("Reading back spell file..."));
				5537	slang = spell_load_file(wfname, NULL, NULL, FALSE);
				5538	if (slang == NULL)
				5539	return;
				5540	free_slang = TRUE;
				5541	}
				5542
				5543	/*
				5544	* Clear the info in "spin" that is used.
				5545	*/
				5546	spin->si_blocks = NULL;
				5547	spin->si_blocks_cnt = 0;
				5548	spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
				5549	spin->si_free_count = 0;
				5550	spin->si_first_free = NULL;
				5551	spin->si_foldwcount = 0;
				5552
				5553	/*
				5554	* Go through the trie of good words, soundfold each word and add it to
				5555	* the soundfold trie.
				5556	*/
				5557	spell_message(spin, (char_u *)_("Performing soundfolding..."));
				5558	if (sug_filltree(spin, slang) == FAIL)
				5559	goto theend;
				5560
				5561	/*
				5562	* Create the table which links each soundfold word with a list of the
				5563	* good words it may come from. Creates buffer "spin->si_spellbuf".
				5564	* This also removes the wordnr from the NUL byte entries to make
				5565	* compression possible.
				5566	*/
				5567	if (sug_maketable(spin) == FAIL)
				5568	goto theend;
				5569
				5570	smsg((char_u *)_("Number of words after soundfolding: %ld"),
				5571	(long)spin->si_spellbuf->b_ml.ml_line_count);
				5572
				5573	/*
				5574	* Compress the soundfold trie.
				5575	*/
				5576	spell_message(spin, (char_u *)_(msg_compressing));
				5577	wordtree_compress(spin, spin->si_foldroot);
				5578
				5579	/*
				5580	* Write the .sug file.
				5581	* Make the file name by changing ".spl" to ".sug".
				5582	*/
				5583	fname = alloc(MAXPATHL);
				5584	if (fname == NULL)
				5585	goto theend;
				5586	vim_strncpy(fname, wfname, MAXPATHL - 1);
				5587	len = (int)STRLEN(fname);
				5588	fname[len - 2] = 'u';
				5589	fname[len - 1] = 'g';
				5590	sug_write(spin, fname);
				5591
				5592	theend:
				5593	vim_free(fname);
				5594	if (free_slang)
				5595	slang_free(slang);
				5596	free_blocks(spin->si_blocks);
				5597	close_spellbuf(spin->si_spellbuf);
				5598	}
				5599
				5600	/*
				5601	* Build the soundfold trie for language "slang".
				5602	*/
				5603	static int
				5604	sug_filltree(spellinfo_T spin, slang_T slang)
				5605	{
				5606	char_u *byts;
				5607	idx_T *idxs;
				5608	int depth;
				5609	idx_T arridx[MAXWLEN];
				5610	int curi[MAXWLEN];
				5611	char_u tword[MAXWLEN];
				5612	char_u tsalword[MAXWLEN];
				5613	int c;
				5614	idx_T n;
				5615	unsigned words_done = 0;
				5616	int wordcount[MAXWLEN];
				5617
				5618	/* We use si_foldroot for the soundfolded trie. */
				5619	spin->si_foldroot = wordtree_alloc(spin);
				5620	if (spin->si_foldroot == NULL)
				5621	return FAIL;
				5622
				5623	/* let tree_add_word() know we're adding to the soundfolded tree */
				5624	spin->si_sugtree = TRUE;
				5625
				5626	/*
				5627	* Go through the whole case-folded tree, soundfold each word and put it
				5628	* in the trie.
				5629	*/
				5630	byts = slang->sl_fbyts;
				5631	idxs = slang->sl_fidxs;
				5632
				5633	arridx[0] = 0;
				5634	curi[0] = 1;
				5635	wordcount[0] = 0;
				5636
				5637	depth = 0;
				5638	while (depth >= 0 && !got_int)
				5639	{
				5640	if (curi[depth] > byts[arridx[depth]])
				5641	{
				5642	/* Done all bytes at this node, go up one level. */
				5643	idxs[arridx[depth]] = wordcount[depth];
				5644	if (depth > 0)
				5645	wordcount[depth - 1] += wordcount[depth];
				5646
				5647	--depth;
				5648	line_breakcheck();
				5649	}
				5650	else
				5651	{
				5652
				5653	/* Do one more byte at this node. */
				5654	n = arridx[depth] + curi[depth];
				5655	++curi[depth];
				5656
				5657	c = byts[n];
				5658	if (c == 0)
				5659	{
				5660	/* Sound-fold the word. */
				5661	tword[depth] = NUL;
				5662	spell_soundfold(slang, tword, TRUE, tsalword);
				5663
				5664	/* We use the "flags" field for the MSB of the wordnr,
				5665	* "region" for the LSB of the wordnr. */
				5666	if (tree_add_word(spin, tsalword, spin->si_foldroot,
				5667	words_done >> 16, words_done & 0xffff,
				5668	0) == FAIL)
				5669	return FAIL;
				5670
				5671	++words_done;
				5672	++wordcount[depth];
				5673
				5674	/* Reset the block count each time to avoid compression
				5675	* kicking in. */
				5676	spin->si_blocks_cnt = 0;
				5677
				5678	/* Skip over any other NUL bytes (same word with different
				5679	* flags). */
				5680	while (byts[n + 1] == 0)
				5681	{
				5682	++n;
				5683	++curi[depth];
				5684	}
				5685	}
				5686	else
				5687	{
				5688	/* Normal char, go one level deeper. */
				5689	tword[depth++] = c;
				5690	arridx[depth] = idxs[n];
				5691	curi[depth] = 1;
				5692	wordcount[depth] = 0;
				5693	}
				5694	}
				5695	}
				5696
				5697	smsg((char_u *)_("Total number of words: %d"), words_done);
				5698
				5699	return OK;
				5700	}
				5701
				5702	/*
				5703	* Make the table that links each word in the soundfold trie to the words it
				5704	* can be produced from.
				5705	* This is not unlike lines in a file, thus use a memfile to be able to access
				5706	* the table efficiently.
				5707	* Returns FAIL when out of memory.
				5708	*/
				5709	static int
				5710	sug_maketable(spellinfo_T *spin)
				5711	{
				5712	garray_T ga;
				5713	int res = OK;
				5714
				5715	/* Allocate a buffer, open a memline for it and create the swap file
				5716	* (uses a temp file, not a .swp file). */
				5717	spin->si_spellbuf = open_spellbuf();
				5718	if (spin->si_spellbuf == NULL)
				5719	return FAIL;
				5720
				5721	/* Use a buffer to store the line info, avoids allocating many small
				5722	* pieces of memory. */
				5723	ga_init2(&ga, 1, 100);
				5724
				5725	/* recursively go through the tree */
				5726	if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
				5727	res = FAIL;
				5728
				5729	ga_clear(&ga);
				5730	return res;
				5731	}
				5732
				5733	/*
				5734	* Fill the table for one node and its children.
				5735	* Returns the wordnr at the start of the node.
				5736	* Returns -1 when out of memory.
				5737	*/
				5738	static int
				5739	sug_filltable(
				5740	spellinfo_T *spin,
				5741	wordnode_T *node,
				5742	int startwordnr,
				5743	garray_T gap) / place to store line of numbers */
				5744	{
				5745	wordnode_T p, np;
				5746	int wordnr = startwordnr;
				5747	int nr;
				5748	int prev_nr;
				5749
				5750	for (p = node; p != NULL; p = p->wn_sibling)
				5751	{
				5752	if (p->wn_byte == NUL)
				5753	{
				5754	gap->ga_len = 0;
				5755	prev_nr = 0;
				5756	for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
				5757	{
				5758	if (ga_grow(gap, 10) == FAIL)
				5759	return -1;
				5760
				5761	nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
				5762	/* Compute the offset from the previous nr and store the
				5763	* offset in a way that it takes a minimum number of bytes.
				5764	* It's a bit like utf-8, but without the need to mark
				5765	* following bytes. */
				5766	nr -= prev_nr;
				5767	prev_nr += nr;
				5768	gap->ga_len += offset2bytes(nr,
				5769	(char_u *)gap->ga_data + gap->ga_len);
				5770	}
				5771
				5772	/* add the NUL byte */
				5773	((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
				5774
				5775	if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
				5776	gap->ga_data, gap->ga_len, TRUE) == FAIL)
				5777	return -1;
				5778	++wordnr;
				5779
				5780	/* Remove extra NUL entries, we no longer need them. We don't
				5781	* bother freeing the nodes, the won't be reused anyway. */
				5782	while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
				5783	p->wn_sibling = p->wn_sibling->wn_sibling;
				5784
				5785	/* Clear the flags on the remaining NUL node, so that compression
				5786	* works a lot better. */
				5787	p->wn_flags = 0;
				5788	p->wn_region = 0;
				5789	}
				5790	else
				5791	{
				5792	wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
				5793	if (wordnr == -1)
				5794	return -1;
				5795	}
				5796	}
				5797	return wordnr;
				5798	}
				5799
				5800	/*
				5801	* Convert an offset into a minimal number of bytes.
				5802	* Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
				5803	* bytes.
				5804	*/
				5805	static int
				5806	offset2bytes(int nr, char_u *buf)
				5807	{
				5808	int rem;
				5809	int b1, b2, b3, b4;
				5810
				5811	/* Split the number in parts of base 255. We need to avoid NUL bytes. */
				5812	b1 = nr % 255 + 1;
				5813	rem = nr / 255;
				5814	b2 = rem % 255 + 1;
				5815	rem = rem / 255;
				5816	b3 = rem % 255 + 1;
				5817	b4 = rem / 255 + 1;
				5818
				5819	if (b4 > 1 \|\| b3 > 0x1f) /* 4 bytes */
				5820	{
				5821	buf[0] = 0xe0 + b4;
				5822	buf[1] = b3;
				5823	buf[2] = b2;
				5824	buf[3] = b1;
				5825	return 4;
				5826	}
				5827	if (b3 > 1 \|\| b2 > 0x3f ) /* 3 bytes */
				5828	{
				5829	buf[0] = 0xc0 + b3;
				5830	buf[1] = b2;
				5831	buf[2] = b1;
				5832	return 3;
				5833	}
				5834	if (b2 > 1 \|\| b1 > 0x7f ) /* 2 bytes */
				5835	{
				5836	buf[0] = 0x80 + b2;
				5837	buf[1] = b1;
				5838	return 2;
				5839	}
				5840	/* 1 byte */
				5841	buf[0] = b1;
				5842	return 1;
				5843	}
				5844
				5845	/*
				5846	* Write the .sug file in "fname".
				5847	*/
				5848	static void
				5849	sug_write(spellinfo_T spin, char_u fname)
				5850	{
				5851	FILE *fd;
				5852	wordnode_T *tree;
				5853	int nodecount;
				5854	int wcount;
				5855	char_u *line;
				5856	linenr_T lnum;
				5857	int len;
				5858
				5859	/* Create the file. Note that an existing file is silently overwritten! */
				5860	fd = mch_fopen((char *)fname, "w");
				5861	if (fd == NULL)
				5862	{
				5863	EMSG2(_(e_notopen), fname);
				5864	return;
				5865	}
				5866
				5867	vim_snprintf((char *)IObuff, IOSIZE,
				5868	_("Writing suggestion file %s ..."), fname);
				5869	spell_message(spin, IObuff);
				5870
				5871	/*
				5872	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				5873	*/
				5874	if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
				5875	{
				5876	EMSG(_(e_write));
				5877	goto theend;
				5878	}
				5879	putc(VIMSUGVERSION, fd); /* <versionnr> */
				5880
				5881	/* Write si_sugtime to the file. */
				5882	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5883
				5884	/*
				5885	* <SUGWORDTREE>
				5886	*/
				5887	spin->si_memtot = 0;
				5888	tree = spin->si_foldroot->wn_sibling;
				5889
				5890	/* Clear the index and wnode fields in the tree. */
				5891	clear_node(tree);
				5892
				5893	/* Count the number of nodes. Needed to be able to allocate the
				5894	* memory when reading the nodes. Also fills in index for shared
				5895	* nodes. */
				5896	nodecount = put_node(NULL, tree, 0, 0, FALSE);
				5897
				5898	/* number of nodes in 4 bytes */
				5899	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5900	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5901
				5902	/* Write the nodes. */
				5903	(void)put_node(fd, tree, 0, 0, FALSE);
				5904
				5905	/*
				5906	* <SUGTABLE>: <sugwcount> <sugline> ...
				5907	*/
				5908	wcount = spin->si_spellbuf->b_ml.ml_line_count;
				5909	put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
				5910
				5911	for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
				5912	{
				5913	/* <sugline>: <sugnr> ... NUL */
				5914	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
				5915	len = (int)STRLEN(line) + 1;
				5916	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
				5917	{
				5918	EMSG(_(e_write));
				5919	goto theend;
				5920	}
				5921	spin->si_memtot += len;
				5922	}
				5923
				5924	/* Write another byte to check for errors. */
				5925	if (putc(0, fd) == EOF)
				5926	EMSG(_(e_write));
				5927
				5928	vim_snprintf((char *)IObuff, IOSIZE,
				5929	_("Estimated runtime memory use: %d bytes"), spin->si_memtot);
				5930	spell_message(spin, IObuff);
				5931
				5932	theend:
				5933	/* close the file */
				5934	fclose(fd);
				5935	}
				5936
				5937
				5938	/*
				5939	* Create a Vim spell file from one or more word lists.
				5940	* "fnames[0]" is the output file name.
				5941	* "fnames[fcount - 1]" is the last input file name.
				5942	* Exception: when "fnames[0]" ends in ".add" it's used as the input file name
				5943	* and ".spl" is appended to make the output file name.
				5944	*/
				5945	void
				5946	mkspell(
				5947	int fcount,
				5948	char_u **fnames,
				5949	int ascii, /* -ascii argument given */
				5950	int over_write, /* overwrite existing output file */
				5951	int added_word) /* invoked through "zg" */
				5952	{
				5953	char_u *fname = NULL;
				5954	char_u *wfname;
				5955	char_u **innames;
				5956	int incount;
				5957	afffile_T *(afile[8]);
				5958	int i;
				5959	int len;
				5960	stat_T st;
				5961	int error = FALSE;
				5962	spellinfo_T spin;
				5963
				5964	vim_memset(&spin, 0, sizeof(spin));
				5965	spin.si_verbose = !added_word;
				5966	spin.si_ascii = ascii;
				5967	spin.si_followup = TRUE;
				5968	spin.si_rem_accents = TRUE;
				5969	ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
				5970	ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
				5971	ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
				5972	ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
				5973	ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
				5974	ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
				5975	hash_init(&spin.si_commonwords);
				5976	spin.si_newcompID = 127; /* start compound ID at first maximum */
				5977
				5978	/* default: fnames[0] is output file, following are input files */
				5979	innames = &fnames[1];
				5980	incount = fcount - 1;
				5981
				5982	wfname = alloc(MAXPATHL);
				5983	if (wfname == NULL)
				5984	return;
				5985
				5986	if (fcount >= 1)
				5987	{
				5988	len = (int)STRLEN(fnames[0]);
				5989	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
				5990	{
				5991	/* For ":mkspell path/en.latin1.add" output file is
				5992	* "path/en.latin1.add.spl". */
				5993	innames = &fnames[0];
				5994	incount = 1;
				5995	vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
				5996	}
				5997	else if (fcount == 1)
				5998	{
				5999	/* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
				6000	innames = &fnames[0];
				6001	incount = 1;
				6002	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6003	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6004	}
				6005	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
				6006	{
				6007	/* Name ends in ".spl", use as the file name. */
				6008	vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
				6009	}
				6010	else
				6011	/* Name should be language, make the file name from it. */
				6012	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6013	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6014
				6015	/* Check for .ascii.spl. */
				6016	if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
				6017	spin.si_ascii = TRUE;
				6018
				6019	/* Check for .add.spl. */
				6020	if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
				6021	spin.si_add = TRUE;
				6022	}
				6023
				6024	if (incount <= 0)
				6025	EMSG(_(e_invarg)); /* need at least output and input names */
				6026	else if (vim_strchr(gettail(wfname), '_') != NULL)
				6027	EMSG(_("E751: Output file name must not have region name"));
				6028	else if (incount > 8)
				6029	EMSG(_("E754: Only up to 8 regions supported"));
				6030	else
				6031	{
				6032	/* Check for overwriting before doing things that may take a lot of
				6033	* time. */
				6034	if (!over_write && mch_stat((char *)wfname, &st) >= 0)
				6035	{
				6036	EMSG(_(e_exists));
				6037	goto theend;
				6038	}
				6039	if (mch_isdir(wfname))
				6040	{
				6041	EMSG2(_(e_isadir2), wfname);
				6042	goto theend;
				6043	}
				6044
				6045	fname = alloc(MAXPATHL);
				6046	if (fname == NULL)
				6047	goto theend;
				6048
				6049	/*
				6050	* Init the aff and dic pointers.
				6051	* Get the region names if there are more than 2 arguments.
				6052	*/
				6053	for (i = 0; i < incount; ++i)
				6054	{
				6055	afile[i] = NULL;
				6056
				6057	if (incount > 1)
				6058	{
				6059	len = (int)STRLEN(innames[i]);
				6060	if (STRLEN(gettail(innames[i])) < 5
				6061	\|\| innames[i][len - 3] != '_')
				6062	{
				6063	EMSG2(_("E755: Invalid region in %s"), innames[i]);
				6064	goto theend;
				6065	}
				6066	spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
				6067	spin.si_region_name[i * 2 + 1] =
				6068	TOLOWER_ASC(innames[i][len - 1]);
				6069	}
				6070	}
				6071	spin.si_region_count = incount;
				6072
				6073	spin.si_foldroot = wordtree_alloc(&spin);
				6074	spin.si_keeproot = wordtree_alloc(&spin);
				6075	spin.si_prefroot = wordtree_alloc(&spin);
				6076	if (spin.si_foldroot == NULL
				6077	\|\| spin.si_keeproot == NULL
				6078	\|\| spin.si_prefroot == NULL)
				6079	{
				6080	free_blocks(spin.si_blocks);
				6081	goto theend;
				6082	}
				6083
				6084	/* When not producing a .add.spl file clear the character table when
				6085	* we encounter one in the .aff file. This means we dump the current
				6086	* one in the .spl file if the .aff file doesn't define one. That's
				6087	* better than guessing the contents, the table will match a
				6088	* previously loaded spell file. */
				6089	if (!spin.si_add)
				6090	spin.si_clear_chartab = TRUE;
				6091
				6092	/*
				6093	* Read all the .aff and .dic files.
				6094	* Text is converted to 'encoding'.
				6095	* Words are stored in the case-folded and keep-case trees.
				6096	*/
				6097	for (i = 0; i < incount && !error; ++i)
				6098	{
				6099	spin.si_conv.vc_type = CONV_NONE;
				6100	spin.si_region = 1 << i;
				6101
				6102	vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
				6103	if (mch_stat((char *)fname, &st) >= 0)
				6104	{
				6105	/* Read the .aff file. Will init "spin->si_conv" based on the
				6106	* "SET" line. */
				6107	afile[i] = spell_read_aff(&spin, fname);
				6108	if (afile[i] == NULL)
				6109	error = TRUE;
				6110	else
				6111	{
				6112	/* Read the .dic file and store the words in the trees. */
				6113	vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
				6114	innames[i]);
				6115	if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
				6116	error = TRUE;
				6117	}
				6118	}
				6119	else
				6120	{
				6121	/* No .aff file, try reading the file as a word list. Store
				6122	* the words in the trees. */
				6123	if (spell_read_wordfile(&spin, innames[i]) == FAIL)
				6124	error = TRUE;
				6125	}
				6126
				6127	#ifdef FEAT_MBYTE
				6128	/* Free any conversion stuff. */
				6129	convert_setup(&spin.si_conv, NULL, NULL);
				6130	#endif
				6131	}
				6132
				6133	if (spin.si_compflags != NULL && spin.si_nobreak)
				6134	MSG(_("Warning: both compounding and NOBREAK specified"));
				6135
				6136	if (!error && !got_int)
				6137	{
				6138	/*
				6139	* Combine tails in the tree.
				6140	*/
				6141	spell_message(&spin, (char_u *)_(msg_compressing));
				6142	wordtree_compress(&spin, spin.si_foldroot);
				6143	wordtree_compress(&spin, spin.si_keeproot);
				6144	wordtree_compress(&spin, spin.si_prefroot);
				6145	}
				6146
				6147	if (!error && !got_int)
				6148	{
				6149	/*
				6150	* Write the info in the spell file.
				6151	*/
				6152	vim_snprintf((char *)IObuff, IOSIZE,
				6153	_("Writing spell file %s ..."), wfname);
				6154	spell_message(&spin, IObuff);
				6155
				6156	error = write_vim_spell(&spin, wfname) == FAIL;
				6157
				6158	spell_message(&spin, (char_u *)_("Done!"));
				6159	vim_snprintf((char *)IObuff, IOSIZE,
				6160	_("Estimated runtime memory use: %d bytes"), spin.si_memtot);
				6161	spell_message(&spin, IObuff);
				6162
				6163	/*
				6164	* If the file is loaded need to reload it.
				6165	*/
				6166	if (!error)
				6167	spell_reload_one(wfname, added_word);
				6168	}
				6169
				6170	/* Free the allocated memory. */
				6171	ga_clear(&spin.si_rep);
				6172	ga_clear(&spin.si_repsal);
				6173	ga_clear(&spin.si_sal);
				6174	ga_clear(&spin.si_map);
				6175	ga_clear(&spin.si_comppat);
				6176	ga_clear(&spin.si_prefcond);
				6177	hash_clear_all(&spin.si_commonwords, 0);
				6178
				6179	/* Free the .aff file structures. */
				6180	for (i = 0; i < incount; ++i)
				6181	if (afile[i] != NULL)
				6182	spell_free_aff(afile[i]);
				6183
				6184	/* Free all the bits and pieces at once. */
				6185	free_blocks(spin.si_blocks);
				6186
				6187	/*
				6188	* If there is soundfolding info and no NOSUGFILE item create the
				6189	* .sug file with the soundfolded word trie.
				6190	*/
				6191	if (spin.si_sugtime != 0 && !error && !got_int)
				6192	spell_make_sugfile(&spin, wfname);
				6193
				6194	}
				6195
				6196	theend:
				6197	vim_free(fname);
				6198	vim_free(wfname);
				6199	}
				6200
				6201	/*
				6202	* Display a message for spell file processing when 'verbose' is set or using
				6203	* ":mkspell". "str" can be IObuff.
				6204	*/
				6205	static void
				6206	spell_message(spellinfo_T spin, char_u str)
				6207	{
				6208	if (spin->si_verbose \|\| p_verbose > 2)
				6209	{
				6210	if (!spin->si_verbose)
				6211	verbose_enter();
				6212	MSG(str);
				6213	out_flush();
				6214	if (!spin->si_verbose)
				6215	verbose_leave();
				6216	}
				6217	}
				6218
				6219	/*
				6220	* ":[count]spellgood {word}"
				6221	* ":[count]spellwrong {word}"
				6222	* ":[count]spellundo {word}"
				6223	*/
				6224	void
				6225	ex_spell(exarg_T *eap)
				6226	{
				6227	spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
				6228	eap->forceit ? 0 : (int)eap->line2,
				6229	eap->cmdidx == CMD_spellundo);
				6230	}
				6231
				6232	/*
				6233	* Add "word[len]" to 'spellfile' as a good or bad word.
				6234	*/
				6235	void
				6236	spell_add_word(
				6237	char_u *word,
				6238	int len,
				6239	int bad,
				6240	int idx, /* "zG" and "zW": zero, otherwise index in
				6241	'spellfile' */
				6242	int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */
				6243	{
				6244	FILE *fd = NULL;
				6245	buf_T *buf = NULL;
				6246	int new_spf = FALSE;
				6247	char_u *fname;
				6248	char_u *fnamebuf = NULL;
				6249	char_u line[MAXWLEN * 2];
				6250	long fpos, fpos_next = 0;
				6251	int i;
				6252	char_u *spf;
				6253
				6254	if (idx == 0) /* use internal wordlist */
				6255	{
				6256	if (int_wordlist == NULL)
				6257	{
				6258	int_wordlist = vim_tempname('s', FALSE);
				6259	if (int_wordlist == NULL)
				6260	return;
				6261	}
				6262	fname = int_wordlist;
				6263	}
				6264	else
				6265	{
				6266	/* If 'spellfile' isn't set figure out a good default value. */
				6267	if (*curwin->w_s->b_p_spf == NUL)
				6268	{
				6269	init_spellfile();
				6270	new_spf = TRUE;
				6271	}
				6272
				6273	if (*curwin->w_s->b_p_spf == NUL)
				6274	{
				6275	EMSG2(_(e_notset), "spellfile");
				6276	return;
				6277	}
				6278	fnamebuf = alloc(MAXPATHL);
				6279	if (fnamebuf == NULL)
				6280	return;
				6281
				6282	for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
				6283	{
				6284	copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
				6285	if (i == idx)
				6286	break;
				6287	if (*spf == NUL)
				6288	{
				6289	EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
				6290	vim_free(fnamebuf);
				6291	return;
				6292	}
				6293	}
				6294
				6295	/* Check that the user isn't editing the .add file somewhere. */
				6296	buf = buflist_findname_exp(fnamebuf);
				6297	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
				6298	buf = NULL;
				6299	if (buf != NULL && bufIsChanged(buf))
				6300	{
				6301	EMSG(_(e_bufloaded));
				6302	vim_free(fnamebuf);
				6303	return;
				6304	}
				6305
				6306	fname = fnamebuf;
				6307	}
				6308
				6309	if (bad \|\| undo)
				6310	{
				6311	/* When the word appears as good word we need to remove that one,
				6312	* since its flags sort before the one with WF_BANNED. */
				6313	fd = mch_fopen((char *)fname, "r");
				6314	if (fd != NULL)
				6315	{
				6316	while (!vim_fgets(line, MAXWLEN * 2, fd))
				6317	{
				6318	fpos = fpos_next;
				6319	fpos_next = ftell(fd);
				6320	if (STRNCMP(word, line, len) == 0
				6321	&& (line[len] == '/' \|\| line[len] < ' '))
				6322	{
				6323	/* Found duplicate word. Remove it by writing a '#' at
				6324	* the start of the line. Mixing reading and writing
				6325	* doesn't work for all systems, close the file first. */
				6326	fclose(fd);
				6327	fd = mch_fopen((char *)fname, "r+");
				6328	if (fd == NULL)
				6329	break;
				6330	if (fseek(fd, fpos, SEEK_SET) == 0)
				6331	{
				6332	fputc('#', fd);
				6333	if (undo)
				6334	{
				6335	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6336	smsg((char_u )_("Word '%.s' removed from %s"),
				6337	len, word, NameBuff);
				6338	}
				6339	}
				6340	fseek(fd, fpos_next, SEEK_SET);
				6341	}
				6342	}
				6343	if (fd != NULL)
				6344	fclose(fd);
				6345	}
				6346	}
				6347
				6348	if (!undo)
				6349	{
				6350	fd = mch_fopen((char *)fname, "a");
				6351	if (fd == NULL && new_spf)
				6352	{
				6353	char_u *p;
				6354
				6355	/* We just initialized the 'spellfile' option and can't open the
				6356	* file. We may need to create the "spell" directory first. We
				6357	* already checked the runtime directory is writable in
				6358	* init_spellfile(). */
				6359	if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
				6360	{
				6361	int c = *p;
				6362
				6363	/* The directory doesn't exist. Try creating it and opening
				6364	* the file again. */
				6365	*p = NUL;
				6366	vim_mkdir(fname, 0755);
				6367	*p = c;
				6368	fd = mch_fopen((char *)fname, "a");
				6369	}
				6370	}
				6371
				6372	if (fd == NULL)
				6373	EMSG2(_(e_notopen), fname);
				6374	else
				6375	{
				6376	if (bad)
				6377	fprintf(fd, "%.*s/!\n", len, word);
				6378	else
				6379	fprintf(fd, "%.*s\n", len, word);
				6380	fclose(fd);
				6381
				6382	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6383	smsg((char_u )_("Word '%.s' added to %s"), len, word, NameBuff);
				6384	}
				6385	}
				6386
				6387	if (fd != NULL)
				6388	{
				6389	/* Update the .add.spl file. */
				6390	mkspell(1, &fname, FALSE, TRUE, TRUE);
				6391
				6392	/* If the .add file is edited somewhere, reload it. */
				6393	if (buf != NULL)
				6394	buf_reload(buf, buf->b_orig_mode);
				6395
				6396	redraw_all_later(SOME_VALID);
				6397	}
				6398	vim_free(fnamebuf);
				6399	}
				6400
				6401	/*
				6402	* Initialize 'spellfile' for the current buffer.
				6403	*/
				6404	static void
				6405	init_spellfile(void)
				6406	{
				6407	char_u *buf;
				6408	int l;
				6409	char_u *fname;
				6410	char_u *rtp;
				6411	char_u *lend;
				6412	int aspath = FALSE;
				6413	char_u *lstart = curbuf->b_s.b_p_spl;
				6414
				6415	if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
				6416	{
				6417	buf = alloc(MAXPATHL);
				6418	if (buf == NULL)
				6419	return;
				6420
				6421	/* Find the end of the language name. Exclude the region. If there
				6422	* is a path separator remember the start of the tail. */
				6423	for (lend = curwin->w_s->b_p_spl; *lend != NUL
				6424	&& vim_strchr((char_u )",._", lend) == NULL; ++lend)
				6425	if (vim_ispathsep(*lend))
				6426	{
				6427	aspath = TRUE;
				6428	lstart = lend + 1;
				6429	}
				6430
				6431	/* Loop over all entries in 'runtimepath'. Use the first one where we
				6432	* are allowed to write. */
				6433	rtp = p_rtp;
				6434	while (*rtp != NUL)
				6435	{
				6436	if (aspath)
				6437	/* Use directory of an entry with path, e.g., for
				6438	* "/dir/lg.utf-8.spl" use "/dir". */
				6439	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6440	lstart - curbuf->b_s.b_p_spl - 1);
				6441	else
				6442	/* Copy the path from 'runtimepath' to buf[]. */
				6443	copy_option_part(&rtp, buf, MAXPATHL, ",");
				6444	if (filewritable(buf) == 2)
				6445	{
				6446	/* Use the first language name from 'spelllang' and the
				6447	* encoding used in the first loaded .spl file. */
				6448	if (aspath)
				6449	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6450	lend - curbuf->b_s.b_p_spl);
				6451	else
				6452	{
				6453	/* Create the "spell" directory if it doesn't exist yet. */
				6454	l = (int)STRLEN(buf);
				6455	vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
				6456	if (filewritable(buf) != 2)
				6457	vim_mkdir(buf, 0755);
				6458
				6459	l = (int)STRLEN(buf);
				6460	vim_snprintf((char *)buf + l, MAXPATHL - l,
				6461	"/%.*s", (int)(lend - lstart), lstart);
				6462	}
				6463	l = (int)STRLEN(buf);
				6464	fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
				6465	->lp_slang->sl_fname;
				6466	vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
				6467	fname != NULL
				6468	&& strstr((char *)gettail(fname), ".ascii.") != NULL
				6469	? (char_u *)"ascii" : spell_enc());
				6470	set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
				6471	break;
				6472	}
				6473	aspath = FALSE;
				6474	}
				6475
				6476	vim_free(buf);
				6477	}
				6478	}
				6479
				6480
				6481
				6482	/*
				6483	* Set the spell character tables from strings in the affix file.
				6484	*/
				6485	static int
				6486	set_spell_chartab(char_u fol, char_u low, char_u *upp)
				6487	{
				6488	/* We build the new tables here first, so that we can compare with the
				6489	* previous one. */
				6490	spelltab_T new_st;
				6491	char_u pf = fol, pl = low, *pu = upp;
				6492	int f, l, u;
				6493
				6494	clear_spell_chartab(&new_st);
				6495
				6496	while (*pf != NUL)
				6497	{
				6498	if (pl == NUL \|\| pu == NUL)
				6499	{
				6500	EMSG(_(e_affform));
				6501	return FAIL;
				6502	}
				6503	#ifdef FEAT_MBYTE
				6504	f = mb_ptr2char_adv(&pf);
				6505	l = mb_ptr2char_adv(&pl);
				6506	u = mb_ptr2char_adv(&pu);
				6507	#else
				6508	f = *pf++;
				6509	l = *pl++;
				6510	u = *pu++;
				6511	#endif
				6512	/* Every character that appears is a word character. */
				6513	if (f < 256)
				6514	new_st.st_isw[f] = TRUE;
				6515	if (l < 256)
				6516	new_st.st_isw[l] = TRUE;
				6517	if (u < 256)
				6518	new_st.st_isw[u] = TRUE;
				6519
				6520	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				6521	* case-folding */
				6522	if (l < 256 && l != f)
				6523	{
				6524	if (f >= 256)
				6525	{
				6526	EMSG(_(e_affrange));
				6527	return FAIL;
				6528	}
				6529	new_st.st_fold[l] = f;
				6530	}
				6531
				6532	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				6533	* case-folding, it's upper case and the "UPP" is the upper case of
				6534	* "FOL" . */
				6535	if (u < 256 && u != f)
				6536	{
				6537	if (f >= 256)
				6538	{
				6539	EMSG(_(e_affrange));
				6540	return FAIL;
				6541	}
				6542	new_st.st_fold[u] = f;
				6543	new_st.st_isu[u] = TRUE;
				6544	new_st.st_upper[f] = u;
				6545	}
				6546	}
				6547
				6548	if (pl != NUL \|\| pu != NUL)
				6549	{
				6550	EMSG(_(e_affform));
				6551	return FAIL;
				6552	}
				6553
				6554	return set_spell_finish(&new_st);
				6555	}
				6556
				6557	/*
				6558	* Set the spell character tables from strings in the .spl file.
				6559	*/
				6560	static void
				6561	set_spell_charflags(
				6562	char_u *flags,
				6563	int cnt, /* length of "flags" */
				6564	char_u *fol)
				6565	{
				6566	/* We build the new tables here first, so that we can compare with the
				6567	* previous one. */
				6568	spelltab_T new_st;
				6569	int i;
				6570	char_u *p = fol;
				6571	int c;
				6572
				6573	clear_spell_chartab(&new_st);
				6574
				6575	for (i = 0; i < 128; ++i)
				6576	{
				6577	if (i < cnt)
				6578	{
				6579	new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
				6580	new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
				6581	}
				6582
				6583	if (*p != NUL)
				6584	{
				6585	#ifdef FEAT_MBYTE
				6586	c = mb_ptr2char_adv(&p);
				6587	#else
				6588	c = *p++;
				6589	#endif
				6590	new_st.st_fold[i + 128] = c;
				6591	if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
				6592	new_st.st_upper[c] = i + 128;
				6593	}
				6594	}
				6595
				6596	(void)set_spell_finish(&new_st);
				6597	}
				6598
				6599	static int
				6600	set_spell_finish(spelltab_T *new_st)
				6601	{
				6602	int i;
				6603
				6604	if (did_set_spelltab)
				6605	{
				6606	/* check that it's the same table */
				6607	for (i = 0; i < 256; ++i)
				6608	{
				6609	if (spelltab.st_isw[i] != new_st->st_isw[i]
				6610	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				6611	\|\| spelltab.st_fold[i] != new_st->st_fold[i]
				6612	\|\| spelltab.st_upper[i] != new_st->st_upper[i])
				6613	{
				6614	EMSG(_("E763: Word characters differ between spell files"));
				6615	return FAIL;
				6616	}
				6617	}
				6618	}
				6619	else
				6620	{
				6621	/* copy the new spelltab into the one being used */
				6622	spelltab = *new_st;
				6623	did_set_spelltab = TRUE;
				6624	}
				6625
				6626	return OK;
				6627	}
				6628
				6629	/*
				6630	* Write the table with prefix conditions to the .spl file.
				6631	* When "fd" is NULL only count the length of what is written.
				6632	*/
				6633	static int
				6634	write_spell_prefcond(FILE fd, garray_T gap)
				6635	{
				6636	int i;
				6637	char_u *p;
				6638	int len;
				6639	int totlen;
				6640	size_t x = 1; /* collect return value of fwrite() */
				6641
				6642	if (fd != NULL)
				6643	put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
				6644
				6645	totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
				6646
				6647	for (i = 0; i < gap->ga_len; ++i)
				6648	{
				6649	/* <prefcond> : <condlen> <condstr> */
				6650	p = ((char_u **)gap->ga_data)[i];
				6651	if (p != NULL)
				6652	{
				6653	len = (int)STRLEN(p);
				6654	if (fd != NULL)
				6655	{
				6656	fputc(len, fd);
				6657	x &= fwrite(p, (size_t)len, (size_t)1, fd);
				6658	}
				6659	totlen += len;
				6660	}
				6661	else if (fd != NULL)
				6662	fputc(0, fd);
				6663	}
				6664
				6665	return totlen;
				6666	}
				6667
				6668
				6669	/*
				6670	* Use map string "map" for languages "lp".
				6671	*/
				6672	static void
				6673	set_map_str(slang_T lp, char_u map)
				6674	{
				6675	char_u *p;
				6676	int headc = 0;
				6677	int c;
				6678	int i;
				6679
				6680	if (*map == NUL)
				6681	{
				6682	lp->sl_has_map = FALSE;
				6683	return;
				6684	}
				6685	lp->sl_has_map = TRUE;
				6686
				6687	/* Init the array and hash tables empty. */
				6688	for (i = 0; i < 256; ++i)
				6689	lp->sl_map_array[i] = 0;
				6690	#ifdef FEAT_MBYTE
				6691	hash_init(&lp->sl_map_hash);
				6692	#endif
				6693
				6694	/*
				6695	* The similar characters are stored separated with slashes:
				6696	* "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
				6697	* before the same slash. For characters above 255 sl_map_hash is used.
				6698	*/
				6699	for (p = map; *p != NUL; )
				6700	{
				6701	#ifdef FEAT_MBYTE
				6702	c = mb_cptr2char_adv(&p);
				6703	#else
				6704	c = *p++;
				6705	#endif
				6706	if (c == '/')
				6707	headc = 0;
				6708	else
				6709	{
				6710	if (headc == 0)
				6711	headc = c;
				6712
				6713	#ifdef FEAT_MBYTE
				6714	/* Characters above 255 don't fit in sl_map_array[], put them in
				6715	* the hash table. Each entry is the char, a NUL the headchar and
				6716	* a NUL. */
				6717	if (c >= 256)
				6718	{
				6719	int cl = mb_char2len(c);
				6720	int headcl = mb_char2len(headc);
				6721	char_u *b;
				6722	hash_T hash;
				6723	hashitem_T *hi;
				6724
				6725	b = alloc((unsigned)(cl + headcl + 2));
				6726	if (b == NULL)
				6727	return;
				6728	mb_char2bytes(c, b);
				6729	b[cl] = NUL;
				6730	mb_char2bytes(headc, b + cl + 1);
				6731	b[cl + 1 + headcl] = NUL;
				6732	hash = hash_hash(b);
				6733	hi = hash_lookup(&lp->sl_map_hash, b, hash);
				6734	if (HASHITEM_EMPTY(hi))
				6735	hash_add_item(&lp->sl_map_hash, hi, b, hash);
				6736	else
				6737	{
				6738	/* This should have been checked when generating the .spl
				6739	* file. */
				6740	EMSG(_("E783: duplicate char in MAP entry"));
				6741	vim_free(b);
				6742	}
				6743	}
				6744	else
				6745	#endif
				6746	lp->sl_map_array[c] = headc;
				6747	}
				6748	}
				6749	}
				6750
				6751
				6752	#endif /* FEAT_SPELL */