Blame - src/spellfile.c - android_external_vim

blob: e5cc84164ba6d2d24e1acd4d5d05810fdf865665 [file] [log] [blame]

Bram Moolenaar	edf3f97	2016-08-29 22:49:24 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* spellfile.c: code for reading and writing spell files.
				12	*
				13	* See spell.c for information about spell checking.
				14	*/
				15
				16	/*
				17	* Vim spell file format: <HEADER>
				18	* <SECTIONS>
				19	* <LWORDTREE>
				20	* <KWORDTREE>
				21	* <PREFIXTREE>
				22	*
				23	* <HEADER>: <fileID> <versionnr>
				24	*
				25	* <fileID> 8 bytes "VIMspell"
				26	* <versionnr> 1 byte VIMSPELLVERSION
				27	*
				28	*
				29	* Sections make it possible to add information to the .spl file without
				30	* making it incompatible with previous versions. There are two kinds of
				31	* sections:
				32	* 1. Not essential for correct spell checking. E.g. for making suggestions.
				33	* These are skipped when not supported.
				34	* 2. Optional information, but essential for spell checking when present.
				35	* E.g. conditions for affixes. When this section is present but not
				36	* supported an error message is given.
				37	*
				38	* <SECTIONS>: <section> ... <sectionend>
				39	*
				40	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				41	*
				42	* <sectionID> 1 byte number from 0 to 254 identifying the section
				43	*
				44	* <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
				45	* spell checking
				46	*
				47	* <sectionlen> 4 bytes length of section contents, MSB first
				48	*
				49	* <sectionend> 1 byte SN_END
				50	*
				51	*
				52	* sectionID == SN_INFO: <infotext>
				53	* <infotext> N bytes free format text with spell file info (version,
				54	* website, etc)
				55	*
				56	* sectionID == SN_REGION: <regionname> ...
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	57	* <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower
				58	* case. First <regionname> is region 1.
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	59	*
				60	* sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
				61	* <folcharslen> <folchars>
				62	* <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
				63	* <charflags> N bytes List of flags (first one is for character 128):
				64	* 0x01 word character CF_WORD
				65	* 0x02 upper-case character CF_UPPER
				66	* <folcharslen> 2 bytes Number of bytes in <folchars>.
				67	* <folchars> N bytes Folded characters, first one is for character 128.
				68	*
				69	* sectionID == SN_MIDWORD: <midword>
				70	* <midword> N bytes Characters that are word characters only when used
				71	* in the middle of a word.
				72	*
				73	* sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
				74	* <prefcondcnt> 2 bytes Number of <prefcond> items following.
				75	* <prefcond> : <condlen> <condstr>
				76	* <condlen> 1 byte Length of <condstr>.
				77	* <condstr> N bytes Condition for the prefix.
				78	*
				79	* sectionID == SN_REP: <repcount> <rep> ...
				80	* <repcount> 2 bytes number of <rep> items, MSB first.
				81	* <rep> : <repfromlen> <repfrom> <reptolen> <repto>
				82	* <repfromlen> 1 byte length of <repfrom>
				83	* <repfrom> N bytes "from" part of replacement
				84	* <reptolen> 1 byte length of <repto>
				85	* <repto> N bytes "to" part of replacement
				86	*
				87	* sectionID == SN_REPSAL: <repcount> <rep> ...
				88	* just like SN_REP but for soundfolded words
				89	*
				90	* sectionID == SN_SAL: <salflags> <salcount> <sal> ...
				91	* <salflags> 1 byte flags for soundsalike conversion:
				92	* SAL_F0LLOWUP
				93	* SAL_COLLAPSE
				94	* SAL_REM_ACCENTS
				95	* <salcount> 2 bytes number of <sal> items following
				96	* <sal> : <salfromlen> <salfrom> <saltolen> <salto>
				97	* <salfromlen> 1 byte length of <salfrom>
				98	* <salfrom> N bytes "from" part of soundsalike
				99	* <saltolen> 1 byte length of <salto>
				100	* <salto> N bytes "to" part of soundsalike
				101	*
				102	* sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				103	* <sofofromlen> 2 bytes length of <sofofrom>
				104	* <sofofrom> N bytes "from" part of soundfold
				105	* <sofotolen> 2 bytes length of <sofoto>
				106	* <sofoto> N bytes "to" part of soundfold
				107	*
				108	* sectionID == SN_SUGFILE: <timestamp>
				109	* <timestamp> 8 bytes time in seconds that must match with .sug file
				110	*
				111	* sectionID == SN_NOSPLITSUGS: nothing
				112	*
				113	* sectionID == SN_NOCOMPOUNDSUGS: nothing
				114	*
				115	* sectionID == SN_WORDS: <word> ...
				116	* <word> N bytes NUL terminated common word
				117	*
				118	* sectionID == SN_MAP: <mapstr>
				119	* <mapstr> N bytes String with sequences of similar characters,
				120	* separated by slashes.
				121	*
				122	* sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
				123	* <comppatcount> <comppattern> ... <compflags>
				124	* <compmax> 1 byte Maximum nr of words in compound word.
				125	* <compminlen> 1 byte Minimal word length for compounding.
				126	* <compsylmax> 1 byte Maximum nr of syllables in compound word.
				127	* <compoptions> 2 bytes COMP_ flags.
				128	* <comppatcount> 2 bytes number of <comppattern> following
				129	* <compflags> N bytes Flags from COMPOUNDRULE items, separated by
				130	* slashes.
				131	*
				132	* <comppattern>: <comppatlen> <comppattext>
				133	* <comppatlen> 1 byte length of <comppattext>
				134	* <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
				135	*
				136	* sectionID == SN_NOBREAK: (empty, its presence is what matters)
				137	*
				138	* sectionID == SN_SYLLABLE: <syllable>
				139	* <syllable> N bytes String from SYLLABLE item.
				140	*
				141	* <LWORDTREE>: <wordtree>
				142	*
				143	* <KWORDTREE>: <wordtree>
				144	*
				145	* <PREFIXTREE>: <wordtree>
				146	*
				147	*
				148	* <wordtree>: <nodecount> <nodedata> ...
				149	*
				150	* <nodecount> 4 bytes Number of nodes following. MSB first.
				151	*
				152	* <nodedata>: <siblingcount> <sibling> ...
				153	*
				154	* <siblingcount> 1 byte Number of siblings in this node. The siblings
				155	* follow in sorted order.
				156	*
				157	* <sibling>: <byte> [ <nodeidx> <xbyte>
				158	* | <flags> [<flags2>] [<region>] [<affixID>]
				159	* | [<pflags>] <affixID> <prefcondnr> ]
				160	*
				161	* <byte> 1 byte Byte value of the sibling. Special cases:
				162	* BY_NOFLAGS: End of word without flags and for all
				163	* regions.
				164	* For PREFIXTREE <affixID> and
				165	* <prefcondnr> follow.
				166	* BY_FLAGS: End of word, <flags> follow.
				167	* For PREFIXTREE <pflags>, <affixID>
				168	* and <prefcondnr> follow.
				169	* BY_FLAGS2: End of word, <flags> and <flags2>
				170	* follow. Not used in PREFIXTREE.
				171	* BY_INDEX: Child of sibling is shared, <nodeidx>
				172	* and <xbyte> follow.
				173	*
				174	* <nodeidx> 3 bytes Index of child for this sibling, MSB first.
				175	*
				176	* <xbyte> 1 byte byte value of the sibling.
				177	*
				178	* <flags> 1 byte bitmask of:
				179	* WF_ALLCAP word must have only capitals
				180	* WF_ONECAP first char of word must be capital
				181	* WF_KEEPCAP keep-case word
				182	* WF_FIXCAP keep-case word, all caps not allowed
				183	* WF_RARE rare word
				184	* WF_BANNED bad word
				185	* WF_REGION <region> follows
				186	* WF_AFX <affixID> follows
				187	*
				188	* <flags2> 1 byte Bitmask of:
				189	* WF_HAS_AFF >> 8 word includes affix
				190	* WF_NEEDCOMP >> 8 word only valid in compound
				191	* WF_NOSUGGEST >> 8 word not used for suggestions
				192	* WF_COMPROOT >> 8 word already a compound
				193	* WF_NOCOMPBEF >> 8 no compounding before this word
				194	* WF_NOCOMPAFT >> 8 no compounding after this word
				195	*
				196	* <pflags> 1 byte bitmask of:
				197	* WFP_RARE rare prefix
				198	* WFP_NC non-combining prefix
				199	* WFP_UP letter after prefix made upper case
				200	*
				201	* <region> 1 byte Bitmask for regions in which word is valid. When
				202	* omitted it's valid in all regions.
				203	* Lowest bit is for region 1.
				204	*
				205	* <affixID> 1 byte ID of affix that can be used with this word. In
				206	* PREFIXTREE used for the required prefix ID.
				207	*
				208	* <prefcondnr> 2 bytes Prefix condition number, index in the <prefcond>
				209	* list from the SN_PREFCOND section.
				210	*
				211	* All text characters are in 'encoding', but stored as single bytes.
				212	*/
				213
				214	/*
				215	* Vim .sug file format: <SUGHEADER>
				216	* <SUGWORDTREE>
				217	* <SUGTABLE>
				218	*
				219	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				220	*
				221	* <fileID> 6 bytes "VIMsug"
				222	* <versionnr> 1 byte VIMSUGVERSION
				223	* <timestamp> 8 bytes timestamp that must match with .spl file
				224	*
				225	*
				226	* <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
				227	*
				228	*
				229	* <SUGTABLE>: <sugwcount> <sugline> ...
				230	*
				231	* <sugwcount> 4 bytes number of <sugline> following
				232	*
				233	* <sugline>: <sugnr> ... NUL
				234	*
				235	* <sugnr>: X bytes word number that results in this soundfolded word,
				236	* stored as an offset to the previous number in as
				237	* few bytes as possible, see offset2bytes())
				238	*/
				239
				240	#include "vim.h"
				241
				242	#if defined(FEAT_SPELL) \|\| defined(PROTO)
				243
				244	#ifndef UNIX /* it's in os_unix.h for Unix */
				245	# include <time.h> /* for time_t */
				246	#endif
				247
				248	#ifndef UNIX /* it's in os_unix.h for Unix */
				249	# include <time.h> /* for time_t */
				250	#endif
				251
				252	/* Special byte values for <byte>. Some are only used in the tree for
				253	* postponed prefixes, some only in the other trees. This is a bit messy... */
				254	#define BY_NOFLAGS 0 /* end of word without flags or region; for
				255	* postponed prefix: no <pflags> */
				256	#define BY_INDEX 1 /* child is shared, index follows */
				257	#define BY_FLAGS 2 /* end of word, <flags> byte follows; for
				258	* postponed prefix: <pflags> follows */
				259	#define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
				260	* follow; never used in prefix tree */
				261	#define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
				262
				263	/* Flags used in .spl file for soundsalike flags. */
				264	#define SAL_F0LLOWUP 1
				265	#define SAL_COLLAPSE 2
				266	#define SAL_REM_ACCENTS 4
				267
				268	#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
				269	#define VIMSPELLMAGICL 8
				270	#define VIMSPELLVERSION 50
				271
				272	/* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
				273	#define SN_REGION 0 /* <regionname> section */
				274	#define SN_CHARFLAGS 1 /* charflags section */
				275	#define SN_MIDWORD 2 /* <midword> section */
				276	#define SN_PREFCOND 3 /* <prefcond> section */
				277	#define SN_REP 4 /* REP items section */
				278	#define SN_SAL 5 /* SAL items section */
				279	#define SN_SOFO 6 /* soundfolding section */
				280	#define SN_MAP 7 /* MAP items section */
				281	#define SN_COMPOUND 8 /* compound words section */
				282	#define SN_SYLLABLE 9 /* syllable section */
				283	#define SN_NOBREAK 10 /* NOBREAK section */
				284	#define SN_SUGFILE 11 /* timestamp for .sug file */
				285	#define SN_REPSAL 12 /* REPSAL items section */
				286	#define SN_WORDS 13 /* common words */
				287	#define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
				288	#define SN_INFO 15 /* info section */
				289	#define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
				290	#define SN_END 255 /* end of sections */
				291
				292	#define SNF_REQUIRED 1 /* <sectionflags>: required section */
				293
				294	#define CF_WORD 0x01
				295	#define CF_UPPER 0x02
				296
				297	static int set_spell_finish(spelltab_T *new_st);
				298	static int write_spell_prefcond(FILE fd, garray_T gap);
				299	static char_u read_cnt_string(FILE fd, int cnt_bytes, int *lenp);
				300	static int read_region_section(FILE fd, slang_T slang, int len);
				301	static int read_charflags_section(FILE *fd);
				302	static int read_prefcond_section(FILE fd, slang_T lp);
				303	static int read_rep_section(FILE fd, garray_T gap, short *first);
				304	static int read_sal_section(FILE fd, slang_T slang);
				305	static int read_words_section(FILE fd, slang_T lp, int len);
				306	static int read_sofo_section(FILE fd, slang_T slang);
				307	static int read_compound(FILE fd, slang_T slang, int len);
				308	static int set_sofo(slang_T lp, char_u from, char_u *to);
				309	static void set_sal_first(slang_T *lp);
				310	#ifdef FEAT_MBYTE
				311	static int mb_str2wide(char_u s);
				312	#endif
				313	static int spell_read_tree(FILE fd, char_u bytsp, idx_T *idxsp, int prefixtree, int prefixcnt);
				314	static idx_T read_tree_node(FILE fd, char_u byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
				315	static void spell_reload_one(char_u *fname, int added_word);
				316	static void set_spell_charflags(char_u flags, int cnt, char_u upp);
				317	static int set_spell_chartab(char_u fol, char_u low, char_u *upp);
				318	static void set_map_str(slang_T lp, char_u map);
				319
				320
				321	static char *e_spell_trunc = N_("E758: Truncated spell file");
				322	static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
				323	static char *e_affname = N_("Affix name too long in %s line %d: %s");
				324	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				325	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				326	static char *msg_compressing = N_("Compressing word tree...");
				327
				328	/*
				329	* Load one spell file and store the info into a slang_T.
				330	*
				331	* This is invoked in three ways:
				332	* - From spell_load_cb() to load a spell file for the first time. "lang" is
				333	* the language name, "old_lp" is NULL. Will allocate an slang_T.
				334	* - To reload a spell file that was changed. "lang" is NULL and "old_lp"
				335	* points to the existing slang_T.
				336	* - Just after writing a .spl file; it's read back to produce the .sug file.
				337	* "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
				338	*
				339	* Returns the slang_T the spell file was loaded into. NULL for error.
				340	*/
				341	slang_T *
				342	spell_load_file(
				343	char_u *fname,
				344	char_u *lang,
				345	slang_T *old_lp,
				346	int silent) /* no error if file doesn't exist */
				347	{
				348	FILE *fd;
				349	char_u buf[VIMSPELLMAGICL];
				350	char_u *p;
				351	int i;
				352	int n;
				353	int len;
				354	char_u *save_sourcing_name = sourcing_name;
				355	linenr_T save_sourcing_lnum = sourcing_lnum;
				356	slang_T *lp = NULL;
				357	int c = 0;
				358	int res;
				359
				360	fd = mch_fopen((char *)fname, "r");
				361	if (fd == NULL)
				362	{
				363	if (!silent)
				364	EMSG2(_(e_notopen), fname);
				365	else if (p_verbose > 2)
				366	{
				367	verbose_enter();
				368	smsg((char_u *)e_notopen, fname);
				369	verbose_leave();
				370	}
				371	goto endFAIL;
				372	}
				373	if (p_verbose > 2)
				374	{
				375	verbose_enter();
				376	smsg((char_u *)_("Reading spell file \"%s\""), fname);
				377	verbose_leave();
				378	}
				379
				380	if (old_lp == NULL)
				381	{
				382	lp = slang_alloc(lang);
				383	if (lp == NULL)
				384	goto endFAIL;
				385
				386	/* Remember the file name, used to reload the file when it's updated. */
				387	lp->sl_fname = vim_strsave(fname);
				388	if (lp->sl_fname == NULL)
				389	goto endFAIL;
				390
				391	/* Check for .add.spl (_add.spl for VMS). */
				392	lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
				393	}
				394	else
				395	lp = old_lp;
				396
				397	/* Set sourcing_name, so that error messages mention the file name. */
				398	sourcing_name = fname;
				399	sourcing_lnum = 0;
				400
				401	/*
				402	* <HEADER>: <fileID>
				403	*/
				404	for (i = 0; i < VIMSPELLMAGICL; ++i)
				405	buf[i] = getc(fd); /* <fileID> */
				406	if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
				407	{
				408	EMSG(_("E757: This does not look like a spell file"));
				409	goto endFAIL;
				410	}
				411	c = getc(fd); /* <versionnr> */
				412	if (c < VIMSPELLVERSION)
				413	{
				414	EMSG(_("E771: Old spell file, needs to be updated"));
				415	goto endFAIL;
				416	}
				417	else if (c > VIMSPELLVERSION)
				418	{
				419	EMSG(_("E772: Spell file is for newer version of Vim"));
				420	goto endFAIL;
				421	}
				422
				423
				424	/*
				425	* <SECTIONS>: <section> ... <sectionend>
				426	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				427	*/
				428	for (;;)
				429	{
				430	n = getc(fd); /* <sectionID> or <sectionend> */
				431	if (n == SN_END)
				432	break;
				433	c = getc(fd); /* <sectionflags> */
				434	len = get4c(fd); /* <sectionlen> */
				435	if (len < 0)
				436	goto truncerr;
				437
				438	res = 0;
				439	switch (n)
				440	{
				441	case SN_INFO:
				442	lp->sl_info = read_string(fd, len); /* <infotext> */
				443	if (lp->sl_info == NULL)
				444	goto endFAIL;
				445	break;
				446
				447	case SN_REGION:
				448	res = read_region_section(fd, lp, len);
				449	break;
				450
				451	case SN_CHARFLAGS:
				452	res = read_charflags_section(fd);
				453	break;
				454
				455	case SN_MIDWORD:
				456	lp->sl_midword = read_string(fd, len); /* <midword> */
				457	if (lp->sl_midword == NULL)
				458	goto endFAIL;
				459	break;
				460
				461	case SN_PREFCOND:
				462	res = read_prefcond_section(fd, lp);
				463	break;
				464
				465	case SN_REP:
				466	res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
				467	break;
				468
				469	case SN_REPSAL:
				470	res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
				471	break;
				472
				473	case SN_SAL:
				474	res = read_sal_section(fd, lp);
				475	break;
				476
				477	case SN_SOFO:
				478	res = read_sofo_section(fd, lp);
				479	break;
				480
				481	case SN_MAP:
				482	p = read_string(fd, len); /* <mapstr> */
				483	if (p == NULL)
				484	goto endFAIL;
				485	set_map_str(lp, p);
				486	vim_free(p);
				487	break;
				488
				489	case SN_WORDS:
				490	res = read_words_section(fd, lp, len);
				491	break;
				492
				493	case SN_SUGFILE:
				494	lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
				495	break;
				496
				497	case SN_NOSPLITSUGS:
				498	lp->sl_nosplitsugs = TRUE;
				499	break;
				500
				501	case SN_NOCOMPOUNDSUGS:
				502	lp->sl_nocompoundsugs = TRUE;
				503	break;
				504
				505	case SN_COMPOUND:
				506	res = read_compound(fd, lp, len);
				507	break;
				508
				509	case SN_NOBREAK:
				510	lp->sl_nobreak = TRUE;
				511	break;
				512
				513	case SN_SYLLABLE:
				514	lp->sl_syllable = read_string(fd, len); /* <syllable> */
				515	if (lp->sl_syllable == NULL)
				516	goto endFAIL;
				517	if (init_syl_tab(lp) == FAIL)
				518	goto endFAIL;
				519	break;
				520
				521	default:
				522	/* Unsupported section. When it's required give an error
				523	* message. When it's not required skip the contents. */
				524	if (c & SNF_REQUIRED)
				525	{
				526	EMSG(_("E770: Unsupported section in spell file"));
				527	goto endFAIL;
				528	}
				529	while (--len >= 0)
				530	if (getc(fd) < 0)
				531	goto truncerr;
				532	break;
				533	}
				534	someerror:
				535	if (res == SP_FORMERROR)
				536	{
				537	EMSG(_(e_format));
				538	goto endFAIL;
				539	}
				540	if (res == SP_TRUNCERROR)
				541	{
				542	truncerr:
				543	EMSG(_(e_spell_trunc));
				544	goto endFAIL;
				545	}
				546	if (res == SP_OTHERERROR)
				547	goto endFAIL;
				548	}
				549
				550	/* <LWORDTREE> */
				551	res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
				552	if (res != 0)
				553	goto someerror;
				554
				555	/* <KWORDTREE> */
				556	res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
				557	if (res != 0)
				558	goto someerror;
				559
				560	/* <PREFIXTREE> */
				561	res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
				562	lp->sl_prefixcnt);
				563	if (res != 0)
				564	goto someerror;
				565
				566	/* For a new file link it in the list of spell files. */
				567	if (old_lp == NULL && lang != NULL)
				568	{
				569	lp->sl_next = first_lang;
				570	first_lang = lp;
				571	}
				572
				573	goto endOK;
				574
				575	endFAIL:
				576	if (lang != NULL)
				577	/* truncating the name signals the error to spell_load_lang() */
				578	*lang = NUL;
				579	if (lp != NULL && old_lp == NULL)
				580	slang_free(lp);
				581	lp = NULL;
				582
				583	endOK:
				584	if (fd != NULL)
				585	fclose(fd);
				586	sourcing_name = save_sourcing_name;
				587	sourcing_lnum = save_sourcing_lnum;
				588
				589	return lp;
				590	}
				591
				592	/*
				593	* Fill in the wordcount fields for a trie.
				594	* Returns the total number of words.
				595	*/
				596	static void
				597	tree_count_words(char_u byts, idx_T idxs)
				598	{
				599	int depth;
				600	idx_T arridx[MAXWLEN];
				601	int curi[MAXWLEN];
				602	int c;
				603	idx_T n;
				604	int wordcount[MAXWLEN];
				605
				606	arridx[0] = 0;
				607	curi[0] = 1;
				608	wordcount[0] = 0;
				609	depth = 0;
				610	while (depth >= 0 && !got_int)
				611	{
				612	if (curi[depth] > byts[arridx[depth]])
				613	{
				614	/* Done all bytes at this node, go up one level. */
				615	idxs[arridx[depth]] = wordcount[depth];
				616	if (depth > 0)
				617	wordcount[depth - 1] += wordcount[depth];
				618
				619	--depth;
				620	fast_breakcheck();
				621	}
				622	else
				623	{
				624	/* Do one more byte at this node. */
				625	n = arridx[depth] + curi[depth];
				626	++curi[depth];
				627
				628	c = byts[n];
				629	if (c == 0)
				630	{
				631	/* End of word, count it. */
				632	++wordcount[depth];
				633
				634	/* Skip over any other NUL bytes (same word with different
				635	* flags). */
				636	while (byts[n + 1] == 0)
				637	{
				638	++n;
				639	++curi[depth];
				640	}
				641	}
				642	else
				643	{
				644	/* Normal char, go one level deeper to count the words. */
				645	++depth;
				646	arridx[depth] = idxs[n];
				647	curi[depth] = 1;
				648	wordcount[depth] = 0;
				649	}
				650	}
				651	}
				652	}
				653
				654	/*
				655	* Load the .sug files for languages that have one and weren't loaded yet.
				656	*/
				657	void
				658	suggest_load_files(void)
				659	{
				660	langp_T *lp;
				661	int lpi;
				662	slang_T *slang;
				663	char_u *dotp;
				664	FILE *fd;
				665	char_u buf[MAXWLEN];
				666	int i;
				667	time_t timestamp;
				668	int wcount;
				669	int wordnr;
				670	garray_T ga;
				671	int c;
				672
				673	/* Do this for all languages that support sound folding. */
				674	for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
				675	{
				676	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
				677	slang = lp->lp_slang;
				678	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
				679	{
				680	/* Change ".spl" to ".sug" and open the file. When the file isn't
				681	* found silently skip it. Do set "sl_sugloaded" so that we
				682	* don't try again and again. */
				683	slang->sl_sugloaded = TRUE;
				684
				685	dotp = vim_strrchr(slang->sl_fname, '.');
				686	if (dotp == NULL \|\| fnamecmp(dotp, ".spl") != 0)
				687	continue;
				688	STRCPY(dotp, ".sug");
				689	fd = mch_fopen((char *)slang->sl_fname, "r");
				690	if (fd == NULL)
				691	goto nextone;
				692
				693	/*
				694	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				695	*/
				696	for (i = 0; i < VIMSUGMAGICL; ++i)
				697	buf[i] = getc(fd); /* <fileID> */
				698	if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
				699	{
				700	EMSG2(_("E778: This does not look like a .sug file: %s"),
				701	slang->sl_fname);
				702	goto nextone;
				703	}
				704	c = getc(fd); /* <versionnr> */
				705	if (c < VIMSUGVERSION)
				706	{
				707	EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
				708	slang->sl_fname);
				709	goto nextone;
				710	}
				711	else if (c > VIMSUGVERSION)
				712	{
				713	EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
				714	slang->sl_fname);
				715	goto nextone;
				716	}
				717
				718	/* Check the timestamp, it must be exactly the same as the one in
				719	* the .spl file. Otherwise the word numbers won't match. */
				720	timestamp = get8ctime(fd); /* <timestamp> */
				721	if (timestamp != slang->sl_sugtime)
				722	{
				723	EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
				724	slang->sl_fname);
				725	goto nextone;
				726	}
				727
				728	/*
				729	* <SUGWORDTREE>: <wordtree>
				730	* Read the trie with the soundfolded words.
				731	*/
				732	if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
				733	FALSE, 0) != 0)
				734	{
				735	someerror:
				736	EMSG2(_("E782: error while reading .sug file: %s"),
				737	slang->sl_fname);
				738	slang_clear_sug(slang);
				739	goto nextone;
				740	}
				741
				742	/*
				743	* <SUGTABLE>: <sugwcount> <sugline> ...
				744	*
				745	* Read the table with word numbers. We use a file buffer for
				746	* this, because it's so much like a file with lines. Makes it
				747	* possible to swap the info and save on memory use.
				748	*/
				749	slang->sl_sugbuf = open_spellbuf();
				750	if (slang->sl_sugbuf == NULL)
				751	goto someerror;
				752	/* <sugwcount> */
				753	wcount = get4c(fd);
				754	if (wcount < 0)
				755	goto someerror;
				756
				757	/* Read all the wordnr lists into the buffer, one NUL terminated
				758	* list per line. */
				759	ga_init2(&ga, 1, 100);
				760	for (wordnr = 0; wordnr < wcount; ++wordnr)
				761	{
				762	ga.ga_len = 0;
				763	for (;;)
				764	{
				765	c = getc(fd); /* <sugline> */
				766	if (c < 0 \|\| ga_grow(&ga, 1) == FAIL)
				767	goto someerror;
				768	((char_u *)ga.ga_data)[ga.ga_len++] = c;
				769	if (c == NUL)
				770	break;
				771	}
				772	if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
				773	ga.ga_data, ga.ga_len, TRUE) == FAIL)
				774	goto someerror;
				775	}
				776	ga_clear(&ga);
				777
				778	/*
				779	* Need to put word counts in the word tries, so that we can find
				780	* a word by its number.
				781	*/
				782	tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
				783	tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
				784
				785	nextone:
				786	if (fd != NULL)
				787	fclose(fd);
				788	STRCPY(dotp, ".spl");
				789	}
				790	}
				791	}
				792
				793
				794	/*
				795	* Read a length field from "fd" in "cnt_bytes" bytes.
				796	* Allocate memory, read the string into it and add a NUL at the end.
				797	* Returns NULL when the count is zero.
				798	* Sets "cntp" to SP_ERROR when there is an error, length of the result
				799	* otherwise.
				800	*/
				801	static char_u *
				802	read_cnt_string(FILE fd, int cnt_bytes, int cntp)
				803	{
				804	int cnt = 0;
				805	int i;
				806	char_u *str;
				807
				808	/* read the length bytes, MSB first */
				809	for (i = 0; i < cnt_bytes; ++i)
				810	cnt = (cnt << 8) + getc(fd);
				811	if (cnt < 0)
				812	{
				813	*cntp = SP_TRUNCERROR;
				814	return NULL;
				815	}
				816	*cntp = cnt;
				817	if (cnt == 0)
				818	return NULL; /* nothing to read, return NULL */
				819
				820	str = read_string(fd, cnt);
				821	if (str == NULL)
				822	*cntp = SP_OTHERERROR;
				823	return str;
				824	}
				825
				826	/*
				827	* Read SN_REGION: <regionname> ...
				828	* Return SP_*ERROR flags.
				829	*/
				830	static int
				831	read_region_section(FILE fd, slang_T lp, int len)
				832	{
				833	int i;
				834
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	835	if (len > MAXREGIONS * 2)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	836	return SP_FORMERROR;
				837	for (i = 0; i < len; ++i)
				838	lp->sl_regions[i] = getc(fd); /* <regionname> */
				839	lp->sl_regions[len] = NUL;
				840	return 0;
				841	}
				842
				843	/*
				844	* Read SN_CHARFLAGS section: <charflagslen> <charflags>
				845	* <folcharslen> <folchars>
				846	* Return SP_*ERROR flags.
				847	*/
				848	static int
				849	read_charflags_section(FILE *fd)
				850	{
				851	char_u *flags;
				852	char_u *fol;
				853	int flagslen, follen;
				854
				855	/* <charflagslen> <charflags> */
				856	flags = read_cnt_string(fd, 1, &flagslen);
				857	if (flagslen < 0)
				858	return flagslen;
				859
				860	/* <folcharslen> <folchars> */
				861	fol = read_cnt_string(fd, 2, &follen);
				862	if (follen < 0)
				863	{
				864	vim_free(flags);
				865	return follen;
				866	}
				867
				868	/* Set the word-char flags and fill SPELL_ISUPPER() table. */
				869	if (flags != NULL && fol != NULL)
				870	set_spell_charflags(flags, flagslen, fol);
				871
				872	vim_free(flags);
				873	vim_free(fol);
				874
				875	/* When <charflagslen> is zero then <fcharlen> must also be zero. */
				876	if ((flags == NULL) != (fol == NULL))
				877	return SP_FORMERROR;
				878	return 0;
				879	}
				880
				881	/*
				882	* Read SN_PREFCOND section.
				883	* Return SP_*ERROR flags.
				884	*/
				885	static int
				886	read_prefcond_section(FILE fd, slang_T lp)
				887	{
				888	int cnt;
				889	int i;
				890	int n;
				891	char_u *p;
				892	char_u buf[MAXWLEN + 1];
				893
				894	/* <prefcondcnt> <prefcond> ... */
				895	cnt = get2c(fd); /* <prefcondcnt> */
				896	if (cnt <= 0)
				897	return SP_FORMERROR;
				898
				899	lp->sl_prefprog = (regprog_T **)alloc_clear(
				900	(unsigned)sizeof(regprog_T ) cnt);
				901	if (lp->sl_prefprog == NULL)
				902	return SP_OTHERERROR;
				903	lp->sl_prefixcnt = cnt;
				904
				905	for (i = 0; i < cnt; ++i)
				906	{
				907	/* <prefcond> : <condlen> <condstr> */
				908	n = getc(fd); /* <condlen> */
				909	if (n < 0 \|\| n >= MAXWLEN)
				910	return SP_FORMERROR;
				911
				912	/* When <condlen> is zero we have an empty condition. Otherwise
				913	* compile the regexp program used to check for the condition. */
				914	if (n > 0)
				915	{
				916	buf[0] = '^'; /* always match at one position only */
				917	p = buf + 1;
				918	while (n-- > 0)
				919	p++ = getc(fd); / <condstr> */
				920	*p = NUL;
				921	lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
				922	}
				923	}
				924	return 0;
				925	}
				926
				927	/*
				928	* Read REP or REPSAL items section from "fd": <repcount> <rep> ...
				929	* Return SP_*ERROR flags.
				930	*/
				931	static int
				932	read_rep_section(FILE fd, garray_T gap, short *first)
				933	{
				934	int cnt;
				935	fromto_T *ftp;
				936	int i;
				937
				938	cnt = get2c(fd); /* <repcount> */
				939	if (cnt < 0)
				940	return SP_TRUNCERROR;
				941
				942	if (ga_grow(gap, cnt) == FAIL)
				943	return SP_OTHERERROR;
				944
				945	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				946	for (; gap->ga_len < cnt; ++gap->ga_len)
				947	{
				948	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
				949	ftp->ft_from = read_cnt_string(fd, 1, &i);
				950	if (i < 0)
				951	return i;
				952	if (i == 0)
				953	return SP_FORMERROR;
				954	ftp->ft_to = read_cnt_string(fd, 1, &i);
				955	if (i <= 0)
				956	{
				957	vim_free(ftp->ft_from);
				958	if (i < 0)
				959	return i;
				960	return SP_FORMERROR;
				961	}
				962	}
				963
				964	/* Fill the first-index table. */
				965	for (i = 0; i < 256; ++i)
				966	first[i] = -1;
				967	for (i = 0; i < gap->ga_len; ++i)
				968	{
				969	ftp = &((fromto_T *)gap->ga_data)[i];
				970	if (first[*ftp->ft_from] == -1)
				971	first[*ftp->ft_from] = i;
				972	}
				973	return 0;
				974	}
				975
				976	/*
				977	* Read SN_SAL section: <salflags> <salcount> <sal> ...
				978	* Return SP_*ERROR flags.
				979	*/
				980	static int
				981	read_sal_section(FILE fd, slang_T slang)
				982	{
				983	int i;
				984	int cnt;
				985	garray_T *gap;
				986	salitem_T *smp;
				987	int ccnt;
				988	char_u *p;
				989	int c = NUL;
				990
				991	slang->sl_sofo = FALSE;
				992
				993	i = getc(fd); /* <salflags> */
				994	if (i & SAL_F0LLOWUP)
				995	slang->sl_followup = TRUE;
				996	if (i & SAL_COLLAPSE)
				997	slang->sl_collapse = TRUE;
				998	if (i & SAL_REM_ACCENTS)
				999	slang->sl_rem_accents = TRUE;
				1000
				1001	cnt = get2c(fd); /* <salcount> */
				1002	if (cnt < 0)
				1003	return SP_TRUNCERROR;
				1004
				1005	gap = &slang->sl_sal;
				1006	ga_init2(gap, sizeof(salitem_T), 10);
				1007	if (ga_grow(gap, cnt + 1) == FAIL)
				1008	return SP_OTHERERROR;
				1009
				1010	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				1011	for (; gap->ga_len < cnt; ++gap->ga_len)
				1012	{
				1013	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1014	ccnt = getc(fd); /* <salfromlen> */
				1015	if (ccnt < 0)
				1016	return SP_TRUNCERROR;
				1017	if ((p = alloc(ccnt + 2)) == NULL)
				1018	return SP_OTHERERROR;
				1019	smp->sm_lead = p;
				1020
				1021	/* Read up to the first special char into sm_lead. */
				1022	for (i = 0; i < ccnt; ++i)
				1023	{
				1024	c = getc(fd); /* <salfrom> */
				1025	if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
				1026	break;
				1027	*p++ = c;
				1028	}
				1029	smp->sm_leadlen = (int)(p - smp->sm_lead);
				1030	*p++ = NUL;
				1031
				1032	/* Put (abc) chars in sm_oneof, if any. */
				1033	if (c == '(')
				1034	{
				1035	smp->sm_oneof = p;
				1036	for (++i; i < ccnt; ++i)
				1037	{
				1038	c = getc(fd); /* <salfrom> */
				1039	if (c == ')')
				1040	break;
				1041	*p++ = c;
				1042	}
				1043	*p++ = NUL;
				1044	if (++i < ccnt)
				1045	c = getc(fd);
				1046	}
				1047	else
				1048	smp->sm_oneof = NULL;
				1049
				1050	/* Any following chars go in sm_rules. */
				1051	smp->sm_rules = p;
				1052	if (i < ccnt)
				1053	/* store the char we got while checking for end of sm_lead */
				1054	*p++ = c;
				1055	for (++i; i < ccnt; ++i)
				1056	p++ = getc(fd); / <salfrom> */
				1057	*p++ = NUL;
				1058
				1059	/* <saltolen> <salto> */
				1060	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
				1061	if (ccnt < 0)
				1062	{
				1063	vim_free(smp->sm_lead);
				1064	return ccnt;
				1065	}
				1066
				1067	#ifdef FEAT_MBYTE
				1068	if (has_mbyte)
				1069	{
				1070	/* convert the multi-byte strings to wide char strings */
				1071	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1072	smp->sm_leadlen = mb_charlen(smp->sm_lead);
				1073	if (smp->sm_oneof == NULL)
				1074	smp->sm_oneof_w = NULL;
				1075	else
				1076	smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
				1077	if (smp->sm_to == NULL)
				1078	smp->sm_to_w = NULL;
				1079	else
				1080	smp->sm_to_w = mb_str2wide(smp->sm_to);
				1081	if (smp->sm_lead_w == NULL
				1082	\|\| (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
				1083	\|\| (smp->sm_to_w == NULL && smp->sm_to != NULL))
				1084	{
				1085	vim_free(smp->sm_lead);
				1086	vim_free(smp->sm_to);
				1087	vim_free(smp->sm_lead_w);
				1088	vim_free(smp->sm_oneof_w);
				1089	vim_free(smp->sm_to_w);
				1090	return SP_OTHERERROR;
				1091	}
				1092	}
				1093	#endif
				1094	}
				1095
				1096	if (gap->ga_len > 0)
				1097	{
				1098	/* Add one extra entry to mark the end with an empty sm_lead. Avoids
				1099	* that we need to check the index every time. */
				1100	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1101	if ((p = alloc(1)) == NULL)
				1102	return SP_OTHERERROR;
				1103	p[0] = NUL;
				1104	smp->sm_lead = p;
				1105	smp->sm_leadlen = 0;
				1106	smp->sm_oneof = NULL;
				1107	smp->sm_rules = p;
				1108	smp->sm_to = NULL;
				1109	#ifdef FEAT_MBYTE
				1110	if (has_mbyte)
				1111	{
				1112	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1113	smp->sm_leadlen = 0;
				1114	smp->sm_oneof_w = NULL;
				1115	smp->sm_to_w = NULL;
				1116	}
				1117	#endif
				1118	++gap->ga_len;
				1119	}
				1120
				1121	/* Fill the first-index table. */
				1122	set_sal_first(slang);
				1123
				1124	return 0;
				1125	}
				1126
				1127	/*
				1128	* Read SN_WORDS: <word> ...
				1129	* Return SP_*ERROR flags.
				1130	*/
				1131	static int
				1132	read_words_section(FILE fd, slang_T lp, int len)
				1133	{
				1134	int done = 0;
				1135	int i;
				1136	int c;
				1137	char_u word[MAXWLEN];
				1138
				1139	while (done < len)
				1140	{
				1141	/* Read one word at a time. */
				1142	for (i = 0; ; ++i)
				1143	{
				1144	c = getc(fd);
				1145	if (c == EOF)
				1146	return SP_TRUNCERROR;
				1147	word[i] = c;
				1148	if (word[i] == NUL)
				1149	break;
				1150	if (i == MAXWLEN - 1)
				1151	return SP_FORMERROR;
				1152	}
				1153
				1154	/* Init the count to 10. */
				1155	count_common_word(lp, word, -1, 10);
				1156	done += i + 1;
				1157	}
				1158	return 0;
				1159	}
				1160
				1161	/*
				1162	* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				1163	* Return SP_*ERROR flags.
				1164	*/
				1165	static int
				1166	read_sofo_section(FILE fd, slang_T slang)
				1167	{
				1168	int cnt;
				1169	char_u from, to;
				1170	int res;
				1171
				1172	slang->sl_sofo = TRUE;
				1173
				1174	/* <sofofromlen> <sofofrom> */
				1175	from = read_cnt_string(fd, 2, &cnt);
				1176	if (cnt < 0)
				1177	return cnt;
				1178
				1179	/* <sofotolen> <sofoto> */
				1180	to = read_cnt_string(fd, 2, &cnt);
				1181	if (cnt < 0)
				1182	{
				1183	vim_free(from);
				1184	return cnt;
				1185	}
				1186
				1187	/* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
				1188	if (from != NULL && to != NULL)
				1189	res = set_sofo(slang, from, to);
				1190	else if (from != NULL \|\| to != NULL)
				1191	res = SP_FORMERROR; /* only one of two strings is an error */
				1192	else
				1193	res = 0;
				1194
				1195	vim_free(from);
				1196	vim_free(to);
				1197	return res;
				1198	}
				1199
				1200	/*
				1201	* Read the compound section from the .spl file:
				1202	* <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
				1203	* Returns SP_*ERROR flags.
				1204	*/
				1205	static int
				1206	read_compound(FILE fd, slang_T slang, int len)
				1207	{
				1208	int todo = len;
				1209	int c;
				1210	int atstart;
				1211	char_u *pat;
				1212	char_u *pp;
				1213	char_u *cp;
				1214	char_u *ap;
				1215	char_u *crp;
				1216	int cnt;
				1217	garray_T *gap;
				1218
				1219	if (todo < 2)
				1220	return SP_FORMERROR; /* need at least two bytes */
				1221
				1222	--todo;
				1223	c = getc(fd); /* <compmax> */
				1224	if (c < 2)
				1225	c = MAXWLEN;
				1226	slang->sl_compmax = c;
				1227
				1228	--todo;
				1229	c = getc(fd); /* <compminlen> */
				1230	if (c < 1)
				1231	c = 0;
				1232	slang->sl_compminlen = c;
				1233
				1234	--todo;
				1235	c = getc(fd); /* <compsylmax> */
				1236	if (c < 1)
				1237	c = MAXWLEN;
				1238	slang->sl_compsylmax = c;
				1239
				1240	c = getc(fd); /* <compoptions> */
				1241	if (c != 0)
				1242	ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
				1243	else
				1244	{
				1245	--todo;
				1246	c = getc(fd); /* only use the lower byte for now */
				1247	--todo;
				1248	slang->sl_compoptions = c;
				1249
				1250	gap = &slang->sl_comppat;
				1251	c = get2c(fd); /* <comppatcount> */
				1252	todo -= 2;
				1253	ga_init2(gap, sizeof(char_u *), c);
				1254	if (ga_grow(gap, c) == OK)
				1255	while (--c >= 0)
				1256	{
				1257	((char_u **)(gap->ga_data))[gap->ga_len++] =
				1258	read_cnt_string(fd, 1, &cnt);
				1259	/* <comppatlen> <comppattext> */
				1260	if (cnt < 0)
				1261	return cnt;
				1262	todo -= cnt + 1;
				1263	}
				1264	}
				1265	if (todo < 0)
				1266	return SP_FORMERROR;
				1267
				1268	/* Turn the COMPOUNDRULE items into a regexp pattern:
				1269	* "a[bc]/ab+" -> "^$a[bc]\\|ab\+$$".
				1270	* Inserting backslashes may double the length, "^$<Nul>" is 7 bytes.
				1271	* Conversion to utf-8 may double the size. */
				1272	c = todo * 2 + 7;
				1273	#ifdef FEAT_MBYTE
				1274	if (enc_utf8)
				1275	c += todo * 2;
				1276	#endif
				1277	pat = alloc((unsigned)c);
				1278	if (pat == NULL)
				1279	return SP_OTHERERROR;
				1280
				1281	/* We also need a list of all flags that can appear at the start and one
				1282	* for all flags. */
				1283	cp = alloc(todo + 1);
				1284	if (cp == NULL)
				1285	{
				1286	vim_free(pat);
				1287	return SP_OTHERERROR;
				1288	}
				1289	slang->sl_compstartflags = cp;
				1290	*cp = NUL;
				1291
				1292	ap = alloc(todo + 1);
				1293	if (ap == NULL)
				1294	{
				1295	vim_free(pat);
				1296	return SP_OTHERERROR;
				1297	}
				1298	slang->sl_compallflags = ap;
				1299	*ap = NUL;
				1300
				1301	/* And a list of all patterns in their original form, for checking whether
				1302	* compounding may work in match_compoundrule(). This is freed when we
				1303	* encounter a wildcard, the check doesn't work then. */
				1304	crp = alloc(todo + 1);
				1305	slang->sl_comprules = crp;
				1306
				1307	pp = pat;
				1308	*pp++ = '^';
				1309	*pp++ = '\\';
				1310	*pp++ = '(';
				1311
				1312	atstart = 1;
				1313	while (todo-- > 0)
				1314	{
				1315	c = getc(fd); /* <compflags> */
				1316	if (c == EOF)
				1317	{
				1318	vim_free(pat);
				1319	return SP_TRUNCERROR;
				1320	}
				1321
				1322	/* Add all flags to "sl_compallflags". */
				1323	if (vim_strchr((char_u )"?+[]/", c) == NULL
				1324	&& !byte_in_str(slang->sl_compallflags, c))
				1325	{
				1326	*ap++ = c;
				1327	*ap = NUL;
				1328	}
				1329
				1330	if (atstart != 0)
				1331	{
				1332	/* At start of item: copy flags to "sl_compstartflags". For a
				1333	* [abc] item set "atstart" to 2 and copy up to the ']'. */
				1334	if (c == '[')
				1335	atstart = 2;
				1336	else if (c == ']')
				1337	atstart = 0;
				1338	else
				1339	{
				1340	if (!byte_in_str(slang->sl_compstartflags, c))
				1341	{
				1342	*cp++ = c;
				1343	*cp = NUL;
				1344	}
				1345	if (atstart == 1)
				1346	atstart = 0;
				1347	}
				1348	}
				1349
				1350	/* Copy flag to "sl_comprules", unless we run into a wildcard. */
				1351	if (crp != NULL)
				1352	{
				1353	if (c == '?' \|\| c == '+' \|\| c == '*')
				1354	{
				1355	vim_free(slang->sl_comprules);
				1356	slang->sl_comprules = NULL;
				1357	crp = NULL;
				1358	}
				1359	else
				1360	*crp++ = c;
				1361	}
				1362
				1363	if (c == '/') /* slash separates two items */
				1364	{
				1365	*pp++ = '\\';
				1366	*pp++ = '\|';
				1367	atstart = 1;
				1368	}
				1369	else /* normal char, "[abc]" and '' are copied as-is /
				1370	{
				1371	if (c == '?' \|\| c == '+' \|\| c == '~')
				1372	pp++ = '\\'; / "a?" becomes "a\?", "a+" becomes "a\+" */
				1373	#ifdef FEAT_MBYTE
				1374	if (enc_utf8)
				1375	pp += mb_char2bytes(c, pp);
				1376	else
				1377	#endif
				1378	*pp++ = c;
				1379	}
				1380	}
				1381
				1382	*pp++ = '\\';
				1383	*pp++ = ')';
				1384	*pp++ = '$';
				1385	*pp = NUL;
				1386
				1387	if (crp != NULL)
				1388	*crp = NUL;
				1389
				1390	slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
				1391	vim_free(pat);
				1392	if (slang->sl_compprog == NULL)
				1393	return SP_FORMERROR;
				1394
				1395	return 0;
				1396	}
				1397
				1398	/*
				1399	* Set the SOFOFROM and SOFOTO items in language "lp".
				1400	* Returns SP_*ERROR flags when there is something wrong.
				1401	*/
				1402	static int
				1403	set_sofo(slang_T lp, char_u from, char_u *to)
				1404	{
				1405	int i;
				1406
				1407	#ifdef FEAT_MBYTE
				1408	garray_T *gap;
				1409	char_u *s;
				1410	char_u *p;
				1411	int c;
				1412	int *inp;
				1413
				1414	if (has_mbyte)
				1415	{
				1416	/* Use "sl_sal" as an array with 256 pointers to a list of wide
				1417	* characters. The index is the low byte of the character.
				1418	* The list contains from-to pairs with a terminating NUL.
				1419	* sl_sal_first[] is used for latin1 "from" characters. */
				1420	gap = &lp->sl_sal;
				1421	ga_init2(gap, sizeof(int *), 1);
				1422	if (ga_grow(gap, 256) == FAIL)
				1423	return SP_OTHERERROR;
				1424	vim_memset(gap->ga_data, 0, sizeof(int ) 256);
				1425	gap->ga_len = 256;
				1426
				1427	/* First count the number of items for each list. Temporarily use
				1428	* sl_sal_first[] for this. */
				1429	for (p = from, s = to; p != NUL && s != NUL; )
				1430	{
				1431	c = mb_cptr2char_adv(&p);
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	1432	MB_CPTR_ADV(s);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1433	if (c >= 256)
				1434	++lp->sl_sal_first[c & 0xff];
				1435	}
				1436	if (p != NUL \|\| s != NUL) /* lengths differ */
				1437	return SP_FORMERROR;
				1438
				1439	/* Allocate the lists. */
				1440	for (i = 0; i < 256; ++i)
				1441	if (lp->sl_sal_first[i] > 0)
				1442	{
				1443	p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
				1444	if (p == NULL)
				1445	return SP_OTHERERROR;
				1446	((int *)gap->ga_data)[i] = (int )p;
				1447	(int )p = 0;
				1448	}
				1449
				1450	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
				1451	* list. */
				1452	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
				1453	for (p = from, s = to; p != NUL && s != NUL; )
				1454	{
				1455	c = mb_cptr2char_adv(&p);
				1456	i = mb_cptr2char_adv(&s);
				1457	if (c >= 256)
				1458	{
				1459	/* Append the from-to chars at the end of the list with
				1460	* the low byte. */
				1461	inp = ((int **)gap->ga_data)[c & 0xff];
				1462	while (*inp != 0)
				1463	++inp;
				1464	inp++ = c; / from char */
				1465	inp++ = i; / to char */
				1466	inp++ = NUL; / NUL at the end */
				1467	}
				1468	else
				1469	/* mapping byte to char is done in sl_sal_first[] */
				1470	lp->sl_sal_first[c] = i;
				1471	}
				1472	}
				1473	else
				1474	#endif
				1475	{
				1476	/* mapping bytes to bytes is done in sl_sal_first[] */
				1477	if (STRLEN(from) != STRLEN(to))
				1478	return SP_FORMERROR;
				1479
				1480	for (i = 0; to[i] != NUL; ++i)
				1481	lp->sl_sal_first[from[i]] = to[i];
				1482	lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
				1483	}
				1484
				1485	return 0;
				1486	}
				1487
				1488	/*
				1489	* Fill the first-index table for "lp".
				1490	*/
				1491	static void
				1492	set_sal_first(slang_T *lp)
				1493	{
				1494	salfirst_T *sfirst;
				1495	int i;
				1496	salitem_T *smp;
				1497	int c;
				1498	garray_T *gap = &lp->sl_sal;
				1499
				1500	sfirst = lp->sl_sal_first;
				1501	for (i = 0; i < 256; ++i)
				1502	sfirst[i] = -1;
				1503	smp = (salitem_T *)gap->ga_data;
				1504	for (i = 0; i < gap->ga_len; ++i)
				1505	{
				1506	#ifdef FEAT_MBYTE
				1507	if (has_mbyte)
				1508	/* Use the lowest byte of the first character. For latin1 it's
				1509	* the character, for other encodings it should differ for most
				1510	* characters. */
				1511	c = *smp[i].sm_lead_w & 0xff;
				1512	else
				1513	#endif
				1514	c = *smp[i].sm_lead;
				1515	if (sfirst[c] == -1)
				1516	{
				1517	sfirst[c] = i;
				1518	#ifdef FEAT_MBYTE
				1519	if (has_mbyte)
				1520	{
				1521	int n;
				1522
				1523	/* Make sure all entries with this byte are following each
				1524	* other. Move the ones that are in the wrong position. Do
				1525	* keep the same ordering! */
				1526	while (i + 1 < gap->ga_len
				1527	&& (*smp[i + 1].sm_lead_w & 0xff) == c)
				1528	/* Skip over entry with same index byte. */
				1529	++i;
				1530
				1531	for (n = 1; i + n < gap->ga_len; ++n)
				1532	if ((*smp[i + n].sm_lead_w & 0xff) == c)
				1533	{
				1534	salitem_T tsal;
				1535
				1536	/* Move entry with same index byte after the entries
				1537	* we already found. */
				1538	++i;
				1539	--n;
				1540	tsal = smp[i + n];
				1541	mch_memmove(smp + i + 1, smp + i,
				1542	sizeof(salitem_T) * n);
				1543	smp[i] = tsal;
				1544	}
				1545	}
				1546	#endif
				1547	}
				1548	}
				1549	}
				1550
				1551	#ifdef FEAT_MBYTE
				1552	/*
				1553	* Turn a multi-byte string into a wide character string.
				1554	* Return it in allocated memory (NULL for out-of-memory)
				1555	*/
				1556	static int *
				1557	mb_str2wide(char_u *s)
				1558	{
				1559	int *res;
				1560	char_u *p;
				1561	int i = 0;
				1562
				1563	res = (int )alloc(sizeof(int) (mb_charlen(s) + 1));
				1564	if (res != NULL)
				1565	{
				1566	for (p = s; *p != NUL; )
				1567	res[i++] = mb_ptr2char_adv(&p);
				1568	res[i] = NUL;
				1569	}
				1570	return res;
				1571	}
				1572	#endif
				1573
				1574	/*
				1575	* Read a tree from the .spl or .sug file.
				1576	* Allocates the memory and stores pointers in "bytsp" and "idxsp".
				1577	* This is skipped when the tree has zero length.
				1578	* Returns zero when OK, SP_ value for an error.
				1579	*/
				1580	static int
				1581	spell_read_tree(
				1582	FILE *fd,
				1583	char_u **bytsp,
				1584	idx_T **idxsp,
				1585	int prefixtree, /* TRUE for the prefix tree */
				1586	int prefixcnt) /* when "prefixtree" is TRUE: prefix count */
				1587	{
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1588	long len;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1589	int idx;
				1590	char_u *bp;
				1591	idx_T *ip;
				1592
				1593	/* The tree size was computed when writing the file, so that we can
				1594	* allocate it as one long block. <nodecount> */
				1595	len = get4c(fd);
				1596	if (len < 0)
				1597	return SP_TRUNCERROR;
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1598	if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar	399c297	2017-02-09 21:07:12 +0100	[diff] [blame]	1599	/* Invalid length, multiply with sizeof(int) would overflow. */
				1600	return SP_FORMERROR;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1601	if (len > 0)
				1602	{
				1603	/* Allocate the byte array. */
				1604	bp = lalloc((long_u)len, TRUE);
				1605	if (bp == NULL)
				1606	return SP_OTHERERROR;
				1607	*bytsp = bp;
				1608
				1609	/* Allocate the index array. */
				1610	ip = (idx_T )lalloc_clear((long_u)(len sizeof(int)), TRUE);
				1611	if (ip == NULL)
				1612	return SP_OTHERERROR;
				1613	*idxsp = ip;
				1614
				1615	/* Recursively read the tree and store it in the array. */
				1616	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
				1617	if (idx < 0)
				1618	return idx;
				1619	}
				1620	return 0;
				1621	}
				1622
				1623	/*
				1624	* Read one row of siblings from the spell file and store it in the byte array
				1625	* "byts" and index array "idxs". Recursively read the children.
				1626	*
				1627	* NOTE: The code here must match put_node()!
				1628	*
				1629	* Returns the index (>= 0) following the siblings.
				1630	* Returns SP_TRUNCERROR if the file is shorter than expected.
				1631	* Returns SP_FORMERROR if there is a format error.
				1632	*/
				1633	static idx_T
				1634	read_tree_node(
				1635	FILE *fd,
				1636	char_u *byts,
				1637	idx_T *idxs,
				1638	int maxidx, /* size of arrays */
				1639	idx_T startidx, /* current index in "byts" and "idxs" */
				1640	int prefixtree, /* TRUE for reading PREFIXTREE */
				1641	int maxprefcondnr) /* maximum for <prefcondnr> */
				1642	{
				1643	int len;
				1644	int i;
				1645	int n;
				1646	idx_T idx = startidx;
				1647	int c;
				1648	int c2;
				1649	#define SHARED_MASK 0x8000000
				1650
				1651	len = getc(fd); /* <siblingcount> */
				1652	if (len <= 0)
				1653	return SP_TRUNCERROR;
				1654
				1655	if (startidx + len >= maxidx)
				1656	return SP_FORMERROR;
				1657	byts[idx++] = len;
				1658
				1659	/* Read the byte values, flag/region bytes and shared indexes. */
				1660	for (i = 1; i <= len; ++i)
				1661	{
				1662	c = getc(fd); /* <byte> */
				1663	if (c < 0)
				1664	return SP_TRUNCERROR;
				1665	if (c <= BY_SPECIAL)
				1666	{
				1667	if (c == BY_NOFLAGS && !prefixtree)
				1668	{
				1669	/* No flags, all regions. */
				1670	idxs[idx] = 0;
				1671	c = 0;
				1672	}
				1673	else if (c != BY_INDEX)
				1674	{
				1675	if (prefixtree)
				1676	{
				1677	/* Read the optional pflags byte, the prefix ID and the
				1678	* condition nr. In idxs[] store the prefix ID in the low
				1679	* byte, the condition index shifted up 8 bits, the flags
				1680	* shifted up 24 bits. */
				1681	if (c == BY_FLAGS)
				1682	c = getc(fd) << 24; /* <pflags> */
				1683	else
				1684	c = 0;
				1685
				1686	c \|= getc(fd); /* <affixID> */
				1687
				1688	n = get2c(fd); /* <prefcondnr> */
				1689	if (n >= maxprefcondnr)
				1690	return SP_FORMERROR;
				1691	c \|= (n << 8);
				1692	}
				1693	else /* c must be BY_FLAGS or BY_FLAGS2 */
				1694	{
				1695	/* Read flags and optional region and prefix ID. In
				1696	* idxs[] the flags go in the low two bytes, region above
				1697	* that and prefix ID above the region. */
				1698	c2 = c;
				1699	c = getc(fd); /* <flags> */
				1700	if (c2 == BY_FLAGS2)
				1701	c = (getc(fd) << 8) + c; /* <flags2> */
				1702	if (c & WF_REGION)
				1703	c = (getc(fd) << 16) + c; /* <region> */
				1704	if (c & WF_AFX)
				1705	c = (getc(fd) << 24) + c; /* <affixID> */
				1706	}
				1707
				1708	idxs[idx] = c;
				1709	c = 0;
				1710	}
				1711	else /* c == BY_INDEX */
				1712	{
				1713	/* <nodeidx> */
				1714	n = get3c(fd);
				1715	if (n < 0 \|\| n >= maxidx)
				1716	return SP_FORMERROR;
				1717	idxs[idx] = n + SHARED_MASK;
				1718	c = getc(fd); /* <xbyte> */
				1719	}
				1720	}
				1721	byts[idx++] = c;
				1722	}
				1723
				1724	/* Recursively read the children for non-shared siblings.
				1725	* Skip the end-of-word ones (zero byte value) and the shared ones (and
				1726	* remove SHARED_MASK) */
				1727	for (i = 1; i <= len; ++i)
				1728	if (byts[startidx + i] != 0)
				1729	{
				1730	if (idxs[startidx + i] & SHARED_MASK)
				1731	idxs[startidx + i] &= ~SHARED_MASK;
				1732	else
				1733	{
				1734	idxs[startidx + i] = idx;
				1735	idx = read_tree_node(fd, byts, idxs, maxidx, idx,
				1736	prefixtree, maxprefcondnr);
				1737	if (idx < 0)
				1738	break;
				1739	}
				1740	}
				1741
				1742	return idx;
				1743	}
				1744
				1745	/*
				1746	* Reload the spell file "fname" if it's loaded.
				1747	*/
				1748	static void
				1749	spell_reload_one(
				1750	char_u *fname,
				1751	int added_word) /* invoked through "zg" */
				1752	{
				1753	slang_T *slang;
				1754	int didit = FALSE;
				1755
				1756	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				1757	{
				1758	if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
				1759	{
				1760	slang_clear(slang);
				1761	if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
				1762	/* reloading failed, clear the language */
				1763	slang_clear(slang);
				1764	redraw_all_later(SOME_VALID);
				1765	didit = TRUE;
				1766	}
				1767	}
				1768
				1769	/* When "zg" was used and the file wasn't loaded yet, should redo
				1770	* 'spelllang' to load it now. */
				1771	if (added_word && !didit)
				1772	did_set_spelllang(curwin);
				1773	}
				1774
				1775
				1776	/*
				1777	* Functions for ":mkspell".
				1778	*/
				1779
				1780	#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
				1781	and .dic file. */
				1782	/*
				1783	* Main structure to store the contents of a ".aff" file.
				1784	*/
				1785	typedef struct afffile_S
				1786	{
				1787	char_u af_enc; / "SET", normalized, alloc'ed string or NULL */
				1788	int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
				1789	unsigned af_rare; /* RARE ID for rare word */
				1790	unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
				1791	unsigned af_bad; /* BAD ID for banned word */
				1792	unsigned af_needaffix; /* NEEDAFFIX ID */
				1793	unsigned af_circumfix; /* CIRCUMFIX ID */
				1794	unsigned af_needcomp; /* NEEDCOMPOUND ID */
				1795	unsigned af_comproot; /* COMPOUNDROOT ID */
				1796	unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */
				1797	unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */
				1798	unsigned af_nosuggest; /* NOSUGGEST ID */
				1799	int af_pfxpostpone; /* postpone prefixes without chop string and
				1800	without flags */
				1801	int af_ignoreextra; /* IGNOREEXTRA present */
				1802	hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
				1803	hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
				1804	hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
				1805	} afffile_T;
				1806
				1807	#define AFT_CHAR 0 /* flags are one character */
				1808	#define AFT_LONG 1 /* flags are two characters */
				1809	#define AFT_CAPLONG 2 /* flags are one or two characters */
				1810	#define AFT_NUM 3 /* flags are numbers, comma separated */
				1811
				1812	typedef struct affentry_S affentry_T;
				1813	/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
				1814	struct affentry_S
				1815	{
				1816	affentry_T ae_next; / next affix with same name/number */
				1817	char_u ae_chop; / text to chop off basic word (can be NULL) */
				1818	char_u ae_add; / text to add to basic word (can be NULL) */
				1819	char_u ae_flags; / flags on the affix (can be NULL) */
				1820	char_u ae_cond; / condition (NULL for ".") */
				1821	regprog_T ae_prog; / regexp program for ae_cond or NULL */
				1822	char ae_compforbid; /* COMPOUNDFORBIDFLAG found */
				1823	char ae_comppermit; /* COMPOUNDPERMITFLAG found */
				1824	};
				1825
				1826	#ifdef FEAT_MBYTE
				1827	# define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
				1828	#else
				1829	# define AH_KEY_LEN 7 /* 6 digits + NUL */
				1830	#endif
				1831
				1832	/* Affix header from ".aff" file. Used for af_pref and af_suff. */
				1833	typedef struct affheader_S
				1834	{
				1835	char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
				1836	unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
				1837	int ah_newID; /* prefix ID after renumbering; 0 if not used */
				1838	int ah_combine; /* suffix may combine with prefix */
				1839	int ah_follows; /* another affix block should be following */
				1840	affentry_T ah_first; / first affix entry */
				1841	} affheader_T;
				1842
				1843	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
				1844
				1845	/* Flag used in compound items. */
				1846	typedef struct compitem_S
				1847	{
				1848	char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
				1849	unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
				1850	int ci_newID; /* affix ID after renumbering. */
				1851	} compitem_T;
				1852
				1853	#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
				1854
				1855	/*
				1856	* Structure that is used to store the items in the word tree. This avoids
				1857	* the need to keep track of each allocated thing, everything is freed all at
				1858	* once after ":mkspell" is done.
				1859	* Note: "sb_next" must be just before "sb_data" to make sure the alignment of
				1860	* "sb_data" is correct for systems where pointers must be aligned on
				1861	* pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
				1862	*/
				1863	#define SBLOCKSIZE 16000 /* size of sb_data */
				1864	typedef struct sblock_S sblock_T;
				1865	struct sblock_S
				1866	{
				1867	int sb_used; /* nr of bytes already in use */
				1868	sblock_T sb_next; / next block in list */
				1869	char_u sb_data[1]; /* data, actually longer */
				1870	};
				1871
				1872	/*
				1873	* A node in the tree.
				1874	*/
				1875	typedef struct wordnode_S wordnode_T;
				1876	struct wordnode_S
				1877	{
				1878	union /* shared to save space */
				1879	{
				1880	char_u hashkey[6]; /* the hash key, only used while compressing */
				1881	int index; /* index in written nodes (valid after first
				1882	round) */
				1883	} wn_u1;
				1884	union /* shared to save space */
				1885	{
				1886	wordnode_T next; / next node with same hash key */
				1887	wordnode_T wnode; / parent node that will write this node */
				1888	} wn_u2;
				1889	wordnode_T wn_child; / child (next byte in word) */
				1890	wordnode_T wn_sibling; / next sibling (alternate byte in word,
				1891	always sorted) */
				1892	int wn_refs; /* Nr. of references to this node. Only
				1893	relevant for first node in a list of
				1894	siblings, in following siblings it is
				1895	always one. */
				1896	char_u wn_byte; /* Byte for this node. NUL for word end */
				1897
				1898	/* Info for when "wn_byte" is NUL.
				1899	* In PREFIXTREE "wn_region" is used for the prefcondnr.
				1900	* In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
				1901	* "wn_region" the LSW of the wordnr. */
				1902	char_u wn_affixID; /* supported/required prefix ID or 0 */
				1903	short_u wn_flags; /* WF_ flags */
				1904	short wn_region; /* region mask */
				1905
				1906	#ifdef SPELL_PRINTTREE
				1907	int wn_nr; /* sequence nr for printing */
				1908	#endif
				1909	};
				1910
				1911	#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
				1912
				1913	#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
				1914
				1915	/*
				1916	* Info used while reading the spell files.
				1917	*/
				1918	typedef struct spellinfo_S
				1919	{
				1920	wordnode_T si_foldroot; / tree with case-folded words */
				1921	long si_foldwcount; /* nr of words in si_foldroot */
				1922
				1923	wordnode_T si_keeproot; / tree with keep-case words */
				1924	long si_keepwcount; /* nr of words in si_keeproot */
				1925
				1926	wordnode_T si_prefroot; / tree with postponed prefixes */
				1927
				1928	long si_sugtree; /* creating the soundfolding trie */
				1929
				1930	sblock_T si_blocks; / memory blocks used */
				1931	long si_blocks_cnt; /* memory blocks allocated */
				1932	int si_did_emsg; /* TRUE when ran out of memory */
				1933
				1934	long si_compress_cnt; /* words to add before lowering
				1935	compression limit */
				1936	wordnode_T si_first_free; / List of nodes that have been freed during
				1937	compression, linked by "wn_child" field. */
				1938	long si_free_count; /* number of nodes in si_first_free */
				1939	#ifdef SPELL_PRINTTREE
				1940	int si_wordnode_nr; /* sequence nr for nodes */
				1941	#endif
				1942	buf_T si_spellbuf; / buffer used to store soundfold word table */
				1943
				1944	int si_ascii; /* handling only ASCII words */
				1945	int si_add; /* addition file */
				1946	int si_clear_chartab; /* when TRUE clear char tables */
				1947	int si_region; /* region mask */
				1948	vimconv_T si_conv; /* for conversion to 'encoding' */
				1949	int si_memtot; /* runtime memory used */
				1950	int si_verbose; /* verbose messages */
				1951	int si_msg_count; /* number of words added since last message */
				1952	char_u si_info; / info text chars or NULL */
				1953	int si_region_count; /* number of regions supported (1 when there
				1954	are no regions) */
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	1955	char_u si_region_name[MAXREGIONS * 2 + 1];
				1956	/* region names; used only if
				1957	* si_region_count > 1) */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1958
				1959	garray_T si_rep; /* list of fromto_T entries from REP lines */
				1960	garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
				1961	garray_T si_sal; /* list of fromto_T entries from SAL lines */
				1962	char_u si_sofofr; / SOFOFROM text */
				1963	char_u si_sofoto; / SOFOTO text */
				1964	int si_nosugfile; /* NOSUGFILE item found */
				1965	int si_nosplitsugs; /* NOSPLITSUGS item found */
				1966	int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
				1967	int si_followup; /* soundsalike: ? */
				1968	int si_collapse; /* soundsalike: ? */
				1969	hashtab_T si_commonwords; /* hashtable for common words */
				1970	time_t si_sugtime; /* timestamp for .sug file */
				1971	int si_rem_accents; /* soundsalike: remove accents */
				1972	garray_T si_map; /* MAP info concatenated */
				1973	char_u si_midword; / MIDWORD chars or NULL */
				1974	int si_compmax; /* max nr of words for compounding */
				1975	int si_compminlen; /* minimal length for compounding */
				1976	int si_compsylmax; /* max nr of syllables for compounding */
				1977	int si_compoptions; /* COMP_ flags */
				1978	garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as
				1979	a string */
				1980	char_u si_compflags; / flags used for compounding */
				1981	char_u si_nobreak; /* NOBREAK */
				1982	char_u si_syllable; / syllable string */
				1983	garray_T si_prefcond; /* table with conditions for postponed
				1984	* prefixes, each stored as a string */
				1985	int si_newprefID; /* current value for ah_newID */
				1986	int si_newcompID; /* current value for compound ID */
				1987	} spellinfo_T;
				1988
				1989	static afffile_T spell_read_aff(spellinfo_T spin, char_u *fname);
				1990	static int is_aff_rule(char_u *items, int itemcnt, char rulename, int mincount);
				1991	static void aff_process_flags(afffile_T affile, affentry_T entry);
				1992	static int spell_info_item(char_u *s);
				1993	static unsigned affitem2flag(int flagtype, char_u item, char_u fname, int lnum);
				1994	static unsigned get_affitem(int flagtype, char_u **pp);
				1995	static void process_compflags(spellinfo_T spin, afffile_T aff, char_u *compflags);
				1996	static void check_renumber(spellinfo_T *spin);
				1997	static int flag_in_afflist(int flagtype, char_u *afflist, unsigned flag);
				1998	static void aff_check_number(int spinval, int affval, char *name);
				1999	static void aff_check_string(char_u spinval, char_u affval, char *name);
				2000	static int str_equal(char_u s1, char_u s2);
				2001	static void add_fromto(spellinfo_T spin, garray_T gap, char_u from, char_u to);
				2002	static int sal_to_bool(char_u *s);
				2003	static void spell_free_aff(afffile_T *aff);
				2004	static int spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile);
				2005	static int get_affix_flags(afffile_T affile, char_u afflist);
				2006	static int get_pfxlist(afffile_T affile, char_u afflist, char_u *store_afflist);
				2007	static void get_compflags(afffile_T affile, char_u afflist, char_u *store_afflist);
				2008	static int store_aff_word(spellinfo_T spin, char_u word, char_u afflist, afffile_T affile, hashtab_T ht, hashtab_T xht, int condit, int flags, char_u *pfxlist, int pfxlen);
				2009	static int spell_read_wordfile(spellinfo_T spin, char_u fname);
				2010	static void getroom(spellinfo_T spin, size_t len, int align);
				2011	static char_u getroom_save(spellinfo_T spin, char_u *s);
				2012	static void free_blocks(sblock_T *bl);
				2013	static wordnode_T wordtree_alloc(spellinfo_T spin);
				2014	static int store_word(spellinfo_T spin, char_u word, int flags, int region, char_u *pfxlist, int need_affix);
				2015	static int tree_add_word(spellinfo_T spin, char_u word, wordnode_T *tree, int flags, int region, int affixID);
				2016	static wordnode_T get_wordnode(spellinfo_T spin);
				2017	static int deref_wordnode(spellinfo_T spin, wordnode_T node);
				2018	static void free_wordnode(spellinfo_T spin, wordnode_T n);
				2019	static void wordtree_compress(spellinfo_T spin, wordnode_T root);
				2020	static int node_compress(spellinfo_T spin, wordnode_T node, hashtab_T ht, int tot);
				2021	static int node_equal(wordnode_T n1, wordnode_T n2);
				2022	static int write_vim_spell(spellinfo_T spin, char_u fname);
				2023	static void clear_node(wordnode_T *node);
				2024	static int put_node(FILE fd, wordnode_T node, int idx, int regionmask, int prefixtree);
				2025	static void spell_make_sugfile(spellinfo_T spin, char_u wfname);
				2026	static int sug_filltree(spellinfo_T spin, slang_T slang);
				2027	static int sug_maketable(spellinfo_T *spin);
				2028	static int sug_filltable(spellinfo_T spin, wordnode_T node, int startwordnr, garray_T *gap);
				2029	static int offset2bytes(int nr, char_u *buf);
				2030	static void sug_write(spellinfo_T spin, char_u fname);
				2031	static void spell_message(spellinfo_T spin, char_u str);
				2032	static void init_spellfile(void);
				2033
				2034	/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
				2035	* but it must be negative to indicate the prefix tree to tree_add_word().
				2036	* Use a negative number with the lower 8 bits zero. */
				2037	#define PFX_FLAGS -256
				2038
				2039	/* flags for "condit" argument of store_aff_word() */
				2040	#define CONDIT_COMB 1 /* affix must combine */
				2041	#define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */
				2042	#define CONDIT_SUF 4 /* add a suffix for matching flags */
				2043	#define CONDIT_AFF 8 /* word already has an affix */
				2044
				2045	/*
				2046	* Tunable parameters for when the tree is compressed. See 'mkspellmem'.
				2047	*/
				2048	static long compress_start = 30000; /* memory / SBLOCKSIZE */
				2049	static long compress_inc = 100; /* memory / SBLOCKSIZE */
				2050	static long compress_added = 500000; /* word count */
				2051
				2052	/*
				2053	* Check the 'mkspellmem' option. Return FAIL if it's wrong.
				2054	* Sets "sps_flags".
				2055	*/
				2056	int
				2057	spell_check_msm(void)
				2058	{
				2059	char_u *p = p_msm;
				2060	long start = 0;
				2061	long incr = 0;
				2062	long added = 0;
				2063
				2064	if (!VIM_ISDIGIT(*p))
				2065	return FAIL;
				2066	/* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
				2067	start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
				2068	if (*p != ',')
				2069	return FAIL;
				2070	++p;
				2071	if (!VIM_ISDIGIT(*p))
				2072	return FAIL;
				2073	incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
				2074	if (*p != ',')
				2075	return FAIL;
				2076	++p;
				2077	if (!VIM_ISDIGIT(*p))
				2078	return FAIL;
				2079	added = getdigits(&p) * 1024;
				2080	if (*p != NUL)
				2081	return FAIL;
				2082
				2083	if (start == 0 \|\| incr == 0 \|\| added == 0 \|\| incr > start)
				2084	return FAIL;
				2085
				2086	compress_start = start;
				2087	compress_inc = incr;
				2088	compress_added = added;
				2089	return OK;
				2090	}
				2091
				2092	#ifdef SPELL_PRINTTREE
				2093	/*
				2094	* For debugging the tree code: print the current tree in a (more or less)
				2095	* readable format, so that we can see what happens when adding a word and/or
				2096	* compressing the tree.
				2097	* Based on code from Olaf Seibert.
				2098	*/
				2099	#define PRINTLINESIZE 1000
				2100	#define PRINTWIDTH 6
				2101
				2102	#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
				2103	PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
				2104
				2105	static char line1[PRINTLINESIZE];
				2106	static char line2[PRINTLINESIZE];
				2107	static char line3[PRINTLINESIZE];
				2108
				2109	static void
				2110	spell_clear_flags(wordnode_T *node)
				2111	{
				2112	wordnode_T *np;
				2113
				2114	for (np = node; np != NULL; np = np->wn_sibling)
				2115	{
				2116	np->wn_u1.index = FALSE;
				2117	spell_clear_flags(np->wn_child);
				2118	}
				2119	}
				2120
				2121	static void
				2122	spell_print_node(wordnode_T *node, int depth)
				2123	{
				2124	if (node->wn_u1.index)
				2125	{
				2126	/* Done this node before, print the reference. */
				2127	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
				2128	PRINTSOME(line2, depth, " ", 0, 0);
				2129	PRINTSOME(line3, depth, " ", 0, 0);
				2130	msg((char_u *)line1);
				2131	msg((char_u *)line2);
				2132	msg((char_u *)line3);
				2133	}
				2134	else
				2135	{
				2136	node->wn_u1.index = TRUE;
				2137
				2138	if (node->wn_byte != NUL)
				2139	{
				2140	if (node->wn_child != NULL)
				2141	PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
				2142	else
				2143	/* Cannot happen? */
				2144	PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
				2145	}
				2146	else
				2147	PRINTSOME(line1, depth, " $ ", 0, 0);
				2148
				2149	PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
				2150
				2151	if (node->wn_sibling != NULL)
				2152	PRINTSOME(line3, depth, " \| ", 0, 0);
				2153	else
				2154	PRINTSOME(line3, depth, " ", 0, 0);
				2155
				2156	if (node->wn_byte == NUL)
				2157	{
				2158	msg((char_u *)line1);
				2159	msg((char_u *)line2);
				2160	msg((char_u *)line3);
				2161	}
				2162
				2163	/* do the children */
				2164	if (node->wn_byte != NUL && node->wn_child != NULL)
				2165	spell_print_node(node->wn_child, depth + 1);
				2166
				2167	/* do the siblings */
				2168	if (node->wn_sibling != NULL)
				2169	{
				2170	/* get rid of all parent details except \| */
				2171	STRCPY(line1, line3);
				2172	STRCPY(line2, line3);
				2173	spell_print_node(node->wn_sibling, depth);
				2174	}
				2175	}
				2176	}
				2177
				2178	static void
				2179	spell_print_tree(wordnode_T *root)
				2180	{
				2181	if (root != NULL)
				2182	{
				2183	/* Clear the "wn_u1.index" fields, used to remember what has been
				2184	* done. */
				2185	spell_clear_flags(root);
				2186
				2187	/* Recursively print the tree. */
				2188	spell_print_node(root, 0);
				2189	}
				2190	}
				2191	#endif /* SPELL_PRINTTREE */
				2192
				2193	/*
				2194	* Read the affix file "fname".
				2195	* Returns an afffile_T, NULL for complete failure.
				2196	*/
				2197	static afffile_T *
				2198	spell_read_aff(spellinfo_T spin, char_u fname)
				2199	{
				2200	FILE *fd;
				2201	afffile_T *aff;
				2202	char_u rline[MAXLINELEN];
				2203	char_u *line;
				2204	char_u *pc = NULL;
				2205	#define MAXITEMCNT 30
				2206	char_u *(items[MAXITEMCNT]);
				2207	int itemcnt;
				2208	char_u *p;
				2209	int lnum = 0;
				2210	affheader_T *cur_aff = NULL;
				2211	int did_postpone_prefix = FALSE;
				2212	int aff_todo = 0;
				2213	hashtab_T *tp;
				2214	char_u *low = NULL;
				2215	char_u *fol = NULL;
				2216	char_u *upp = NULL;
				2217	int do_rep;
				2218	int do_repsal;
				2219	int do_sal;
				2220	int do_mapline;
				2221	int found_map = FALSE;
				2222	hashitem_T *hi;
				2223	int l;
				2224	int compminlen = 0; /* COMPOUNDMIN value */
				2225	int compsylmax = 0; /* COMPOUNDSYLMAX value */
				2226	int compoptions = 0; /* COMP_ flags */
				2227	int compmax = 0; /* COMPOUNDWORDMAX value */
				2228	char_u compflags = NULL; / COMPOUNDFLAG and COMPOUNDRULE
				2229	concatenated */
				2230	char_u midword = NULL; / MIDWORD value */
				2231	char_u syllable = NULL; / SYLLABLE value */
				2232	char_u sofofrom = NULL; / SOFOFROM value */
				2233	char_u sofoto = NULL; / SOFOTO value */
				2234
				2235	/*
				2236	* Open the file.
				2237	*/
				2238	fd = mch_fopen((char *)fname, "r");
				2239	if (fd == NULL)
				2240	{
				2241	EMSG2(_(e_notopen), fname);
				2242	return NULL;
				2243	}
				2244
				2245	vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
				2246	spell_message(spin, IObuff);
				2247
				2248	/* Only do REP lines when not done in another .aff file already. */
				2249	do_rep = spin->si_rep.ga_len == 0;
				2250
				2251	/* Only do REPSAL lines when not done in another .aff file already. */
				2252	do_repsal = spin->si_repsal.ga_len == 0;
				2253
				2254	/* Only do SAL lines when not done in another .aff file already. */
				2255	do_sal = spin->si_sal.ga_len == 0;
				2256
				2257	/* Only do MAP lines when not done in another .aff file already. */
				2258	do_mapline = spin->si_map.ga_len == 0;
				2259
				2260	/*
				2261	* Allocate and init the afffile_T structure.
				2262	*/
				2263	aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
				2264	if (aff == NULL)
				2265	{
				2266	fclose(fd);
				2267	return NULL;
				2268	}
				2269	hash_init(&aff->af_pref);
				2270	hash_init(&aff->af_suff);
				2271	hash_init(&aff->af_comp);
				2272
				2273	/*
				2274	* Read all the lines in the file one by one.
				2275	*/
				2276	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				2277	{
				2278	line_breakcheck();
				2279	++lnum;
				2280
				2281	/* Skip comment lines. */
				2282	if (*rline == '#')
				2283	continue;
				2284
				2285	/* Convert from "SET" to 'encoding' when needed. */
				2286	vim_free(pc);
				2287	#ifdef FEAT_MBYTE
				2288	if (spin->si_conv.vc_type != CONV_NONE)
				2289	{
				2290	pc = string_convert(&spin->si_conv, rline, NULL);
				2291	if (pc == NULL)
				2292	{
				2293	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				2294	fname, lnum, rline);
				2295	continue;
				2296	}
				2297	line = pc;
				2298	}
				2299	else
				2300	#endif
				2301	{
				2302	pc = NULL;
				2303	line = rline;
				2304	}
				2305
				2306	/* Split the line up in white separated items. Put a NUL after each
				2307	* item. */
				2308	itemcnt = 0;
				2309	for (p = line; ; )
				2310	{
				2311	while (p != NUL && p <= ' ') /* skip white space and CR/NL */
				2312	++p;
				2313	if (*p == NUL)
				2314	break;
				2315	if (itemcnt == MAXITEMCNT) /* too many items */
				2316	break;
				2317	items[itemcnt++] = p;
				2318	/* A few items have arbitrary text argument, don't split them. */
				2319	if (itemcnt == 2 && spell_info_item(items[0]))
				2320	while (p >= ' ' \|\| p == TAB) /* skip until CR/NL */
				2321	++p;
				2322	else
				2323	while (p > ' ') / skip until white space or CR/NL */
				2324	++p;
				2325	if (*p == NUL)
				2326	break;
				2327	*p++ = NUL;
				2328	}
				2329
				2330	/* Handle non-empty lines. */
				2331	if (itemcnt > 0)
				2332	{
				2333	if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
				2334	{
				2335	#ifdef FEAT_MBYTE
				2336	/* Setup for conversion from "ENC" to 'encoding'. */
				2337	aff->af_enc = enc_canonize(items[1]);
				2338	if (aff->af_enc != NULL && !spin->si_ascii
				2339	&& convert_setup(&spin->si_conv, aff->af_enc,
				2340	p_enc) == FAIL)
				2341	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				2342	fname, aff->af_enc, p_enc);
				2343	spin->si_conv.vc_fail = TRUE;
				2344	#else
				2345	smsg((char_u *)_("Conversion in %s not supported"), fname);
				2346	#endif
				2347	}
				2348	else if (is_aff_rule(items, itemcnt, "FLAG", 2)
				2349	&& aff->af_flagtype == AFT_CHAR)
				2350	{
				2351	if (STRCMP(items[1], "long") == 0)
				2352	aff->af_flagtype = AFT_LONG;
				2353	else if (STRCMP(items[1], "num") == 0)
				2354	aff->af_flagtype = AFT_NUM;
				2355	else if (STRCMP(items[1], "caplong") == 0)
				2356	aff->af_flagtype = AFT_CAPLONG;
				2357	else
				2358	smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
				2359	fname, lnum, items[1]);
				2360	if (aff->af_rare != 0
				2361	\|\| aff->af_keepcase != 0
				2362	\|\| aff->af_bad != 0
				2363	\|\| aff->af_needaffix != 0
				2364	\|\| aff->af_circumfix != 0
				2365	\|\| aff->af_needcomp != 0
				2366	\|\| aff->af_comproot != 0
				2367	\|\| aff->af_nosuggest != 0
				2368	\|\| compflags != NULL
				2369	\|\| aff->af_suff.ht_used > 0
				2370	\|\| aff->af_pref.ht_used > 0)
				2371	smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
				2372	fname, lnum, items[1]);
				2373	}
				2374	else if (spell_info_item(items[0]))
				2375	{
				2376	p = (char_u *)getroom(spin,
				2377	(spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
				2378	+ STRLEN(items[0])
				2379	+ STRLEN(items[1]) + 3, FALSE);
				2380	if (p != NULL)
				2381	{
				2382	if (spin->si_info != NULL)
				2383	{
				2384	STRCPY(p, spin->si_info);
				2385	STRCAT(p, "\n");
				2386	}
				2387	STRCAT(p, items[0]);
				2388	STRCAT(p, " ");
				2389	STRCAT(p, items[1]);
				2390	spin->si_info = p;
				2391	}
				2392	}
				2393	else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
				2394	&& midword == NULL)
				2395	{
				2396	midword = getroom_save(spin, items[1]);
				2397	}
				2398	else if (is_aff_rule(items, itemcnt, "TRY", 2))
				2399	{
				2400	/* ignored, we look in the tree for what chars may appear */
				2401	}
				2402	/* TODO: remove "RAR" later */
				2403	else if ((is_aff_rule(items, itemcnt, "RAR", 2)
				2404	\|\| is_aff_rule(items, itemcnt, "RARE", 2))
				2405	&& aff->af_rare == 0)
				2406	{
				2407	aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
				2408	fname, lnum);
				2409	}
				2410	/* TODO: remove "KEP" later */
				2411	else if ((is_aff_rule(items, itemcnt, "KEP", 2)
				2412	\|\| is_aff_rule(items, itemcnt, "KEEPCASE", 2))
				2413	&& aff->af_keepcase == 0)
				2414	{
				2415	aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
				2416	fname, lnum);
				2417	}
				2418	else if ((is_aff_rule(items, itemcnt, "BAD", 2)
				2419	\|\| is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
				2420	&& aff->af_bad == 0)
				2421	{
				2422	aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
				2423	fname, lnum);
				2424	}
				2425	else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
				2426	&& aff->af_needaffix == 0)
				2427	{
				2428	aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
				2429	fname, lnum);
				2430	}
				2431	else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
				2432	&& aff->af_circumfix == 0)
				2433	{
				2434	aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
				2435	fname, lnum);
				2436	}
				2437	else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
				2438	&& aff->af_nosuggest == 0)
				2439	{
				2440	aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
				2441	fname, lnum);
				2442	}
				2443	else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
				2444	\|\| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
				2445	&& aff->af_needcomp == 0)
				2446	{
				2447	aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
				2448	fname, lnum);
				2449	}
				2450	else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
				2451	&& aff->af_comproot == 0)
				2452	{
				2453	aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
				2454	fname, lnum);
				2455	}
				2456	else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
				2457	&& aff->af_compforbid == 0)
				2458	{
				2459	aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
				2460	fname, lnum);
				2461	if (aff->af_pref.ht_used > 0)
				2462	smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
				2463	fname, lnum);
				2464	}
				2465	else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
				2466	&& aff->af_comppermit == 0)
				2467	{
				2468	aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
				2469	fname, lnum);
				2470	if (aff->af_pref.ht_used > 0)
				2471	smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
				2472	fname, lnum);
				2473	}
				2474	else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
				2475	&& compflags == NULL)
				2476	{
				2477	/* Turn flag "c" into COMPOUNDRULE compatible string "c+",
				2478	* "Na" into "Na+", "1234" into "1234+". */
				2479	p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
				2480	if (p != NULL)
				2481	{
				2482	STRCPY(p, items[1]);
				2483	STRCAT(p, "+");
				2484	compflags = p;
				2485	}
				2486	}
				2487	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
				2488	{
				2489	/* We don't use the count, but do check that it's a number and
				2490	* not COMPOUNDRULE mistyped. */
				2491	if (atoi((char *)items[1]) == 0)
				2492	smsg((char_u *)_("Wrong COMPOUNDRULES value in %s line %d: %s"),
				2493	fname, lnum, items[1]);
				2494	}
				2495	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
				2496	{
				2497	/* Don't use the first rule if it is a number. */
				2498	if (compflags != NULL \|\| *skipdigits(items[1]) != NUL)
				2499	{
				2500	/* Concatenate this string to previously defined ones,
				2501	* using a slash to separate them. */
				2502	l = (int)STRLEN(items[1]) + 1;
				2503	if (compflags != NULL)
				2504	l += (int)STRLEN(compflags) + 1;
				2505	p = getroom(spin, l, FALSE);
				2506	if (p != NULL)
				2507	{
				2508	if (compflags != NULL)
				2509	{
				2510	STRCPY(p, compflags);
				2511	STRCAT(p, "/");
				2512	}
				2513	STRCAT(p, items[1]);
				2514	compflags = p;
				2515	}
				2516	}
				2517	}
				2518	else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
				2519	&& compmax == 0)
				2520	{
				2521	compmax = atoi((char *)items[1]);
				2522	if (compmax == 0)
				2523	smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
				2524	fname, lnum, items[1]);
				2525	}
				2526	else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
				2527	&& compminlen == 0)
				2528	{
				2529	compminlen = atoi((char *)items[1]);
				2530	if (compminlen == 0)
				2531	smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
				2532	fname, lnum, items[1]);
				2533	}
				2534	else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
				2535	&& compsylmax == 0)
				2536	{
				2537	compsylmax = atoi((char *)items[1]);
				2538	if (compsylmax == 0)
				2539	smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
				2540	fname, lnum, items[1]);
				2541	}
				2542	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
				2543	{
				2544	compoptions \|= COMP_CHECKDUP;
				2545	}
				2546	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
				2547	{
				2548	compoptions \|= COMP_CHECKREP;
				2549	}
				2550	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
				2551	{
				2552	compoptions \|= COMP_CHECKCASE;
				2553	}
				2554	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
				2555	{
				2556	compoptions \|= COMP_CHECKTRIPLE;
				2557	}
				2558	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
				2559	{
				2560	if (atoi((char *)items[1]) == 0)
				2561	smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
				2562	fname, lnum, items[1]);
				2563	}
				2564	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
				2565	{
				2566	garray_T *gap = &spin->si_comppat;
				2567	int i;
				2568
				2569	/* Only add the couple if it isn't already there. */
				2570	for (i = 0; i < gap->ga_len - 1; i += 2)
				2571	if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
				2572	&& STRCMP(((char_u **)(gap->ga_data))[i + 1],
				2573	items[2]) == 0)
				2574	break;
				2575	if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
				2576	{
				2577	((char_u **)(gap->ga_data))[gap->ga_len++]
				2578	= getroom_save(spin, items[1]);
				2579	((char_u **)(gap->ga_data))[gap->ga_len++]
				2580	= getroom_save(spin, items[2]);
				2581	}
				2582	}
				2583	else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
				2584	&& syllable == NULL)
				2585	{
				2586	syllable = getroom_save(spin, items[1]);
				2587	}
				2588	else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
				2589	{
				2590	spin->si_nobreak = TRUE;
				2591	}
				2592	else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
				2593	{
				2594	spin->si_nosplitsugs = TRUE;
				2595	}
				2596	else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
				2597	{
				2598	spin->si_nocompoundsugs = TRUE;
				2599	}
				2600	else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
				2601	{
				2602	spin->si_nosugfile = TRUE;
				2603	}
				2604	else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
				2605	{
				2606	aff->af_pfxpostpone = TRUE;
				2607	}
				2608	else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
				2609	{
				2610	aff->af_ignoreextra = TRUE;
				2611	}
				2612	else if ((STRCMP(items[0], "PFX") == 0
				2613	\|\| STRCMP(items[0], "SFX") == 0)
				2614	&& aff_todo == 0
				2615	&& itemcnt >= 4)
				2616	{
				2617	int lasti = 4;
				2618	char_u key[AH_KEY_LEN];
				2619
				2620	if (*items[0] == 'P')
				2621	tp = &aff->af_pref;
				2622	else
				2623	tp = &aff->af_suff;
				2624
				2625	/* Myspell allows the same affix name to be used multiple
				2626	* times. The affix files that do this have an undocumented
				2627	* "S" flag on all but the last block, thus we check for that
				2628	* and store it in ah_follows. */
				2629	vim_strncpy(key, items[1], AH_KEY_LEN - 1);
				2630	hi = hash_find(tp, key);
				2631	if (!HASHITEM_EMPTY(hi))
				2632	{
				2633	cur_aff = HI2AH(hi);
				2634	if (cur_aff->ah_combine != (*items[2] == 'Y'))
				2635	smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
				2636	fname, lnum, items[1]);
				2637	if (!cur_aff->ah_follows)
				2638	smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
				2639	fname, lnum, items[1]);
				2640	}
				2641	else
				2642	{
				2643	/* New affix letter. */
				2644	cur_aff = (affheader_T *)getroom(spin,
				2645	sizeof(affheader_T), TRUE);
				2646	if (cur_aff == NULL)
				2647	break;
				2648	cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
				2649	fname, lnum);
				2650	if (cur_aff->ah_flag == 0 \|\| STRLEN(items[1]) >= AH_KEY_LEN)
				2651	break;
				2652	if (cur_aff->ah_flag == aff->af_bad
				2653	\|\| cur_aff->ah_flag == aff->af_rare
				2654	\|\| cur_aff->ah_flag == aff->af_keepcase
				2655	\|\| cur_aff->ah_flag == aff->af_needaffix
				2656	\|\| cur_aff->ah_flag == aff->af_circumfix
				2657	\|\| cur_aff->ah_flag == aff->af_nosuggest
				2658	\|\| cur_aff->ah_flag == aff->af_needcomp
				2659	\|\| cur_aff->ah_flag == aff->af_comproot)
				2660	smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
				2661	fname, lnum, items[1]);
				2662	STRCPY(cur_aff->ah_key, items[1]);
				2663	hash_add(tp, cur_aff->ah_key);
				2664
				2665	cur_aff->ah_combine = (*items[2] == 'Y');
				2666	}
				2667
				2668	/* Check for the "S" flag, which apparently means that another
				2669	* block with the same affix name is following. */
				2670	if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
				2671	{
				2672	++lasti;
				2673	cur_aff->ah_follows = TRUE;
				2674	}
				2675	else
				2676	cur_aff->ah_follows = FALSE;
				2677
				2678	/* Myspell allows extra text after the item, but that might
				2679	* mean mistakes go unnoticed. Require a comment-starter. */
				2680	if (itemcnt > lasti && *items[lasti] != '#')
				2681	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2682
				2683	if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
				2684	smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
				2685	fname, lnum, items[2]);
				2686
				2687	if (*items[0] == 'P' && aff->af_pfxpostpone)
				2688	{
				2689	if (cur_aff->ah_newID == 0)
				2690	{
				2691	/* Use a new number in the .spl file later, to be able
				2692	* to handle multiple .aff files. */
				2693	check_renumber(spin);
				2694	cur_aff->ah_newID = ++spin->si_newprefID;
				2695
				2696	/* We only really use ah_newID if the prefix is
				2697	* postponed. We know that only after handling all
				2698	* the items. */
				2699	did_postpone_prefix = FALSE;
				2700	}
				2701	else
				2702	/* Did use the ID in a previous block. */
				2703	did_postpone_prefix = TRUE;
				2704	}
				2705
				2706	aff_todo = atoi((char *)items[3]);
				2707	}
				2708	else if ((STRCMP(items[0], "PFX") == 0
				2709	\|\| STRCMP(items[0], "SFX") == 0)
				2710	&& aff_todo > 0
				2711	&& STRCMP(cur_aff->ah_key, items[1]) == 0
				2712	&& itemcnt >= 5)
				2713	{
				2714	affentry_T *aff_entry;
				2715	int upper = FALSE;
				2716	int lasti = 5;
				2717
				2718	/* Myspell allows extra text after the item, but that might
				2719	* mean mistakes go unnoticed. Require a comment-starter,
				2720	* unless IGNOREEXTRA is used. Hunspell uses a "-" item. */
				2721	if (itemcnt > lasti
				2722	&& !aff->af_ignoreextra
				2723	&& *items[lasti] != '#'
				2724	&& (STRCMP(items[lasti], "-") != 0
				2725	\|\| itemcnt != lasti + 1))
				2726	smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
				2727
				2728	/* New item for an affix letter. */
				2729	--aff_todo;
				2730	aff_entry = (affentry_T *)getroom(spin,
				2731	sizeof(affentry_T), TRUE);
				2732	if (aff_entry == NULL)
				2733	break;
				2734
				2735	if (STRCMP(items[2], "0") != 0)
				2736	aff_entry->ae_chop = getroom_save(spin, items[2]);
				2737	if (STRCMP(items[3], "0") != 0)
				2738	{
				2739	aff_entry->ae_add = getroom_save(spin, items[3]);
				2740
				2741	/* Recognize flags on the affix: abcd/XYZ */
				2742	aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
				2743	if (aff_entry->ae_flags != NULL)
				2744	{
				2745	*aff_entry->ae_flags++ = NUL;
				2746	aff_process_flags(aff, aff_entry);
				2747	}
				2748	}
				2749
				2750	/* Don't use an affix entry with non-ASCII characters when
				2751	* "spin->si_ascii" is TRUE. */
				2752	if (!spin->si_ascii \|\| !(has_non_ascii(aff_entry->ae_chop)
				2753	\|\| has_non_ascii(aff_entry->ae_add)))
				2754	{
				2755	aff_entry->ae_next = cur_aff->ah_first;
				2756	cur_aff->ah_first = aff_entry;
				2757
				2758	if (STRCMP(items[4], ".") != 0)
				2759	{
				2760	char_u buf[MAXLINELEN];
				2761
				2762	aff_entry->ae_cond = getroom_save(spin, items[4]);
				2763	if (*items[0] == 'P')
				2764	sprintf((char *)buf, "^%s", items[4]);
				2765	else
				2766	sprintf((char *)buf, "%s$", items[4]);
				2767	aff_entry->ae_prog = vim_regcomp(buf,
				2768	RE_MAGIC + RE_STRING + RE_STRICT);
				2769	if (aff_entry->ae_prog == NULL)
				2770	smsg((char_u *)_("Broken condition in %s line %d: %s"),
				2771	fname, lnum, items[4]);
				2772	}
				2773
				2774	/* For postponed prefixes we need an entry in si_prefcond
				2775	* for the condition. Use an existing one if possible.
				2776	* Can't be done for an affix with flags, ignoring
				2777	* COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
				2778	if (*items[0] == 'P' && aff->af_pfxpostpone
				2779	&& aff_entry->ae_flags == NULL)
				2780	{
				2781	/* When the chop string is one lower-case letter and
				2782	* the add string ends in the upper-case letter we set
				2783	* the "upper" flag, clear "ae_chop" and remove the
				2784	* letters from "ae_add". The condition must either
				2785	* be empty or start with the same letter. */
				2786	if (aff_entry->ae_chop != NULL
				2787	&& aff_entry->ae_add != NULL
				2788	#ifdef FEAT_MBYTE
				2789	&& aff_entry->ae_chop[(*mb_ptr2len)(
				2790	aff_entry->ae_chop)] == NUL
				2791	#else
				2792	&& aff_entry->ae_chop[1] == NUL
				2793	#endif
				2794	)
				2795	{
				2796	int c, c_up;
				2797
				2798	c = PTR2CHAR(aff_entry->ae_chop);
				2799	c_up = SPELL_TOUPPER(c);
				2800	if (c_up != c
				2801	&& (aff_entry->ae_cond == NULL
				2802	\|\| PTR2CHAR(aff_entry->ae_cond) == c))
				2803	{
				2804	p = aff_entry->ae_add
				2805	+ STRLEN(aff_entry->ae_add);
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2806	MB_PTR_BACK(aff_entry->ae_add, p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2807	if (PTR2CHAR(p) == c_up)
				2808	{
				2809	upper = TRUE;
				2810	aff_entry->ae_chop = NULL;
				2811	*p = NUL;
				2812
				2813	/* The condition is matched with the
				2814	* actual word, thus must check for the
				2815	* upper-case letter. */
				2816	if (aff_entry->ae_cond != NULL)
				2817	{
				2818	char_u buf[MAXLINELEN];
				2819	#ifdef FEAT_MBYTE
				2820	if (has_mbyte)
				2821	{
				2822	onecap_copy(items[4], buf, TRUE);
				2823	aff_entry->ae_cond = getroom_save(
				2824	spin, buf);
				2825	}
				2826	else
				2827	#endif
				2828	*aff_entry->ae_cond = c_up;
				2829	if (aff_entry->ae_cond != NULL)
				2830	{
				2831	sprintf((char *)buf, "^%s",
				2832	aff_entry->ae_cond);
				2833	vim_regfree(aff_entry->ae_prog);
				2834	aff_entry->ae_prog = vim_regcomp(
				2835	buf, RE_MAGIC + RE_STRING);
				2836	}
				2837	}
				2838	}
				2839	}
				2840	}
				2841
				2842	if (aff_entry->ae_chop == NULL
				2843	&& aff_entry->ae_flags == NULL)
				2844	{
				2845	int idx;
				2846	char_u **pp;
				2847	int n;
				2848
				2849	/* Find a previously used condition. */
				2850	for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
				2851	--idx)
				2852	{
				2853	p = ((char_u **)spin->si_prefcond.ga_data)[idx];
				2854	if (str_equal(p, aff_entry->ae_cond))
				2855	break;
				2856	}
				2857	if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
				2858	{
				2859	/* Not found, add a new condition. */
				2860	idx = spin->si_prefcond.ga_len++;
				2861	pp = ((char_u **)spin->si_prefcond.ga_data)
				2862	+ idx;
				2863	if (aff_entry->ae_cond == NULL)
				2864	*pp = NULL;
				2865	else
				2866	*pp = getroom_save(spin,
				2867	aff_entry->ae_cond);
				2868	}
				2869
				2870	/* Add the prefix to the prefix tree. */
				2871	if (aff_entry->ae_add == NULL)
				2872	p = (char_u *)"";
				2873	else
				2874	p = aff_entry->ae_add;
				2875
				2876	/* PFX_FLAGS is a negative number, so that
				2877	* tree_add_word() knows this is the prefix tree. */
				2878	n = PFX_FLAGS;
				2879	if (!cur_aff->ah_combine)
				2880	n \|= WFP_NC;
				2881	if (upper)
				2882	n \|= WFP_UP;
				2883	if (aff_entry->ae_comppermit)
				2884	n \|= WFP_COMPPERMIT;
				2885	if (aff_entry->ae_compforbid)
				2886	n \|= WFP_COMPFORBID;
				2887	tree_add_word(spin, p, spin->si_prefroot, n,
				2888	idx, cur_aff->ah_newID);
				2889	did_postpone_prefix = TRUE;
				2890	}
				2891
				2892	/* Didn't actually use ah_newID, backup si_newprefID. */
				2893	if (aff_todo == 0 && !did_postpone_prefix)
				2894	{
				2895	--spin->si_newprefID;
				2896	cur_aff->ah_newID = 0;
				2897	}
				2898	}
				2899	}
				2900	}
				2901	else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
				2902	{
				2903	fol = vim_strsave(items[1]);
				2904	}
				2905	else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
				2906	{
				2907	low = vim_strsave(items[1]);
				2908	}
				2909	else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
				2910	{
				2911	upp = vim_strsave(items[1]);
				2912	}
				2913	else if (is_aff_rule(items, itemcnt, "REP", 2)
				2914	\|\| is_aff_rule(items, itemcnt, "REPSAL", 2))
				2915	{
				2916	/* Ignore REP/REPSAL count */;
				2917	if (!isdigit(*items[1]))
				2918	smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
				2919	fname, lnum);
				2920	}
				2921	else if ((STRCMP(items[0], "REP") == 0
				2922	\|\| STRCMP(items[0], "REPSAL") == 0)
				2923	&& itemcnt >= 3)
				2924	{
				2925	/* REP/REPSAL item */
				2926	/* Myspell ignores extra arguments, we require it starts with
				2927	* # to detect mistakes. */
				2928	if (itemcnt > 3 && items[3][0] != '#')
				2929	smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
				2930	if (items[0][3] == 'S' ? do_repsal : do_rep)
				2931	{
				2932	/* Replace underscore with space (can't include a space
				2933	* directly). */
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2934	for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2935	if (*p == '_')
				2936	*p = ' ';
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2937	for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2938	if (*p == '_')
				2939	*p = ' ';
				2940	add_fromto(spin, items[0][3] == 'S'
				2941	? &spin->si_repsal
				2942	: &spin->si_rep, items[1], items[2]);
				2943	}
				2944	}
				2945	else if (is_aff_rule(items, itemcnt, "MAP", 2))
				2946	{
				2947	/* MAP item or count */
				2948	if (!found_map)
				2949	{
				2950	/* First line contains the count. */
				2951	found_map = TRUE;
				2952	if (!isdigit(*items[1]))
				2953	smsg((char_u *)_("Expected MAP count in %s line %d"),
				2954	fname, lnum);
				2955	}
				2956	else if (do_mapline)
				2957	{
				2958	int c;
				2959
				2960	/* Check that every character appears only once. */
				2961	for (p = items[1]; *p != NUL; )
				2962	{
				2963	#ifdef FEAT_MBYTE
				2964	c = mb_ptr2char_adv(&p);
				2965	#else
				2966	c = *p++;
				2967	#endif
				2968	if ((spin->si_map.ga_len > 0
				2969	&& vim_strchr(spin->si_map.ga_data, c)
				2970	!= NULL)
				2971	\|\| vim_strchr(p, c) != NULL)
				2972	smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
				2973	fname, lnum);
				2974	}
				2975
				2976	/* We simply concatenate all the MAP strings, separated by
				2977	* slashes. */
				2978	ga_concat(&spin->si_map, items[1]);
				2979	ga_append(&spin->si_map, '/');
				2980	}
				2981	}
				2982	/* Accept "SAL from to" and "SAL from to #comment". */
				2983	else if (is_aff_rule(items, itemcnt, "SAL", 3))
				2984	{
				2985	if (do_sal)
				2986	{
				2987	/* SAL item (sounds-a-like)
				2988	* Either one of the known keys or a from-to pair. */
				2989	if (STRCMP(items[1], "followup") == 0)
				2990	spin->si_followup = sal_to_bool(items[2]);
				2991	else if (STRCMP(items[1], "collapse_result") == 0)
				2992	spin->si_collapse = sal_to_bool(items[2]);
				2993	else if (STRCMP(items[1], "remove_accents") == 0)
				2994	spin->si_rem_accents = sal_to_bool(items[2]);
				2995	else
				2996	/* when "to" is "_" it means empty */
				2997	add_fromto(spin, &spin->si_sal, items[1],
				2998	STRCMP(items[2], "_") == 0 ? (char_u *)""
				2999	: items[2]);
				3000	}
				3001	}
				3002	else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
				3003	&& sofofrom == NULL)
				3004	{
				3005	sofofrom = getroom_save(spin, items[1]);
				3006	}
				3007	else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
				3008	&& sofoto == NULL)
				3009	{
				3010	sofoto = getroom_save(spin, items[1]);
				3011	}
				3012	else if (STRCMP(items[0], "COMMON") == 0)
				3013	{
				3014	int i;
				3015
				3016	for (i = 1; i < itemcnt; ++i)
				3017	{
				3018	if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
				3019	items[i])))
				3020	{
				3021	p = vim_strsave(items[i]);
				3022	if (p == NULL)
				3023	break;
				3024	hash_add(&spin->si_commonwords, p);
				3025	}
				3026	}
				3027	}
				3028	else
				3029	smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
				3030	fname, lnum, items[0]);
				3031	}
				3032	}
				3033
				3034	if (fol != NULL \|\| low != NULL \|\| upp != NULL)
				3035	{
				3036	if (spin->si_clear_chartab)
				3037	{
				3038	/* Clear the char type tables, don't want to use any of the
				3039	* currently used spell properties. */
				3040	init_spell_chartab();
				3041	spin->si_clear_chartab = FALSE;
				3042	}
				3043
				3044	/*
				3045	* Don't write a word table for an ASCII file, so that we don't check
				3046	* for conflicts with a word table that matches 'encoding'.
				3047	* Don't write one for utf-8 either, we use utf_*() and
				3048	* mb_get_class(), the list of chars in the file will be incomplete.
				3049	*/
				3050	if (!spin->si_ascii
				3051	#ifdef FEAT_MBYTE
				3052	&& !enc_utf8
				3053	#endif
				3054	)
				3055	{
				3056	if (fol == NULL \|\| low == NULL \|\| upp == NULL)
				3057	smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
				3058	else
				3059	(void)set_spell_chartab(fol, low, upp);
				3060	}
				3061
				3062	vim_free(fol);
				3063	vim_free(low);
				3064	vim_free(upp);
				3065	}
				3066
				3067	/* Use compound specifications of the .aff file for the spell info. */
				3068	if (compmax != 0)
				3069	{
				3070	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
				3071	spin->si_compmax = compmax;
				3072	}
				3073
				3074	if (compminlen != 0)
				3075	{
				3076	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
				3077	spin->si_compminlen = compminlen;
				3078	}
				3079
				3080	if (compsylmax != 0)
				3081	{
				3082	if (syllable == NULL)
				3083	smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
				3084	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
				3085	spin->si_compsylmax = compsylmax;
				3086	}
				3087
				3088	if (compoptions != 0)
				3089	{
				3090	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
				3091	spin->si_compoptions \|= compoptions;
				3092	}
				3093
				3094	if (compflags != NULL)
				3095	process_compflags(spin, aff, compflags);
				3096
				3097	/* Check that we didn't use too many renumbered flags. */
				3098	if (spin->si_newcompID < spin->si_newprefID)
				3099	{
				3100	if (spin->si_newcompID == 127 \|\| spin->si_newcompID == 255)
				3101	MSG(_("Too many postponed prefixes"));
				3102	else if (spin->si_newprefID == 0 \|\| spin->si_newprefID == 127)
				3103	MSG(_("Too many compound flags"));
				3104	else
				3105	MSG(_("Too many postponed prefixes and/or compound flags"));
				3106	}
				3107
				3108	if (syllable != NULL)
				3109	{
				3110	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
				3111	spin->si_syllable = syllable;
				3112	}
				3113
				3114	if (sofofrom != NULL \|\| sofoto != NULL)
				3115	{
				3116	if (sofofrom == NULL \|\| sofoto == NULL)
				3117	smsg((char_u *)_("Missing SOFO%s line in %s"),
				3118	sofofrom == NULL ? "FROM" : "TO", fname);
				3119	else if (spin->si_sal.ga_len > 0)
				3120	smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
				3121	else
				3122	{
				3123	aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
				3124	aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
				3125	spin->si_sofofr = sofofrom;
				3126	spin->si_sofoto = sofoto;
				3127	}
				3128	}
				3129
				3130	if (midword != NULL)
				3131	{
				3132	aff_check_string(spin->si_midword, midword, "MIDWORD");
				3133	spin->si_midword = midword;
				3134	}
				3135
				3136	vim_free(pc);
				3137	fclose(fd);
				3138	return aff;
				3139	}
				3140
				3141	/*
				3142	* Return TRUE when items[0] equals "rulename", there are "mincount" items or
				3143	* a comment is following after item "mincount".
				3144	*/
				3145	static int
				3146	is_aff_rule(
				3147	char_u **items,
				3148	int itemcnt,
				3149	char *rulename,
				3150	int mincount)
				3151	{
				3152	return (STRCMP(items[0], rulename) == 0
				3153	&& (itemcnt == mincount
				3154	\|\| (itemcnt > mincount && items[mincount][0] == '#')));
				3155	}
				3156
				3157	/*
				3158	* For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
				3159	* ae_flags to ae_comppermit and ae_compforbid.
				3160	*/
				3161	static void
				3162	aff_process_flags(afffile_T affile, affentry_T entry)
				3163	{
				3164	char_u *p;
				3165	char_u *prevp;
				3166	unsigned flag;
				3167
				3168	if (entry->ae_flags != NULL
				3169	&& (affile->af_compforbid != 0 \|\| affile->af_comppermit != 0))
				3170	{
				3171	for (p = entry->ae_flags; *p != NUL; )
				3172	{
				3173	prevp = p;
				3174	flag = get_affitem(affile->af_flagtype, &p);
				3175	if (flag == affile->af_comppermit \|\| flag == affile->af_compforbid)
				3176	{
				3177	STRMOVE(prevp, p);
				3178	p = prevp;
				3179	if (flag == affile->af_comppermit)
				3180	entry->ae_comppermit = TRUE;
				3181	else
				3182	entry->ae_compforbid = TRUE;
				3183	}
				3184	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3185	++p;
				3186	}
				3187	if (*entry->ae_flags == NUL)
				3188	entry->ae_flags = NULL; /* nothing left */
				3189	}
				3190	}
				3191
				3192	/*
				3193	* Return TRUE if "s" is the name of an info item in the affix file.
				3194	*/
				3195	static int
				3196	spell_info_item(char_u *s)
				3197	{
				3198	return STRCMP(s, "NAME") == 0
				3199	\|\| STRCMP(s, "HOME") == 0
				3200	\|\| STRCMP(s, "VERSION") == 0
				3201	\|\| STRCMP(s, "AUTHOR") == 0
				3202	\|\| STRCMP(s, "EMAIL") == 0
				3203	\|\| STRCMP(s, "COPYRIGHT") == 0;
				3204	}
				3205
				3206	/*
				3207	* Turn an affix flag name into a number, according to the FLAG type.
				3208	* returns zero for failure.
				3209	*/
				3210	static unsigned
				3211	affitem2flag(
				3212	int flagtype,
				3213	char_u *item,
				3214	char_u *fname,
				3215	int lnum)
				3216	{
				3217	unsigned res;
				3218	char_u *p = item;
				3219
				3220	res = get_affitem(flagtype, &p);
				3221	if (res == 0)
				3222	{
				3223	if (flagtype == AFT_NUM)
				3224	smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
				3225	fname, lnum, item);
				3226	else
				3227	smsg((char_u *)_("Illegal flag in %s line %d: %s"),
				3228	fname, lnum, item);
				3229	}
				3230	if (*p != NUL)
				3231	{
				3232	smsg((char_u *)_(e_affname), fname, lnum, item);
				3233	return 0;
				3234	}
				3235
				3236	return res;
				3237	}
				3238
				3239	/*
				3240	* Get one affix name from "*pp" and advance the pointer.
				3241	* Returns zero for an error, still advances the pointer then.
				3242	*/
				3243	static unsigned
				3244	get_affitem(int flagtype, char_u **pp)
				3245	{
				3246	int res;
				3247
				3248	if (flagtype == AFT_NUM)
				3249	{
				3250	if (!VIM_ISDIGIT(**pp))
				3251	{
				3252	++pp; / always advance, avoid getting stuck */
				3253	return 0;
				3254	}
				3255	res = getdigits(pp);
				3256	}
				3257	else
				3258	{
				3259	#ifdef FEAT_MBYTE
				3260	res = mb_ptr2char_adv(pp);
				3261	#else
				3262	res = (pp)++;
				3263	#endif
				3264	if (flagtype == AFT_LONG \|\| (flagtype == AFT_CAPLONG
				3265	&& res >= 'A' && res <= 'Z'))
				3266	{
				3267	if (**pp == NUL)
				3268	return 0;
				3269	#ifdef FEAT_MBYTE
				3270	res = mb_ptr2char_adv(pp) + (res << 16);
				3271	#else
				3272	res = (pp)++ + (res << 16);
				3273	#endif
				3274	}
				3275	}
				3276	return res;
				3277	}
				3278
				3279	/*
				3280	* Process the "compflags" string used in an affix file and append it to
				3281	* spin->si_compflags.
				3282	* The processing involves changing the affix names to ID numbers, so that
				3283	* they fit in one byte.
				3284	*/
				3285	static void
				3286	process_compflags(
				3287	spellinfo_T *spin,
				3288	afffile_T *aff,
				3289	char_u *compflags)
				3290	{
				3291	char_u *p;
				3292	char_u *prevp;
				3293	unsigned flag;
				3294	compitem_T *ci;
				3295	int id;
				3296	int len;
				3297	char_u *tp;
				3298	char_u key[AH_KEY_LEN];
				3299	hashitem_T *hi;
				3300
				3301	/* Make room for the old and the new compflags, concatenated with a / in
				3302	* between. Processing it makes it shorter, but we don't know by how
				3303	* much, thus allocate the maximum. */
				3304	len = (int)STRLEN(compflags) + 1;
				3305	if (spin->si_compflags != NULL)
				3306	len += (int)STRLEN(spin->si_compflags) + 1;
				3307	p = getroom(spin, len, FALSE);
				3308	if (p == NULL)
				3309	return;
				3310	if (spin->si_compflags != NULL)
				3311	{
				3312	STRCPY(p, spin->si_compflags);
				3313	STRCAT(p, "/");
				3314	}
				3315	spin->si_compflags = p;
				3316	tp = p + STRLEN(p);
				3317
				3318	for (p = compflags; *p != NUL; )
				3319	{
				3320	if (vim_strchr((char_u )"/?+[]", *p) != NULL)
				3321	/* Copy non-flag characters directly. */
				3322	tp++ = p++;
				3323	else
				3324	{
				3325	/* First get the flag number, also checks validity. */
				3326	prevp = p;
				3327	flag = get_affitem(aff->af_flagtype, &p);
				3328	if (flag != 0)
				3329	{
				3330	/* Find the flag in the hashtable. If it was used before, use
				3331	* the existing ID. Otherwise add a new entry. */
				3332	vim_strncpy(key, prevp, p - prevp);
				3333	hi = hash_find(&aff->af_comp, key);
				3334	if (!HASHITEM_EMPTY(hi))
				3335	id = HI2CI(hi)->ci_newID;
				3336	else
				3337	{
				3338	ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
				3339	if (ci == NULL)
				3340	break;
				3341	STRCPY(ci->ci_key, key);
				3342	ci->ci_flag = flag;
				3343	/* Avoid using a flag ID that has a special meaning in a
				3344	* regexp (also inside []). */
				3345	do
				3346	{
				3347	check_renumber(spin);
				3348	id = spin->si_newcompID--;
				3349	} while (vim_strchr((char_u )"/?+[]\\-^", id) != NULL);
				3350	ci->ci_newID = id;
				3351	hash_add(&aff->af_comp, ci->ci_key);
				3352	}
				3353	*tp++ = id;
				3354	}
				3355	if (aff->af_flagtype == AFT_NUM && *p == ',')
				3356	++p;
				3357	}
				3358	}
				3359
				3360	*tp = NUL;
				3361	}
				3362
				3363	/*
				3364	* Check that the new IDs for postponed affixes and compounding don't overrun
				3365	* each other. We have almost 255 available, but start at 0-127 to avoid
				3366	* using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
				3367	* When that is used up an error message is given.
				3368	*/
				3369	static void
				3370	check_renumber(spellinfo_T *spin)
				3371	{
				3372	if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
				3373	{
				3374	spin->si_newprefID = 127;
				3375	spin->si_newcompID = 255;
				3376	}
				3377	}
				3378
				3379	/*
				3380	* Return TRUE if flag "flag" appears in affix list "afflist".
				3381	*/
				3382	static int
				3383	flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
				3384	{
				3385	char_u *p;
				3386	unsigned n;
				3387
				3388	switch (flagtype)
				3389	{
				3390	case AFT_CHAR:
				3391	return vim_strchr(afflist, flag) != NULL;
				3392
				3393	case AFT_CAPLONG:
				3394	case AFT_LONG:
				3395	for (p = afflist; *p != NUL; )
				3396	{
				3397	#ifdef FEAT_MBYTE
				3398	n = mb_ptr2char_adv(&p);
				3399	#else
				3400	n = *p++;
				3401	#endif
				3402	if ((flagtype == AFT_LONG \|\| (n >= 'A' && n <= 'Z'))
				3403	&& *p != NUL)
				3404	#ifdef FEAT_MBYTE
				3405	n = mb_ptr2char_adv(&p) + (n << 16);
				3406	#else
				3407	n = *p++ + (n << 16);
				3408	#endif
				3409	if (n == flag)
				3410	return TRUE;
				3411	}
				3412	break;
				3413
				3414	case AFT_NUM:
				3415	for (p = afflist; *p != NUL; )
				3416	{
				3417	n = getdigits(&p);
				3418	if (n == flag)
				3419	return TRUE;
				3420	if (p != NUL) / skip over comma */
				3421	++p;
				3422	}
				3423	break;
				3424	}
				3425	return FALSE;
				3426	}
				3427
				3428	/*
				3429	* Give a warning when "spinval" and "affval" numbers are set and not the same.
				3430	*/
				3431	static void
				3432	aff_check_number(int spinval, int affval, char *name)
				3433	{
				3434	if (spinval != 0 && spinval != affval)
				3435	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3436	}
				3437
				3438	/*
				3439	* Give a warning when "spinval" and "affval" strings are set and not the same.
				3440	*/
				3441	static void
				3442	aff_check_string(char_u spinval, char_u affval, char *name)
				3443	{
				3444	if (spinval != NULL && STRCMP(spinval, affval) != 0)
				3445	smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
				3446	}
				3447
				3448	/*
				3449	* Return TRUE if strings "s1" and "s2" are equal. Also consider both being
				3450	* NULL as equal.
				3451	*/
				3452	static int
				3453	str_equal(char_u s1, char_u s2)
				3454	{
				3455	if (s1 == NULL \|\| s2 == NULL)
				3456	return s1 == s2;
				3457	return STRCMP(s1, s2) == 0;
				3458	}
				3459
				3460	/*
				3461	* Add a from-to item to "gap". Used for REP and SAL items.
				3462	* They are stored case-folded.
				3463	*/
				3464	static void
				3465	add_fromto(
				3466	spellinfo_T *spin,
				3467	garray_T *gap,
				3468	char_u *from,
				3469	char_u *to)
				3470	{
				3471	fromto_T *ftp;
				3472	char_u word[MAXWLEN];
				3473
				3474	if (ga_grow(gap, 1) == OK)
				3475	{
				3476	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
				3477	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
				3478	ftp->ft_from = getroom_save(spin, word);
				3479	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
				3480	ftp->ft_to = getroom_save(spin, word);
				3481	++gap->ga_len;
				3482	}
				3483	}
				3484
				3485	/*
				3486	* Convert a boolean argument in a SAL line to TRUE or FALSE;
				3487	*/
				3488	static int
				3489	sal_to_bool(char_u *s)
				3490	{
				3491	return STRCMP(s, "1") == 0 \|\| STRCMP(s, "true") == 0;
				3492	}
				3493
				3494	/*
				3495	* Free the structure filled by spell_read_aff().
				3496	*/
				3497	static void
				3498	spell_free_aff(afffile_T *aff)
				3499	{
				3500	hashtab_T *ht;
				3501	hashitem_T *hi;
				3502	int todo;
				3503	affheader_T *ah;
				3504	affentry_T *ae;
				3505
				3506	vim_free(aff->af_enc);
				3507
				3508	/* All this trouble to free the "ae_prog" items... */
				3509	for (ht = &aff->af_pref; ; ht = &aff->af_suff)
				3510	{
				3511	todo = (int)ht->ht_used;
				3512	for (hi = ht->ht_array; todo > 0; ++hi)
				3513	{
				3514	if (!HASHITEM_EMPTY(hi))
				3515	{
				3516	--todo;
				3517	ah = HI2AH(hi);
				3518	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3519	vim_regfree(ae->ae_prog);
				3520	}
				3521	}
				3522	if (ht == &aff->af_suff)
				3523	break;
				3524	}
				3525
				3526	hash_clear(&aff->af_pref);
				3527	hash_clear(&aff->af_suff);
				3528	hash_clear(&aff->af_comp);
				3529	}
				3530
				3531	/*
				3532	* Read dictionary file "fname".
				3533	* Returns OK or FAIL;
				3534	*/
				3535	static int
				3536	spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile)
				3537	{
				3538	hashtab_T ht;
				3539	char_u line[MAXLINELEN];
				3540	char_u *p;
				3541	char_u *afflist;
				3542	char_u store_afflist[MAXWLEN];
				3543	int pfxlen;
				3544	int need_affix;
				3545	char_u *dw;
				3546	char_u *pc;
				3547	char_u *w;
				3548	int l;
				3549	hash_T hash;
				3550	hashitem_T *hi;
				3551	FILE *fd;
				3552	int lnum = 1;
				3553	int non_ascii = 0;
				3554	int retval = OK;
				3555	char_u message[MAXLINELEN + MAXWLEN];
				3556	int flags;
				3557	int duplicate = 0;
				3558
				3559	/*
				3560	* Open the file.
				3561	*/
				3562	fd = mch_fopen((char *)fname, "r");
				3563	if (fd == NULL)
				3564	{
				3565	EMSG2(_(e_notopen), fname);
				3566	return FAIL;
				3567	}
				3568
				3569	/* The hashtable is only used to detect duplicated words. */
				3570	hash_init(&ht);
				3571
				3572	vim_snprintf((char *)IObuff, IOSIZE,
				3573	_("Reading dictionary file %s ..."), fname);
				3574	spell_message(spin, IObuff);
				3575
				3576	/* start with a message for the first line */
				3577	spin->si_msg_count = 999999;
				3578
				3579	/* Read and ignore the first line: word count. */
				3580	(void)vim_fgets(line, MAXLINELEN, fd);
				3581	if (!vim_isdigit(*skipwhite(line)))
				3582	EMSG2(_("E760: No word count in %s"), fname);
				3583
				3584	/*
				3585	* Read all the lines in the file one by one.
				3586	* The words are converted to 'encoding' here, before being added to
				3587	* the hashtable.
				3588	*/
				3589	while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
				3590	{
				3591	line_breakcheck();
				3592	++lnum;
				3593	if (line[0] == '#' \|\| line[0] == '/')
				3594	continue; /* comment line */
				3595
				3596	/* Remove CR, LF and white space from the end. White space halfway
				3597	* the word is kept to allow e.g., "et al.". */
				3598	l = (int)STRLEN(line);
				3599	while (l > 0 && line[l - 1] <= ' ')
				3600	--l;
				3601	if (l == 0)
				3602	continue; /* empty line */
				3603	line[l] = NUL;
				3604
				3605	#ifdef FEAT_MBYTE
				3606	/* Convert from "SET" to 'encoding' when needed. */
				3607	if (spin->si_conv.vc_type != CONV_NONE)
				3608	{
				3609	pc = string_convert(&spin->si_conv, line, NULL);
				3610	if (pc == NULL)
				3611	{
				3612	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				3613	fname, lnum, line);
				3614	continue;
				3615	}
				3616	w = pc;
				3617	}
				3618	else
				3619	#endif
				3620	{
				3621	pc = NULL;
				3622	w = line;
				3623	}
				3624
				3625	/* Truncate the word at the "/", set "afflist" to what follows.
				3626	* Replace "\/" by "/" and "\\" by "\". */
				3627	afflist = NULL;
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3628	for (p = w; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3629	{
				3630	if (*p == '\\' && (p[1] == '\\' \|\| p[1] == '/'))
				3631	STRMOVE(p, p + 1);
				3632	else if (*p == '/')
				3633	{
				3634	*p = NUL;
				3635	afflist = p + 1;
				3636	break;
				3637	}
				3638	}
				3639
				3640	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				3641	if (spin->si_ascii && has_non_ascii(w))
				3642	{
				3643	++non_ascii;
				3644	vim_free(pc);
				3645	continue;
				3646	}
				3647
				3648	/* This takes time, print a message every 10000 words. */
				3649	if (spin->si_verbose && spin->si_msg_count > 10000)
				3650	{
				3651	spin->si_msg_count = 0;
				3652	vim_snprintf((char *)message, sizeof(message),
				3653	_("line %6d, word %6d - %s"),
				3654	lnum, spin->si_foldwcount + spin->si_keepwcount, w);
				3655	msg_start();
				3656	msg_puts_long_attr(message, 0);
				3657	msg_clr_eos();
				3658	msg_didout = FALSE;
				3659	msg_col = 0;
				3660	out_flush();
				3661	}
				3662
				3663	/* Store the word in the hashtable to be able to find duplicates. */
				3664	dw = (char_u *)getroom_save(spin, w);
				3665	if (dw == NULL)
				3666	{
				3667	retval = FAIL;
				3668	vim_free(pc);
				3669	break;
				3670	}
				3671
				3672	hash = hash_hash(dw);
				3673	hi = hash_lookup(&ht, dw, hash);
				3674	if (!HASHITEM_EMPTY(hi))
				3675	{
				3676	if (p_verbose > 0)
				3677	smsg((char_u *)_("Duplicate word in %s line %d: %s"),
				3678	fname, lnum, dw);
				3679	else if (duplicate == 0)
				3680	smsg((char_u *)_("First duplicate word in %s line %d: %s"),
				3681	fname, lnum, dw);
				3682	++duplicate;
				3683	}
				3684	else
				3685	hash_add_item(&ht, hi, dw, hash);
				3686
				3687	flags = 0;
				3688	store_afflist[0] = NUL;
				3689	pfxlen = 0;
				3690	need_affix = FALSE;
				3691	if (afflist != NULL)
				3692	{
				3693	/* Extract flags from the affix list. */
				3694	flags \|= get_affix_flags(affile, afflist);
				3695
				3696	if (affile->af_needaffix != 0 && flag_in_afflist(
				3697	affile->af_flagtype, afflist, affile->af_needaffix))
				3698	need_affix = TRUE;
				3699
				3700	if (affile->af_pfxpostpone)
				3701	/* Need to store the list of prefix IDs with the word. */
				3702	pfxlen = get_pfxlist(affile, afflist, store_afflist);
				3703
				3704	if (spin->si_compflags != NULL)
				3705	/* Need to store the list of compound flags with the word.
				3706	* Concatenate them to the list of prefix IDs. */
				3707	get_compflags(affile, afflist, store_afflist + pfxlen);
				3708	}
				3709
				3710	/* Add the word to the word tree(s). */
				3711	if (store_word(spin, dw, flags, spin->si_region,
				3712	store_afflist, need_affix) == FAIL)
				3713	retval = FAIL;
				3714
				3715	if (afflist != NULL)
				3716	{
				3717	/* Find all matching suffixes and add the resulting words.
				3718	* Additionally do matching prefixes that combine. */
				3719	if (store_aff_word(spin, dw, afflist, affile,
				3720	&affile->af_suff, &affile->af_pref,
				3721	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3722	retval = FAIL;
				3723
				3724	/* Find all matching prefixes and add the resulting words. */
				3725	if (store_aff_word(spin, dw, afflist, affile,
				3726	&affile->af_pref, NULL,
				3727	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3728	retval = FAIL;
				3729	}
				3730
				3731	vim_free(pc);
				3732	}
				3733
				3734	if (duplicate > 0)
				3735	smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
				3736	if (spin->si_ascii && non_ascii > 0)
				3737	smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
				3738	non_ascii, fname);
				3739	hash_clear(&ht);
				3740
				3741	fclose(fd);
				3742	return retval;
				3743	}
				3744
				3745	/*
				3746	* Check for affix flags in "afflist" that are turned into word flags.
				3747	* Return WF_ flags.
				3748	*/
				3749	static int
				3750	get_affix_flags(afffile_T affile, char_u afflist)
				3751	{
				3752	int flags = 0;
				3753
				3754	if (affile->af_keepcase != 0 && flag_in_afflist(
				3755	affile->af_flagtype, afflist, affile->af_keepcase))
				3756	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				3757	if (affile->af_rare != 0 && flag_in_afflist(
				3758	affile->af_flagtype, afflist, affile->af_rare))
				3759	flags \|= WF_RARE;
				3760	if (affile->af_bad != 0 && flag_in_afflist(
				3761	affile->af_flagtype, afflist, affile->af_bad))
				3762	flags \|= WF_BANNED;
				3763	if (affile->af_needcomp != 0 && flag_in_afflist(
				3764	affile->af_flagtype, afflist, affile->af_needcomp))
				3765	flags \|= WF_NEEDCOMP;
				3766	if (affile->af_comproot != 0 && flag_in_afflist(
				3767	affile->af_flagtype, afflist, affile->af_comproot))
				3768	flags \|= WF_COMPROOT;
				3769	if (affile->af_nosuggest != 0 && flag_in_afflist(
				3770	affile->af_flagtype, afflist, affile->af_nosuggest))
				3771	flags \|= WF_NOSUGGEST;
				3772	return flags;
				3773	}
				3774
				3775	/*
				3776	* Get the list of prefix IDs from the affix list "afflist".
				3777	* Used for PFXPOSTPONE.
				3778	* Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
				3779	* and return the number of affixes.
				3780	*/
				3781	static int
				3782	get_pfxlist(
				3783	afffile_T *affile,
				3784	char_u *afflist,
				3785	char_u *store_afflist)
				3786	{
				3787	char_u *p;
				3788	char_u *prevp;
				3789	int cnt = 0;
				3790	int id;
				3791	char_u key[AH_KEY_LEN];
				3792	hashitem_T *hi;
				3793
				3794	for (p = afflist; *p != NUL; )
				3795	{
				3796	prevp = p;
				3797	if (get_affitem(affile->af_flagtype, &p) != 0)
				3798	{
				3799	/* A flag is a postponed prefix flag if it appears in "af_pref"
				3800	* and it's ID is not zero. */
				3801	vim_strncpy(key, prevp, p - prevp);
				3802	hi = hash_find(&affile->af_pref, key);
				3803	if (!HASHITEM_EMPTY(hi))
				3804	{
				3805	id = HI2AH(hi)->ah_newID;
				3806	if (id != 0)
				3807	store_afflist[cnt++] = id;
				3808	}
				3809	}
				3810	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3811	++p;
				3812	}
				3813
				3814	store_afflist[cnt] = NUL;
				3815	return cnt;
				3816	}
				3817
				3818	/*
				3819	* Get the list of compound IDs from the affix list "afflist" that are used
				3820	* for compound words.
				3821	* Puts the flags in "store_afflist[]".
				3822	*/
				3823	static void
				3824	get_compflags(
				3825	afffile_T *affile,
				3826	char_u *afflist,
				3827	char_u *store_afflist)
				3828	{
				3829	char_u *p;
				3830	char_u *prevp;
				3831	int cnt = 0;
				3832	char_u key[AH_KEY_LEN];
				3833	hashitem_T *hi;
				3834
				3835	for (p = afflist; *p != NUL; )
				3836	{
				3837	prevp = p;
				3838	if (get_affitem(affile->af_flagtype, &p) != 0)
				3839	{
				3840	/* A flag is a compound flag if it appears in "af_comp". */
				3841	vim_strncpy(key, prevp, p - prevp);
				3842	hi = hash_find(&affile->af_comp, key);
				3843	if (!HASHITEM_EMPTY(hi))
				3844	store_afflist[cnt++] = HI2CI(hi)->ci_newID;
				3845	}
				3846	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3847	++p;
				3848	}
				3849
				3850	store_afflist[cnt] = NUL;
				3851	}
				3852
				3853	/*
				3854	* Apply affixes to a word and store the resulting words.
				3855	* "ht" is the hashtable with affentry_T that need to be applied, either
				3856	* prefixes or suffixes.
				3857	* "xht", when not NULL, is the prefix hashtable, to be used additionally on
				3858	* the resulting words for combining affixes.
				3859	*
				3860	* Returns FAIL when out of memory.
				3861	*/
				3862	static int
				3863	store_aff_word(
				3864	spellinfo_T spin, / spell info */
				3865	char_u word, / basic word start */
				3866	char_u afflist, / list of names of supported affixes */
				3867	afffile_T *affile,
				3868	hashtab_T *ht,
				3869	hashtab_T *xht,
				3870	int condit, /* CONDIT_SUF et al. */
				3871	int flags, /* flags for the word */
				3872	char_u pfxlist, / list of prefix IDs */
				3873	int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest
				3874	* is compound flags */
				3875	{
				3876	int todo;
				3877	hashitem_T *hi;
				3878	affheader_T *ah;
				3879	affentry_T *ae;
				3880	char_u newword[MAXWLEN];
				3881	int retval = OK;
				3882	int i, j;
				3883	char_u *p;
				3884	int use_flags;
				3885	char_u *use_pfxlist;
				3886	int use_pfxlen;
				3887	int need_affix;
				3888	char_u store_afflist[MAXWLEN];
				3889	char_u pfx_pfxlist[MAXWLEN];
				3890	size_t wordlen = STRLEN(word);
				3891	int use_condit;
				3892
				3893	todo = (int)ht->ht_used;
				3894	for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
				3895	{
				3896	if (!HASHITEM_EMPTY(hi))
				3897	{
				3898	--todo;
				3899	ah = HI2AH(hi);
				3900
				3901	/* Check that the affix combines, if required, and that the word
				3902	* supports this affix. */
				3903	if (((condit & CONDIT_COMB) == 0 \|\| ah->ah_combine)
				3904	&& flag_in_afflist(affile->af_flagtype, afflist,
				3905	ah->ah_flag))
				3906	{
				3907	/* Loop over all affix entries with this name. */
				3908	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3909	{
				3910	/* Check the condition. It's not logical to match case
				3911	* here, but it is required for compatibility with
				3912	* Myspell.
				3913	* Another requirement from Myspell is that the chop
				3914	* string is shorter than the word itself.
				3915	* For prefixes, when "PFXPOSTPONE" was used, only do
				3916	* prefixes with a chop string and/or flags.
				3917	* When a previously added affix had CIRCUMFIX this one
				3918	* must have it too, if it had not then this one must not
				3919	* have one either. */
				3920	if ((xht != NULL \|\| !affile->af_pfxpostpone
				3921	\|\| ae->ae_chop != NULL
				3922	\|\| ae->ae_flags != NULL)
				3923	&& (ae->ae_chop == NULL
				3924	\|\| STRLEN(ae->ae_chop) < wordlen)
				3925	&& (ae->ae_prog == NULL
				3926	\|\| vim_regexec_prog(&ae->ae_prog, FALSE,
				3927	word, (colnr_T)0))
				3928	&& (((condit & CONDIT_CFIX) == 0)
				3929	== ((condit & CONDIT_AFF) == 0
				3930	\|\| ae->ae_flags == NULL
				3931	\|\| !flag_in_afflist(affile->af_flagtype,
				3932	ae->ae_flags, affile->af_circumfix))))
				3933	{
				3934	/* Match. Remove the chop and add the affix. */
				3935	if (xht == NULL)
				3936	{
				3937	/* prefix: chop/add at the start of the word */
				3938	if (ae->ae_add == NULL)
				3939	*newword = NUL;
				3940	else
				3941	vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
				3942	p = word;
				3943	if (ae->ae_chop != NULL)
				3944	{
				3945	/* Skip chop string. */
				3946	#ifdef FEAT_MBYTE
				3947	if (has_mbyte)
				3948	{
				3949	i = mb_charlen(ae->ae_chop);
				3950	for ( ; i > 0; --i)
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3951	MB_PTR_ADV(p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3952	}
				3953	else
				3954	#endif
				3955	p += STRLEN(ae->ae_chop);
				3956	}
				3957	STRCAT(newword, p);
				3958	}
				3959	else
				3960	{
				3961	/* suffix: chop/add at the end of the word */
				3962	vim_strncpy(newword, word, MAXWLEN - 1);
				3963	if (ae->ae_chop != NULL)
				3964	{
				3965	/* Remove chop string. */
				3966	p = newword + STRLEN(newword);
				3967	i = (int)MB_CHARLEN(ae->ae_chop);
				3968	for ( ; i > 0; --i)
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3969	MB_PTR_BACK(newword, p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3970	*p = NUL;
				3971	}
				3972	if (ae->ae_add != NULL)
				3973	STRCAT(newword, ae->ae_add);
				3974	}
				3975
				3976	use_flags = flags;
				3977	use_pfxlist = pfxlist;
				3978	use_pfxlen = pfxlen;
				3979	need_affix = FALSE;
				3980	use_condit = condit \| CONDIT_COMB \| CONDIT_AFF;
				3981	if (ae->ae_flags != NULL)
				3982	{
				3983	/* Extract flags from the affix list. */
				3984	use_flags \|= get_affix_flags(affile, ae->ae_flags);
				3985
				3986	if (affile->af_needaffix != 0 && flag_in_afflist(
				3987	affile->af_flagtype, ae->ae_flags,
				3988	affile->af_needaffix))
				3989	need_affix = TRUE;
				3990
				3991	/* When there is a CIRCUMFIX flag the other affix
				3992	* must also have it and we don't add the word
				3993	* with one affix. */
				3994	if (affile->af_circumfix != 0 && flag_in_afflist(
				3995	affile->af_flagtype, ae->ae_flags,
				3996	affile->af_circumfix))
				3997	{
				3998	use_condit \|= CONDIT_CFIX;
				3999	if ((condit & CONDIT_CFIX) == 0)
				4000	need_affix = TRUE;
				4001	}
				4002
				4003	if (affile->af_pfxpostpone
				4004	\|\| spin->si_compflags != NULL)
				4005	{
				4006	if (affile->af_pfxpostpone)
				4007	/* Get prefix IDS from the affix list. */
				4008	use_pfxlen = get_pfxlist(affile,
				4009	ae->ae_flags, store_afflist);
				4010	else
				4011	use_pfxlen = 0;
				4012	use_pfxlist = store_afflist;
				4013
				4014	/* Combine the prefix IDs. Avoid adding the
				4015	* same ID twice. */
				4016	for (i = 0; i < pfxlen; ++i)
				4017	{
				4018	for (j = 0; j < use_pfxlen; ++j)
				4019	if (pfxlist[i] == use_pfxlist[j])
				4020	break;
				4021	if (j == use_pfxlen)
				4022	use_pfxlist[use_pfxlen++] = pfxlist[i];
				4023	}
				4024
				4025	if (spin->si_compflags != NULL)
				4026	/* Get compound IDS from the affix list. */
				4027	get_compflags(affile, ae->ae_flags,
				4028	use_pfxlist + use_pfxlen);
				4029
				4030	/* Combine the list of compound flags.
				4031	* Concatenate them to the prefix IDs list.
				4032	* Avoid adding the same ID twice. */
				4033	for (i = pfxlen; pfxlist[i] != NUL; ++i)
				4034	{
				4035	for (j = use_pfxlen;
				4036	use_pfxlist[j] != NUL; ++j)
				4037	if (pfxlist[i] == use_pfxlist[j])
				4038	break;
				4039	if (use_pfxlist[j] == NUL)
				4040	{
				4041	use_pfxlist[j++] = pfxlist[i];
				4042	use_pfxlist[j] = NUL;
				4043	}
				4044	}
				4045	}
				4046	}
				4047
				4048	/* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
				4049	* use the compound flags. */
				4050	if (use_pfxlist != NULL && ae->ae_compforbid)
				4051	{
				4052	vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
				4053	use_pfxlist = pfx_pfxlist;
				4054	}
				4055
				4056	/* When there are postponed prefixes... */
				4057	if (spin->si_prefroot != NULL
				4058	&& spin->si_prefroot->wn_sibling != NULL)
				4059	{
				4060	/* ... add a flag to indicate an affix was used. */
				4061	use_flags \|= WF_HAS_AFF;
				4062
				4063	/* ... don't use a prefix list if combining
				4064	* affixes is not allowed. But do use the
				4065	* compound flags after them. */
				4066	if (!ah->ah_combine && use_pfxlist != NULL)
				4067	use_pfxlist += use_pfxlen;
				4068	}
				4069
				4070	/* When compounding is supported and there is no
				4071	* "COMPOUNDPERMITFLAG" then forbid compounding on the
				4072	* side where the affix is applied. */
				4073	if (spin->si_compflags != NULL && !ae->ae_comppermit)
				4074	{
				4075	if (xht != NULL)
				4076	use_flags \|= WF_NOCOMPAFT;
				4077	else
				4078	use_flags \|= WF_NOCOMPBEF;
				4079	}
				4080
				4081	/* Store the modified word. */
				4082	if (store_word(spin, newword, use_flags,
				4083	spin->si_region, use_pfxlist,
				4084	need_affix) == FAIL)
				4085	retval = FAIL;
				4086
				4087	/* When added a prefix or a first suffix and the affix
				4088	* has flags may add a(nother) suffix. RECURSIVE! */
				4089	if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
				4090	if (store_aff_word(spin, newword, ae->ae_flags,
				4091	affile, &affile->af_suff, xht,
				4092	use_condit & (xht == NULL
				4093	? ~0 : ~CONDIT_SUF),
				4094	use_flags, use_pfxlist, pfxlen) == FAIL)
				4095	retval = FAIL;
				4096
				4097	/* When added a suffix and combining is allowed also
				4098	* try adding a prefix additionally. Both for the
				4099	* word flags and for the affix flags. RECURSIVE! */
				4100	if (xht != NULL && ah->ah_combine)
				4101	{
				4102	if (store_aff_word(spin, newword,
				4103	afflist, affile,
				4104	xht, NULL, use_condit,
				4105	use_flags, use_pfxlist,
				4106	pfxlen) == FAIL
				4107	\|\| (ae->ae_flags != NULL
				4108	&& store_aff_word(spin, newword,
				4109	ae->ae_flags, affile,
				4110	xht, NULL, use_condit,
				4111	use_flags, use_pfxlist,
				4112	pfxlen) == FAIL))
				4113	retval = FAIL;
				4114	}
				4115	}
				4116	}
				4117	}
				4118	}
				4119	}
				4120
				4121	return retval;
				4122	}
				4123
				4124	/*
				4125	* Read a file with a list of words.
				4126	*/
				4127	static int
				4128	spell_read_wordfile(spellinfo_T spin, char_u fname)
				4129	{
				4130	FILE *fd;
				4131	long lnum = 0;
				4132	char_u rline[MAXLINELEN];
				4133	char_u *line;
				4134	char_u *pc = NULL;
				4135	char_u *p;
				4136	int l;
				4137	int retval = OK;
				4138	int did_word = FALSE;
				4139	int non_ascii = 0;
				4140	int flags;
				4141	int regionmask;
				4142
				4143	/*
				4144	* Open the file.
				4145	*/
				4146	fd = mch_fopen((char *)fname, "r");
				4147	if (fd == NULL)
				4148	{
				4149	EMSG2(_(e_notopen), fname);
				4150	return FAIL;
				4151	}
				4152
				4153	vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
				4154	spell_message(spin, IObuff);
				4155
				4156	/*
				4157	* Read all the lines in the file one by one.
				4158	*/
				4159	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				4160	{
				4161	line_breakcheck();
				4162	++lnum;
				4163
				4164	/* Skip comment lines. */
				4165	if (*rline == '#')
				4166	continue;
				4167
				4168	/* Remove CR, LF and white space from the end. */
				4169	l = (int)STRLEN(rline);
				4170	while (l > 0 && rline[l - 1] <= ' ')
				4171	--l;
				4172	if (l == 0)
				4173	continue; /* empty or blank line */
				4174	rline[l] = NUL;
				4175
				4176	/* Convert from "/encoding={encoding}" to 'encoding' when needed. */
				4177	vim_free(pc);
				4178	#ifdef FEAT_MBYTE
				4179	if (spin->si_conv.vc_type != CONV_NONE)
				4180	{
				4181	pc = string_convert(&spin->si_conv, rline, NULL);
				4182	if (pc == NULL)
				4183	{
				4184	smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
				4185	fname, lnum, rline);
				4186	continue;
				4187	}
				4188	line = pc;
				4189	}
				4190	else
				4191	#endif
				4192	{
				4193	pc = NULL;
				4194	line = rline;
				4195	}
				4196
				4197	if (*line == '/')
				4198	{
				4199	++line;
				4200	if (STRNCMP(line, "encoding=", 9) == 0)
				4201	{
				4202	if (spin->si_conv.vc_type != CONV_NONE)
				4203	smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
				4204	fname, lnum, line - 1);
				4205	else if (did_word)
				4206	smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
				4207	fname, lnum, line - 1);
				4208	else
				4209	{
				4210	#ifdef FEAT_MBYTE
				4211	char_u *enc;
				4212
				4213	/* Setup for conversion to 'encoding'. */
				4214	line += 9;
				4215	enc = enc_canonize(line);
				4216	if (enc != NULL && !spin->si_ascii
				4217	&& convert_setup(&spin->si_conv, enc,
				4218	p_enc) == FAIL)
				4219	smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
				4220	fname, line, p_enc);
				4221	vim_free(enc);
				4222	spin->si_conv.vc_fail = TRUE;
				4223	#else
				4224	smsg((char_u *)_("Conversion in %s not supported"), fname);
				4225	#endif
				4226	}
				4227	continue;
				4228	}
				4229
				4230	if (STRNCMP(line, "regions=", 8) == 0)
				4231	{
				4232	if (spin->si_region_count > 1)
				4233	smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
				4234	fname, lnum, line);
				4235	else
				4236	{
				4237	line += 8;
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	4238	if (STRLEN(line) > MAXREGIONS * 2)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4239	smsg((char_u *)_("Too many regions in %s line %d: %s"),
				4240	fname, lnum, line);
				4241	else
				4242	{
				4243	spin->si_region_count = (int)STRLEN(line) / 2;
				4244	STRCPY(spin->si_region_name, line);
				4245
				4246	/* Adjust the mask for a word valid in all regions. */
				4247	spin->si_region = (1 << spin->si_region_count) - 1;
				4248	}
				4249	}
				4250	continue;
				4251	}
				4252
				4253	smsg((char_u *)_("/ line ignored in %s line %d: %s"),
				4254	fname, lnum, line - 1);
				4255	continue;
				4256	}
				4257
				4258	flags = 0;
				4259	regionmask = spin->si_region;
				4260
				4261	/* Check for flags and region after a slash. */
				4262	p = vim_strchr(line, '/');
				4263	if (p != NULL)
				4264	{
				4265	*p++ = NUL;
				4266	while (*p != NUL)
				4267	{
				4268	if (p == '=') / keep-case word */
				4269	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				4270	else if (p == '!') / Bad, bad, wicked word. */
				4271	flags \|= WF_BANNED;
				4272	else if (p == '?') / Rare word. */
				4273	flags \|= WF_RARE;
				4274	else if (VIM_ISDIGIT(p)) / region number(s) */
				4275	{
				4276	if ((flags & WF_REGION) == 0) /* first one */
				4277	regionmask = 0;
				4278	flags \|= WF_REGION;
				4279
				4280	l = *p - '0';
Bram Moolenaar	ee03b94	2017-10-27 00:57:05 +0200	[diff] [blame]	4281	if (l == 0 \|\| l > spin->si_region_count)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4282	{
				4283	smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
				4284	fname, lnum, p);
				4285	break;
				4286	}
				4287	regionmask \|= 1 << (l - 1);
				4288	}
				4289	else
				4290	{
				4291	smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
				4292	fname, lnum, p);
				4293	break;
				4294	}
				4295	++p;
				4296	}
				4297	}
				4298
				4299	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				4300	if (spin->si_ascii && has_non_ascii(line))
				4301	{
				4302	++non_ascii;
				4303	continue;
				4304	}
				4305
				4306	/* Normal word: store it. */
				4307	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
				4308	{
				4309	retval = FAIL;
				4310	break;
				4311	}
				4312	did_word = TRUE;
				4313	}
				4314
				4315	vim_free(pc);
				4316	fclose(fd);
				4317
				4318	if (spin->si_ascii && non_ascii > 0)
				4319	{
				4320	vim_snprintf((char *)IObuff, IOSIZE,
				4321	_("Ignored %d words with non-ASCII characters"), non_ascii);
				4322	spell_message(spin, IObuff);
				4323	}
				4324
				4325	return retval;
				4326	}
				4327
				4328	/*
				4329	* Get part of an sblock_T, "len" bytes long.
				4330	* This avoids calling free() for every little struct we use (and keeping
				4331	* track of them).
				4332	* The memory is cleared to all zeros.
				4333	* Returns NULL when out of memory.
				4334	*/
				4335	static void *
				4336	getroom(
				4337	spellinfo_T *spin,
				4338	size_t len, /* length needed */
				4339	int align) /* align for pointer */
				4340	{
				4341	char_u *p;
				4342	sblock_T *bl = spin->si_blocks;
				4343
				4344	if (align && bl != NULL)
				4345	/* Round size up for alignment. On some systems structures need to be
				4346	* aligned to the size of a pointer (e.g., SPARC). */
				4347	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
				4348	& ~(sizeof(char *) - 1);
				4349
				4350	if (bl == NULL \|\| bl->sb_used + len > SBLOCKSIZE)
				4351	{
				4352	if (len >= SBLOCKSIZE)
				4353	bl = NULL;
				4354	else
				4355	/* Allocate a block of memory. It is not freed until much later. */
				4356	bl = (sblock_T *)alloc_clear(
				4357	(unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
				4358	if (bl == NULL)
				4359	{
				4360	if (!spin->si_did_emsg)
				4361	{
				4362	EMSG(_("E845: Insufficient memory, word list will be incomplete"));
				4363	spin->si_did_emsg = TRUE;
				4364	}
				4365	return NULL;
				4366	}
				4367	bl->sb_next = spin->si_blocks;
				4368	spin->si_blocks = bl;
				4369	bl->sb_used = 0;
				4370	++spin->si_blocks_cnt;
				4371	}
				4372
				4373	p = bl->sb_data + bl->sb_used;
				4374	bl->sb_used += (int)len;
				4375
				4376	return p;
				4377	}
				4378
				4379	/*
				4380	* Make a copy of a string into memory allocated with getroom().
				4381	* Returns NULL when out of memory.
				4382	*/
				4383	static char_u *
				4384	getroom_save(spellinfo_T spin, char_u s)
				4385	{
				4386	char_u *sc;
				4387
				4388	sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
				4389	if (sc != NULL)
				4390	STRCPY(sc, s);
				4391	return sc;
				4392	}
				4393
				4394
				4395	/*
				4396	* Free the list of allocated sblock_T.
				4397	*/
				4398	static void
				4399	free_blocks(sblock_T *bl)
				4400	{
				4401	sblock_T *next;
				4402
				4403	while (bl != NULL)
				4404	{
				4405	next = bl->sb_next;
				4406	vim_free(bl);
				4407	bl = next;
				4408	}
				4409	}
				4410
				4411	/*
				4412	* Allocate the root of a word tree.
				4413	* Returns NULL when out of memory.
				4414	*/
				4415	static wordnode_T *
				4416	wordtree_alloc(spellinfo_T *spin)
				4417	{
				4418	return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4419	}
				4420
				4421	/*
				4422	* Store a word in the tree(s).
				4423	* Always store it in the case-folded tree. For a keep-case word this is
				4424	* useful when the word can also be used with all caps (no WF_FIXCAP flag) and
				4425	* used to find suggestions.
				4426	* For a keep-case word also store it in the keep-case tree.
				4427	* When "pfxlist" is not NULL store the word for each postponed prefix ID and
				4428	* compound flag.
				4429	*/
				4430	static int
				4431	store_word(
				4432	spellinfo_T *spin,
				4433	char_u *word,
				4434	int flags, /* extra flags, WF_BANNED */
				4435	int region, /* supported region(s) */
				4436	char_u pfxlist, / list of prefix IDs or NULL */
				4437	int need_affix) /* only store word with affix ID */
				4438	{
				4439	int len = (int)STRLEN(word);
				4440	int ct = captype(word, word + len);
				4441	char_u foldword[MAXWLEN];
				4442	int res = OK;
				4443	char_u *p;
				4444
				4445	(void)spell_casefold(word, len, foldword, MAXWLEN);
				4446	for (p = pfxlist; res == OK; ++p)
				4447	{
				4448	if (!need_affix \|\| (p != NULL && *p != NUL))
				4449	res = tree_add_word(spin, foldword, spin->si_foldroot, ct \| flags,
				4450	region, p == NULL ? 0 : *p);
				4451	if (p == NULL \|\| *p == NUL)
				4452	break;
				4453	}
				4454	++spin->si_foldwcount;
				4455
				4456	if (res == OK && (ct == WF_KEEPCAP \|\| (flags & WF_KEEPCAP)))
				4457	{
				4458	for (p = pfxlist; res == OK; ++p)
				4459	{
				4460	if (!need_affix \|\| (p != NULL && *p != NUL))
				4461	res = tree_add_word(spin, word, spin->si_keeproot, flags,
				4462	region, p == NULL ? 0 : *p);
				4463	if (p == NULL \|\| *p == NUL)
				4464	break;
				4465	}
				4466	++spin->si_keepwcount;
				4467	}
				4468	return res;
				4469	}
				4470
				4471	/*
				4472	* Add word "word" to a word tree at "root".
				4473	* When "flags" < 0 we are adding to the prefix tree where "flags" is used for
				4474	* "rare" and "region" is the condition nr.
				4475	* Returns FAIL when out of memory.
				4476	*/
				4477	static int
				4478	tree_add_word(
				4479	spellinfo_T *spin,
				4480	char_u *word,
				4481	wordnode_T *root,
				4482	int flags,
				4483	int region,
				4484	int affixID)
				4485	{
				4486	wordnode_T *node = root;
				4487	wordnode_T *np;
				4488	wordnode_T copyp, *copyprev;
				4489	wordnode_T **prev = NULL;
				4490	int i;
				4491
				4492	/* Add each byte of the word to the tree, including the NUL at the end. */
				4493	for (i = 0; ; ++i)
				4494	{
				4495	/* When there is more than one reference to this node we need to make
				4496	* a copy, so that we can modify it. Copy the whole list of siblings
				4497	* (we don't optimize for a partly shared list of siblings). */
				4498	if (node != NULL && node->wn_refs > 1)
				4499	{
				4500	--node->wn_refs;
				4501	copyprev = prev;
				4502	for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
				4503	{
				4504	/* Allocate a new node and copy the info. */
				4505	np = get_wordnode(spin);
				4506	if (np == NULL)
				4507	return FAIL;
				4508	np->wn_child = copyp->wn_child;
				4509	if (np->wn_child != NULL)
				4510	++np->wn_child->wn_refs; /* child gets extra ref */
				4511	np->wn_byte = copyp->wn_byte;
				4512	if (np->wn_byte == NUL)
				4513	{
				4514	np->wn_flags = copyp->wn_flags;
				4515	np->wn_region = copyp->wn_region;
				4516	np->wn_affixID = copyp->wn_affixID;
				4517	}
				4518
				4519	/* Link the new node in the list, there will be one ref. */
				4520	np->wn_refs = 1;
				4521	if (copyprev != NULL)
				4522	*copyprev = np;
				4523	copyprev = &np->wn_sibling;
				4524
				4525	/* Let "node" point to the head of the copied list. */
				4526	if (copyp == node)
				4527	node = np;
				4528	}
				4529	}
				4530
				4531	/* Look for the sibling that has the same character. They are sorted
				4532	* on byte value, thus stop searching when a sibling is found with a
				4533	* higher byte value. For zero bytes (end of word) the sorting is
				4534	* done on flags and then on affixID. */
				4535	while (node != NULL
				4536	&& (node->wn_byte < word[i]
				4537	\|\| (node->wn_byte == NUL
				4538	&& (flags < 0
				4539	? node->wn_affixID < (unsigned)affixID
				4540	: (node->wn_flags < (unsigned)(flags & WN_MASK)
				4541	\|\| (node->wn_flags == (flags & WN_MASK)
				4542	&& (spin->si_sugtree
				4543	? (node->wn_region & 0xffff) < region
				4544	: node->wn_affixID
				4545	< (unsigned)affixID)))))))
				4546	{
				4547	prev = &node->wn_sibling;
				4548	node = *prev;
				4549	}
				4550	if (node == NULL
				4551	\|\| node->wn_byte != word[i]
				4552	\|\| (word[i] == NUL
				4553	&& (flags < 0
				4554	\|\| spin->si_sugtree
				4555	\|\| node->wn_flags != (flags & WN_MASK)
				4556	\|\| node->wn_affixID != affixID)))
				4557	{
				4558	/* Allocate a new node. */
				4559	np = get_wordnode(spin);
				4560	if (np == NULL)
				4561	return FAIL;
				4562	np->wn_byte = word[i];
				4563
				4564	/* If "node" is NULL this is a new child or the end of the sibling
				4565	* list: ref count is one. Otherwise use ref count of sibling and
				4566	* make ref count of sibling one (matters when inserting in front
				4567	* of the list of siblings). */
				4568	if (node == NULL)
				4569	np->wn_refs = 1;
				4570	else
				4571	{
				4572	np->wn_refs = node->wn_refs;
				4573	node->wn_refs = 1;
				4574	}
				4575	if (prev != NULL)
				4576	*prev = np;
				4577	np->wn_sibling = node;
				4578	node = np;
				4579	}
				4580
				4581	if (word[i] == NUL)
				4582	{
				4583	node->wn_flags = flags;
				4584	node->wn_region \|= region;
				4585	node->wn_affixID = affixID;
				4586	break;
				4587	}
				4588	prev = &node->wn_child;
				4589	node = *prev;
				4590	}
				4591	#ifdef SPELL_PRINTTREE
				4592	smsg((char_u *)"Added \"%s\"", word);
				4593	spell_print_tree(root->wn_sibling);
				4594	#endif
				4595
				4596	/* count nr of words added since last message */
				4597	++spin->si_msg_count;
				4598
				4599	if (spin->si_compress_cnt > 1)
				4600	{
				4601	if (--spin->si_compress_cnt == 1)
				4602	/* Did enough words to lower the block count limit. */
				4603	spin->si_blocks_cnt += compress_inc;
				4604	}
				4605
				4606	/*
				4607	* When we have allocated lots of memory we need to compress the word tree
				4608	* to free up some room. But compression is slow, and we might actually
				4609	* need that room, thus only compress in the following situations:
				4610	* 1. When not compressed before (si_compress_cnt == 0): when using
				4611	* "compress_start" blocks.
				4612	* 2. When compressed before and used "compress_inc" blocks before
				4613	* adding "compress_added" words (si_compress_cnt > 1).
				4614	* 3. When compressed before, added "compress_added" words
				4615	* (si_compress_cnt == 1) and the number of free nodes drops below the
				4616	* maximum word length.
				4617	*/
				4618	#ifndef SPELL_COMPRESS_ALLWAYS
				4619	if (spin->si_compress_cnt == 1
				4620	? spin->si_free_count < MAXWLEN
				4621	: spin->si_blocks_cnt >= compress_start)
				4622	#endif
				4623	{
				4624	/* Decrement the block counter. The effect is that we compress again
				4625	* when the freed up room has been used and another "compress_inc"
				4626	* blocks have been allocated. Unless "compress_added" words have
				4627	* been added, then the limit is put back again. */
				4628	spin->si_blocks_cnt -= compress_inc;
				4629	spin->si_compress_cnt = compress_added;
				4630
				4631	if (spin->si_verbose)
				4632	{
				4633	msg_start();
				4634	msg_puts((char_u *)_(msg_compressing));
				4635	msg_clr_eos();
				4636	msg_didout = FALSE;
				4637	msg_col = 0;
				4638	out_flush();
				4639	}
				4640
				4641	/* Compress both trees. Either they both have many nodes, which makes
				4642	* compression useful, or one of them is small, which means
				4643	* compression goes fast. But when filling the soundfold word tree
				4644	* there is no keep-case tree. */
				4645	wordtree_compress(spin, spin->si_foldroot);
				4646	if (affixID >= 0)
				4647	wordtree_compress(spin, spin->si_keeproot);
				4648	}
				4649
				4650	return OK;
				4651	}
				4652
				4653	/*
				4654	* Get a wordnode_T, either from the list of previously freed nodes or
				4655	* allocate a new one.
				4656	* Returns NULL when out of memory.
				4657	*/
				4658	static wordnode_T *
				4659	get_wordnode(spellinfo_T *spin)
				4660	{
				4661	wordnode_T *n;
				4662
				4663	if (spin->si_first_free == NULL)
				4664	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4665	else
				4666	{
				4667	n = spin->si_first_free;
				4668	spin->si_first_free = n->wn_child;
				4669	vim_memset(n, 0, sizeof(wordnode_T));
				4670	--spin->si_free_count;
				4671	}
				4672	#ifdef SPELL_PRINTTREE
				4673	if (n != NULL)
				4674	n->wn_nr = ++spin->si_wordnode_nr;
				4675	#endif
				4676	return n;
				4677	}
				4678
				4679	/*
				4680	* Decrement the reference count on a node (which is the head of a list of
				4681	* siblings). If the reference count becomes zero free the node and its
				4682	* siblings.
				4683	* Returns the number of nodes actually freed.
				4684	*/
				4685	static int
				4686	deref_wordnode(spellinfo_T spin, wordnode_T node)
				4687	{
				4688	wordnode_T *np;
				4689	int cnt = 0;
				4690
				4691	if (--node->wn_refs == 0)
				4692	{
				4693	for (np = node; np != NULL; np = np->wn_sibling)
				4694	{
				4695	if (np->wn_child != NULL)
				4696	cnt += deref_wordnode(spin, np->wn_child);
				4697	free_wordnode(spin, np);
				4698	++cnt;
				4699	}
				4700	++cnt; /* length field */
				4701	}
				4702	return cnt;
				4703	}
				4704
				4705	/*
				4706	* Free a wordnode_T for re-use later.
				4707	* Only the "wn_child" field becomes invalid.
				4708	*/
				4709	static void
				4710	free_wordnode(spellinfo_T spin, wordnode_T n)
				4711	{
				4712	n->wn_child = spin->si_first_free;
				4713	spin->si_first_free = n;
				4714	++spin->si_free_count;
				4715	}
				4716
				4717	/*
				4718	* Compress a tree: find tails that are identical and can be shared.
				4719	*/
				4720	static void
				4721	wordtree_compress(spellinfo_T spin, wordnode_T root)
				4722	{
				4723	hashtab_T ht;
				4724	int n;
				4725	int tot = 0;
				4726	int perc;
				4727
				4728	/* Skip the root itself, it's not actually used. The first sibling is the
				4729	* start of the tree. */
				4730	if (root->wn_sibling != NULL)
				4731	{
				4732	hash_init(&ht);
				4733	n = node_compress(spin, root->wn_sibling, &ht, &tot);
				4734
				4735	#ifndef SPELL_PRINTTREE
				4736	if (spin->si_verbose \|\| p_verbose > 2)
				4737	#endif
				4738	{
				4739	if (tot > 1000000)
				4740	perc = (tot - n) / (tot / 100);
				4741	else if (tot == 0)
				4742	perc = 0;
				4743	else
				4744	perc = (tot - n) * 100 / tot;
				4745	vim_snprintf((char *)IObuff, IOSIZE,
				4746	_("Compressed %d of %d nodes; %d (%d%%) remaining"),
				4747	n, tot, tot - n, perc);
				4748	spell_message(spin, IObuff);
				4749	}
				4750	#ifdef SPELL_PRINTTREE
				4751	spell_print_tree(root->wn_sibling);
				4752	#endif
				4753	hash_clear(&ht);
				4754	}
				4755	}
				4756
				4757	/*
				4758	* Compress a node, its siblings and its children, depth first.
				4759	* Returns the number of compressed nodes.
				4760	*/
				4761	static int
				4762	node_compress(
				4763	spellinfo_T *spin,
				4764	wordnode_T *node,
				4765	hashtab_T *ht,
				4766	int tot) / total count of nodes before compressing,
				4767	incremented while going through the tree */
				4768	{
				4769	wordnode_T *np;
				4770	wordnode_T *tp;
				4771	wordnode_T *child;
				4772	hash_T hash;
				4773	hashitem_T *hi;
				4774	int len = 0;
				4775	unsigned nr, n;
				4776	int compressed = 0;
				4777
				4778	/*
				4779	* Go through the list of siblings. Compress each child and then try
				4780	* finding an identical child to replace it.
				4781	* Note that with "child" we mean not just the node that is pointed to,
				4782	* but the whole list of siblings of which the child node is the first.
				4783	*/
				4784	for (np = node; np != NULL && !got_int; np = np->wn_sibling)
				4785	{
				4786	++len;
				4787	if ((child = np->wn_child) != NULL)
				4788	{
				4789	/* Compress the child first. This fills hashkey. */
				4790	compressed += node_compress(spin, child, ht, tot);
				4791
				4792	/* Try to find an identical child. */
				4793	hash = hash_hash(child->wn_u1.hashkey);
				4794	hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
				4795	if (!HASHITEM_EMPTY(hi))
				4796	{
				4797	/* There are children we encountered before with a hash value
				4798	* identical to the current child. Now check if there is one
				4799	* that is really identical. */
				4800	for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
				4801	if (node_equal(child, tp))
				4802	{
				4803	/* Found one! Now use that child in place of the
				4804	* current one. This means the current child and all
				4805	* its siblings is unlinked from the tree. */
				4806	++tp->wn_refs;
				4807	compressed += deref_wordnode(spin, child);
				4808	np->wn_child = tp;
				4809	break;
				4810	}
				4811	if (tp == NULL)
				4812	{
				4813	/* No other child with this hash value equals the child of
				4814	* the node, add it to the linked list after the first
				4815	* item. */
				4816	tp = HI2WN(hi);
				4817	child->wn_u2.next = tp->wn_u2.next;
				4818	tp->wn_u2.next = child;
				4819	}
				4820	}
				4821	else
				4822	/* No other child has this hash value, add it to the
				4823	* hashtable. */
				4824	hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
				4825	}
				4826	}
				4827	tot += len + 1; / add one for the node that stores the length */
				4828
				4829	/*
				4830	* Make a hash key for the node and its siblings, so that we can quickly
				4831	* find a lookalike node. This must be done after compressing the sibling
				4832	* list, otherwise the hash key would become invalid by the compression.
				4833	*/
				4834	node->wn_u1.hashkey[0] = len;
				4835	nr = 0;
				4836	for (np = node; np != NULL; np = np->wn_sibling)
				4837	{
				4838	if (np->wn_byte == NUL)
				4839	/* end node: use wn_flags, wn_region and wn_affixID */
				4840	n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
				4841	else
				4842	/* byte node: use the byte value and the child pointer */
				4843	n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
				4844	nr = nr * 101 + n;
				4845	}
				4846
				4847	/* Avoid NUL bytes, it terminates the hash key. */
				4848	n = nr & 0xff;
				4849	node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
				4850	n = (nr >> 8) & 0xff;
				4851	node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
				4852	n = (nr >> 16) & 0xff;
				4853	node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
				4854	n = (nr >> 24) & 0xff;
				4855	node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
				4856	node->wn_u1.hashkey[5] = NUL;
				4857
				4858	/* Check for CTRL-C pressed now and then. */
				4859	fast_breakcheck();
				4860
				4861	return compressed;
				4862	}
				4863
				4864	/*
				4865	* Return TRUE when two nodes have identical siblings and children.
				4866	*/
				4867	static int
				4868	node_equal(wordnode_T n1, wordnode_T n2)
				4869	{
				4870	wordnode_T *p1;
				4871	wordnode_T *p2;
				4872
				4873	for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
				4874	p1 = p1->wn_sibling, p2 = p2->wn_sibling)
				4875	if (p1->wn_byte != p2->wn_byte
				4876	\|\| (p1->wn_byte == NUL
				4877	? (p1->wn_flags != p2->wn_flags
				4878	\|\| p1->wn_region != p2->wn_region
				4879	\|\| p1->wn_affixID != p2->wn_affixID)
				4880	: (p1->wn_child != p2->wn_child)))
				4881	break;
				4882
				4883	return p1 == NULL && p2 == NULL;
				4884	}
				4885
				4886	static int
				4887	#ifdef __BORLANDC__
				4888	_RTLENTRYF
				4889	#endif
				4890	rep_compare(const void s1, const void s2);
				4891
				4892	/*
				4893	* Function given to qsort() to sort the REP items on "from" string.
				4894	*/
				4895	static int
				4896	#ifdef __BORLANDC__
				4897	_RTLENTRYF
				4898	#endif
				4899	rep_compare(const void s1, const void s2)
				4900	{
				4901	fromto_T p1 = (fromto_T )s1;
				4902	fromto_T p2 = (fromto_T )s2;
				4903
				4904	return STRCMP(p1->ft_from, p2->ft_from);
				4905	}
				4906
				4907	/*
				4908	* Write the Vim .spl file "fname".
				4909	* Return FAIL or OK;
				4910	*/
				4911	static int
				4912	write_vim_spell(spellinfo_T spin, char_u fname)
				4913	{
				4914	FILE *fd;
				4915	int regionmask;
				4916	int round;
				4917	wordnode_T *tree;
				4918	int nodecount;
				4919	int i;
				4920	int l;
				4921	garray_T *gap;
				4922	fromto_T *ftp;
				4923	char_u *p;
				4924	int rr;
				4925	int retval = OK;
				4926	size_t fwv = 1; /* collect return value of fwrite() to avoid
				4927	warnings from picky compiler */
				4928
				4929	fd = mch_fopen((char *)fname, "w");
				4930	if (fd == NULL)
				4931	{
				4932	EMSG2(_(e_notopen), fname);
				4933	return FAIL;
				4934	}
				4935
				4936	/* <HEADER>: <fileID> <versionnr> */
				4937	/* <fileID> */
				4938	fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
				4939	if (fwv != (size_t)1)
				4940	/* Catch first write error, don't try writing more. */
				4941	goto theend;
				4942
				4943	putc(VIMSPELLVERSION, fd); /* <versionnr> */
				4944
				4945	/*
				4946	* <SECTIONS>: <section> ... <sectionend>
				4947	*/
				4948
				4949	/* SN_INFO: <infotext> */
				4950	if (spin->si_info != NULL)
				4951	{
				4952	putc(SN_INFO, fd); /* <sectionID> */
				4953	putc(0, fd); /* <sectionflags> */
				4954
				4955	i = (int)STRLEN(spin->si_info);
				4956	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				4957	fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
				4958	}
				4959
				4960	/* SN_REGION: <regionname> ...
				4961	* Write the region names only if there is more than one. */
				4962	if (spin->si_region_count > 1)
				4963	{
				4964	putc(SN_REGION, fd); /* <sectionID> */
				4965	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4966	l = spin->si_region_count * 2;
				4967	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				4968	fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
				4969	/* <regionname> ... */
				4970	regionmask = (1 << spin->si_region_count) - 1;
				4971	}
				4972	else
				4973	regionmask = 0;
				4974
				4975	/* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
				4976	*
				4977	* The table with character flags and the table for case folding.
				4978	* This makes sure the same characters are recognized as word characters
				4979	* when generating an when using a spell file.
				4980	* Skip this for ASCII, the table may conflict with the one used for
				4981	* 'encoding'.
				4982	* Also skip this for an .add.spl file, the main spell file must contain
				4983	* the table (avoids that it conflicts). File is shorter too.
				4984	*/
				4985	if (!spin->si_ascii && !spin->si_add)
				4986	{
				4987	char_u folchars[128 * 8];
				4988	int flags;
				4989
				4990	putc(SN_CHARFLAGS, fd); /* <sectionID> */
				4991	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4992
				4993	/* Form the <folchars> string first, we need to know its length. */
				4994	l = 0;
				4995	for (i = 128; i < 256; ++i)
				4996	{
				4997	#ifdef FEAT_MBYTE
				4998	if (has_mbyte)
				4999	l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
				5000	else
				5001	#endif
				5002	folchars[l++] = spelltab.st_fold[i];
				5003	}
				5004	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
				5005
				5006	fputc(128, fd); /* <charflagslen> */
				5007	for (i = 128; i < 256; ++i)
				5008	{
				5009	flags = 0;
				5010	if (spelltab.st_isw[i])
				5011	flags \|= CF_WORD;
				5012	if (spelltab.st_isu[i])
				5013	flags \|= CF_UPPER;
				5014	fputc(flags, fd); /* <charflags> */
				5015	}
				5016
				5017	put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
				5018	fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
				5019	}
				5020
				5021	/* SN_MIDWORD: <midword> */
				5022	if (spin->si_midword != NULL)
				5023	{
				5024	putc(SN_MIDWORD, fd); /* <sectionID> */
				5025	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5026
				5027	i = (int)STRLEN(spin->si_midword);
				5028	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				5029	fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
				5030	/* <midword> */
				5031	}
				5032
				5033	/* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
				5034	if (spin->si_prefcond.ga_len > 0)
				5035	{
				5036	putc(SN_PREFCOND, fd); /* <sectionID> */
				5037	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				5038
				5039	l = write_spell_prefcond(NULL, &spin->si_prefcond);
				5040	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5041
				5042	write_spell_prefcond(fd, &spin->si_prefcond);
				5043	}
				5044
				5045	/* SN_REP: <repcount> <rep> ...
				5046	* SN_SAL: <salflags> <salcount> <sal> ...
				5047	* SN_REPSAL: <repcount> <rep> ... */
				5048
				5049	/* round 1: SN_REP section
				5050	* round 2: SN_SAL section (unless SN_SOFO is used)
				5051	* round 3: SN_REPSAL section */
				5052	for (round = 1; round <= 3; ++round)
				5053	{
				5054	if (round == 1)
				5055	gap = &spin->si_rep;
				5056	else if (round == 2)
				5057	{
				5058	/* Don't write SN_SAL when using a SN_SOFO section */
				5059	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5060	continue;
				5061	gap = &spin->si_sal;
				5062	}
				5063	else
				5064	gap = &spin->si_repsal;
				5065
				5066	/* Don't write the section if there are no items. */
				5067	if (gap->ga_len == 0)
				5068	continue;
				5069
				5070	/* Sort the REP/REPSAL items. */
				5071	if (round != 2)
				5072	qsort(gap->ga_data, (size_t)gap->ga_len,
				5073	sizeof(fromto_T), rep_compare);
				5074
				5075	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
				5076	putc(i, fd); /* <sectionID> */
				5077
				5078	/* This is for making suggestions, section is not required. */
				5079	putc(0, fd); /* <sectionflags> */
				5080
				5081	/* Compute the length of what follows. */
				5082	l = 2; /* count <repcount> or <salcount> */
				5083	for (i = 0; i < gap->ga_len; ++i)
				5084	{
				5085	ftp = &((fromto_T *)gap->ga_data)[i];
				5086	l += 1 + (int)STRLEN(ftp->ft_from); /* count <fromlen> and <from> */
				5087	l += 1 + (int)STRLEN(ftp->ft_to); /* count <tolen> and <to> */
				5088	}
				5089	if (round == 2)
				5090	++l; /* count <salflags> */
				5091	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5092
				5093	if (round == 2)
				5094	{
				5095	i = 0;
				5096	if (spin->si_followup)
				5097	i \|= SAL_F0LLOWUP;
				5098	if (spin->si_collapse)
				5099	i \|= SAL_COLLAPSE;
				5100	if (spin->si_rem_accents)
				5101	i \|= SAL_REM_ACCENTS;
				5102	putc(i, fd); /* <salflags> */
				5103	}
				5104
				5105	put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
				5106	for (i = 0; i < gap->ga_len; ++i)
				5107	{
				5108	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				5109	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				5110	ftp = &((fromto_T *)gap->ga_data)[i];
				5111	for (rr = 1; rr <= 2; ++rr)
				5112	{
				5113	p = rr == 1 ? ftp->ft_from : ftp->ft_to;
				5114	l = (int)STRLEN(p);
				5115	putc(l, fd);
				5116	if (l > 0)
				5117	fwv &= fwrite(p, l, (size_t)1, fd);
				5118	}
				5119	}
				5120
				5121	}
				5122
				5123	/* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				5124	* This is for making suggestions, section is not required. */
				5125	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5126	{
				5127	putc(SN_SOFO, fd); /* <sectionID> */
				5128	putc(0, fd); /* <sectionflags> */
				5129
				5130	l = (int)STRLEN(spin->si_sofofr);
				5131	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
				5132	/* <sectionlen> */
				5133
				5134	put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
				5135	fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
				5136
				5137	l = (int)STRLEN(spin->si_sofoto);
				5138	put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
				5139	fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
				5140	}
				5141
				5142	/* SN_WORDS: <word> ...
				5143	* This is for making suggestions, section is not required. */
				5144	if (spin->si_commonwords.ht_used > 0)
				5145	{
				5146	putc(SN_WORDS, fd); /* <sectionID> */
				5147	putc(0, fd); /* <sectionflags> */
				5148
				5149	/* round 1: count the bytes
				5150	* round 2: write the bytes */
				5151	for (round = 1; round <= 2; ++round)
				5152	{
				5153	int todo;
				5154	int len = 0;
				5155	hashitem_T *hi;
				5156
				5157	todo = (int)spin->si_commonwords.ht_used;
				5158	for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
				5159	if (!HASHITEM_EMPTY(hi))
				5160	{
				5161	l = (int)STRLEN(hi->hi_key) + 1;
				5162	len += l;
				5163	if (round == 2) /* <word> */
				5164	fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
				5165	--todo;
				5166	}
				5167	if (round == 1)
				5168	put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
				5169	}
				5170	}
				5171
				5172	/* SN_MAP: <mapstr>
				5173	* This is for making suggestions, section is not required. */
				5174	if (spin->si_map.ga_len > 0)
				5175	{
				5176	putc(SN_MAP, fd); /* <sectionID> */
				5177	putc(0, fd); /* <sectionflags> */
				5178	l = spin->si_map.ga_len;
				5179	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5180	fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
				5181	/* <mapstr> */
				5182	}
				5183
				5184	/* SN_SUGFILE: <timestamp>
				5185	* This is used to notify that a .sug file may be available and at the
				5186	* same time allows for checking that a .sug file that is found matches
				5187	* with this .spl file. That's because the word numbers must be exactly
				5188	* right. */
				5189	if (!spin->si_nosugfile
				5190	&& (spin->si_sal.ga_len > 0
				5191	\|\| (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
				5192	{
				5193	putc(SN_SUGFILE, fd); /* <sectionID> */
				5194	putc(0, fd); /* <sectionflags> */
				5195	put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
				5196
				5197	/* Set si_sugtime and write it to the file. */
				5198	spin->si_sugtime = time(NULL);
				5199	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5200	}
				5201
				5202	/* SN_NOSPLITSUGS: nothing
				5203	* This is used to notify that no suggestions with word splits are to be
				5204	* made. */
				5205	if (spin->si_nosplitsugs)
				5206	{
				5207	putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
				5208	putc(0, fd); /* <sectionflags> */
				5209	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5210	}
				5211
				5212	/* SN_NOCOMPUNDSUGS: nothing
				5213	* This is used to notify that no suggestions with compounds are to be
				5214	* made. */
				5215	if (spin->si_nocompoundsugs)
				5216	{
				5217	putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
				5218	putc(0, fd); /* <sectionflags> */
				5219	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5220	}
				5221
				5222	/* SN_COMPOUND: compound info.
				5223	* We don't mark it required, when not supported all compound words will
				5224	* be bad words. */
				5225	if (spin->si_compflags != NULL)
				5226	{
				5227	putc(SN_COMPOUND, fd); /* <sectionID> */
				5228	putc(0, fd); /* <sectionflags> */
				5229
				5230	l = (int)STRLEN(spin->si_compflags);
				5231	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5232	l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
				5233	put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
				5234
				5235	putc(spin->si_compmax, fd); /* <compmax> */
				5236	putc(spin->si_compminlen, fd); /* <compminlen> */
				5237	putc(spin->si_compsylmax, fd); /* <compsylmax> */
				5238	putc(0, fd); /* for Vim 7.0b compatibility */
				5239	putc(spin->si_compoptions, fd); /* <compoptions> */
				5240	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
				5241	/* <comppatcount> */
				5242	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5243	{
				5244	p = ((char_u **)(spin->si_comppat.ga_data))[i];
				5245	putc((int)STRLEN(p), fd); /* <comppatlen> */
				5246	fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
				5247	/* <comppattext> */
				5248	}
				5249	/* <compflags> */
				5250	fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
				5251	(size_t)1, fd);
				5252	}
				5253
				5254	/* SN_NOBREAK: NOBREAK flag */
				5255	if (spin->si_nobreak)
				5256	{
				5257	putc(SN_NOBREAK, fd); /* <sectionID> */
				5258	putc(0, fd); /* <sectionflags> */
				5259
				5260	/* It's empty, the presence of the section flags the feature. */
				5261	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5262	}
				5263
				5264	/* SN_SYLLABLE: syllable info.
				5265	* We don't mark it required, when not supported syllables will not be
				5266	* counted. */
				5267	if (spin->si_syllable != NULL)
				5268	{
				5269	putc(SN_SYLLABLE, fd); /* <sectionID> */
				5270	putc(0, fd); /* <sectionflags> */
				5271
				5272	l = (int)STRLEN(spin->si_syllable);
				5273	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5274	fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
				5275	/* <syllable> */
				5276	}
				5277
				5278	/* end of <SECTIONS> */
				5279	putc(SN_END, fd); /* <sectionend> */
				5280
				5281
				5282	/*
				5283	* <LWORDTREE> <KWORDTREE> <PREFIXTREE>
				5284	*/
				5285	spin->si_memtot = 0;
				5286	for (round = 1; round <= 3; ++round)
				5287	{
				5288	if (round == 1)
				5289	tree = spin->si_foldroot->wn_sibling;
				5290	else if (round == 2)
				5291	tree = spin->si_keeproot->wn_sibling;
				5292	else
				5293	tree = spin->si_prefroot->wn_sibling;
				5294
				5295	/* Clear the index and wnode fields in the tree. */
				5296	clear_node(tree);
				5297
				5298	/* Count the number of nodes. Needed to be able to allocate the
				5299	* memory when reading the nodes. Also fills in index for shared
				5300	* nodes. */
				5301	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
				5302
				5303	/* number of nodes in 4 bytes */
				5304	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5305	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5306
				5307	/* Write the nodes. */
				5308	(void)put_node(fd, tree, 0, regionmask, round == 3);
				5309	}
				5310
				5311	/* Write another byte to check for errors (file system full). */
				5312	if (putc(0, fd) == EOF)
				5313	retval = FAIL;
				5314	theend:
				5315	if (fclose(fd) == EOF)
				5316	retval = FAIL;
				5317
				5318	if (fwv != (size_t)1)
				5319	retval = FAIL;
				5320	if (retval == FAIL)
				5321	EMSG(_(e_write));
				5322
				5323	return retval;
				5324	}
				5325
				5326	/*
				5327	* Clear the index and wnode fields of "node", it siblings and its
				5328	* children. This is needed because they are a union with other items to save
				5329	* space.
				5330	*/
				5331	static void
				5332	clear_node(wordnode_T *node)
				5333	{
				5334	wordnode_T *np;
				5335
				5336	if (node != NULL)
				5337	for (np = node; np != NULL; np = np->wn_sibling)
				5338	{
				5339	np->wn_u1.index = 0;
				5340	np->wn_u2.wnode = NULL;
				5341
				5342	if (np->wn_byte != NUL)
				5343	clear_node(np->wn_child);
				5344	}
				5345	}
				5346
				5347
				5348	/*
				5349	* Dump a word tree at node "node".
				5350	*
				5351	* This first writes the list of possible bytes (siblings). Then for each
				5352	* byte recursively write the children.
				5353	*
				5354	* NOTE: The code here must match the code in read_tree_node(), since
				5355	* assumptions are made about the indexes (so that we don't have to write them
				5356	* in the file).
				5357	*
				5358	* Returns the number of nodes used.
				5359	*/
				5360	static int
				5361	put_node(
				5362	FILE fd, / NULL when only counting */
				5363	wordnode_T *node,
				5364	int idx,
				5365	int regionmask,
				5366	int prefixtree) /* TRUE for PREFIXTREE */
				5367	{
				5368	int newindex = idx;
				5369	int siblingcount = 0;
				5370	wordnode_T *np;
				5371	int flags;
				5372
				5373	/* If "node" is zero the tree is empty. */
				5374	if (node == NULL)
				5375	return 0;
				5376
				5377	/* Store the index where this node is written. */
				5378	node->wn_u1.index = idx;
				5379
				5380	/* Count the number of siblings. */
				5381	for (np = node; np != NULL; np = np->wn_sibling)
				5382	++siblingcount;
				5383
				5384	/* Write the sibling count. */
				5385	if (fd != NULL)
				5386	putc(siblingcount, fd); /* <siblingcount> */
				5387
				5388	/* Write each sibling byte and optionally extra info. */
				5389	for (np = node; np != NULL; np = np->wn_sibling)
				5390	{
				5391	if (np->wn_byte == 0)
				5392	{
				5393	if (fd != NULL)
				5394	{
				5395	/* For a NUL byte (end of word) write the flags etc. */
				5396	if (prefixtree)
				5397	{
				5398	/* In PREFIXTREE write the required affixID and the
				5399	* associated condition nr (stored in wn_region). The
				5400	* byte value is misused to store the "rare" and "not
				5401	* combining" flags */
				5402	if (np->wn_flags == (short_u)PFX_FLAGS)
				5403	putc(BY_NOFLAGS, fd); /* <byte> */
				5404	else
				5405	{
				5406	putc(BY_FLAGS, fd); /* <byte> */
				5407	putc(np->wn_flags, fd); /* <pflags> */
				5408	}
				5409	putc(np->wn_affixID, fd); /* <affixID> */
				5410	put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
				5411	}
				5412	else
				5413	{
				5414	/* For word trees we write the flag/region items. */
				5415	flags = np->wn_flags;
				5416	if (regionmask != 0 && np->wn_region != regionmask)
				5417	flags \|= WF_REGION;
				5418	if (np->wn_affixID != 0)
				5419	flags \|= WF_AFX;
				5420	if (flags == 0)
				5421	{
				5422	/* word without flags or region */
				5423	putc(BY_NOFLAGS, fd); /* <byte> */
				5424	}
				5425	else
				5426	{
				5427	if (np->wn_flags >= 0x100)
				5428	{
				5429	putc(BY_FLAGS2, fd); /* <byte> */
				5430	putc(flags, fd); /* <flags> */
				5431	putc((unsigned)flags >> 8, fd); /* <flags2> */
				5432	}
				5433	else
				5434	{
				5435	putc(BY_FLAGS, fd); /* <byte> */
				5436	putc(flags, fd); /* <flags> */
				5437	}
				5438	if (flags & WF_REGION)
				5439	putc(np->wn_region, fd); /* <region> */
				5440	if (flags & WF_AFX)
				5441	putc(np->wn_affixID, fd); /* <affixID> */
				5442	}
				5443	}
				5444	}
				5445	}
				5446	else
				5447	{
				5448	if (np->wn_child->wn_u1.index != 0
				5449	&& np->wn_child->wn_u2.wnode != node)
				5450	{
				5451	/* The child is written elsewhere, write the reference. */
				5452	if (fd != NULL)
				5453	{
				5454	putc(BY_INDEX, fd); /* <byte> */
				5455	/* <nodeidx> */
				5456	put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
				5457	}
				5458	}
				5459	else if (np->wn_child->wn_u2.wnode == NULL)
				5460	/* We will write the child below and give it an index. */
				5461	np->wn_child->wn_u2.wnode = node;
				5462
				5463	if (fd != NULL)
				5464	if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
				5465	{
				5466	EMSG(_(e_write));
				5467	return 0;
				5468	}
				5469	}
				5470	}
				5471
				5472	/* Space used in the array when reading: one for each sibling and one for
				5473	* the count. */
				5474	newindex += siblingcount + 1;
				5475
				5476	/* Recursively dump the children of each sibling. */
				5477	for (np = node; np != NULL; np = np->wn_sibling)
				5478	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
				5479	newindex = put_node(fd, np->wn_child, newindex, regionmask,
				5480	prefixtree);
				5481
				5482	return newindex;
				5483	}
				5484
				5485
				5486	/*
				5487	* ":mkspell [-ascii] outfile infile ..."
				5488	* ":mkspell [-ascii] addfile"
				5489	*/
				5490	void
				5491	ex_mkspell(exarg_T *eap)
				5492	{
				5493	int fcount;
				5494	char_u **fnames;
				5495	char_u *arg = eap->arg;
				5496	int ascii = FALSE;
				5497
				5498	if (STRNCMP(arg, "-ascii", 6) == 0)
				5499	{
				5500	ascii = TRUE;
				5501	arg = skipwhite(arg + 6);
				5502	}
				5503
				5504	/* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
				5505	if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
				5506	{
				5507	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
				5508	FreeWild(fcount, fnames);
				5509	}
				5510	}
				5511
				5512	/*
				5513	* Create the .sug file.
				5514	* Uses the soundfold info in "spin".
				5515	* Writes the file with the name "wfname", with ".spl" changed to ".sug".
				5516	*/
				5517	static void
				5518	spell_make_sugfile(spellinfo_T spin, char_u wfname)
				5519	{
				5520	char_u *fname = NULL;
				5521	int len;
				5522	slang_T *slang;
				5523	int free_slang = FALSE;
				5524
				5525	/*
				5526	* Read back the .spl file that was written. This fills the required
				5527	* info for soundfolding. This also uses less memory than the
				5528	* pointer-linked version of the trie. And it avoids having two versions
				5529	* of the code for the soundfolding stuff.
				5530	* It might have been done already by spell_reload_one().
				5531	*/
				5532	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				5533	if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
				5534	break;
				5535	if (slang == NULL)
				5536	{
				5537	spell_message(spin, (char_u *)_("Reading back spell file..."));
				5538	slang = spell_load_file(wfname, NULL, NULL, FALSE);
				5539	if (slang == NULL)
				5540	return;
				5541	free_slang = TRUE;
				5542	}
				5543
				5544	/*
				5545	* Clear the info in "spin" that is used.
				5546	*/
				5547	spin->si_blocks = NULL;
				5548	spin->si_blocks_cnt = 0;
				5549	spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
				5550	spin->si_free_count = 0;
				5551	spin->si_first_free = NULL;
				5552	spin->si_foldwcount = 0;
				5553
				5554	/*
				5555	* Go through the trie of good words, soundfold each word and add it to
				5556	* the soundfold trie.
				5557	*/
				5558	spell_message(spin, (char_u *)_("Performing soundfolding..."));
				5559	if (sug_filltree(spin, slang) == FAIL)
				5560	goto theend;
				5561
				5562	/*
				5563	* Create the table which links each soundfold word with a list of the
				5564	* good words it may come from. Creates buffer "spin->si_spellbuf".
				5565	* This also removes the wordnr from the NUL byte entries to make
				5566	* compression possible.
				5567	*/
				5568	if (sug_maketable(spin) == FAIL)
				5569	goto theend;
				5570
				5571	smsg((char_u *)_("Number of words after soundfolding: %ld"),
				5572	(long)spin->si_spellbuf->b_ml.ml_line_count);
				5573
				5574	/*
				5575	* Compress the soundfold trie.
				5576	*/
				5577	spell_message(spin, (char_u *)_(msg_compressing));
				5578	wordtree_compress(spin, spin->si_foldroot);
				5579
				5580	/*
				5581	* Write the .sug file.
				5582	* Make the file name by changing ".spl" to ".sug".
				5583	*/
				5584	fname = alloc(MAXPATHL);
				5585	if (fname == NULL)
				5586	goto theend;
				5587	vim_strncpy(fname, wfname, MAXPATHL - 1);
				5588	len = (int)STRLEN(fname);
				5589	fname[len - 2] = 'u';
				5590	fname[len - 1] = 'g';
				5591	sug_write(spin, fname);
				5592
				5593	theend:
				5594	vim_free(fname);
				5595	if (free_slang)
				5596	slang_free(slang);
				5597	free_blocks(spin->si_blocks);
				5598	close_spellbuf(spin->si_spellbuf);
				5599	}
				5600
				5601	/*
				5602	* Build the soundfold trie for language "slang".
				5603	*/
				5604	static int
				5605	sug_filltree(spellinfo_T spin, slang_T slang)
				5606	{
				5607	char_u *byts;
				5608	idx_T *idxs;
				5609	int depth;
				5610	idx_T arridx[MAXWLEN];
				5611	int curi[MAXWLEN];
				5612	char_u tword[MAXWLEN];
				5613	char_u tsalword[MAXWLEN];
				5614	int c;
				5615	idx_T n;
				5616	unsigned words_done = 0;
				5617	int wordcount[MAXWLEN];
				5618
				5619	/* We use si_foldroot for the soundfolded trie. */
				5620	spin->si_foldroot = wordtree_alloc(spin);
				5621	if (spin->si_foldroot == NULL)
				5622	return FAIL;
				5623
				5624	/* let tree_add_word() know we're adding to the soundfolded tree */
				5625	spin->si_sugtree = TRUE;
				5626
				5627	/*
				5628	* Go through the whole case-folded tree, soundfold each word and put it
				5629	* in the trie.
				5630	*/
				5631	byts = slang->sl_fbyts;
				5632	idxs = slang->sl_fidxs;
				5633
				5634	arridx[0] = 0;
				5635	curi[0] = 1;
				5636	wordcount[0] = 0;
				5637
				5638	depth = 0;
				5639	while (depth >= 0 && !got_int)
				5640	{
				5641	if (curi[depth] > byts[arridx[depth]])
				5642	{
				5643	/* Done all bytes at this node, go up one level. */
				5644	idxs[arridx[depth]] = wordcount[depth];
				5645	if (depth > 0)
				5646	wordcount[depth - 1] += wordcount[depth];
				5647
				5648	--depth;
				5649	line_breakcheck();
				5650	}
				5651	else
				5652	{
				5653
				5654	/* Do one more byte at this node. */
				5655	n = arridx[depth] + curi[depth];
				5656	++curi[depth];
				5657
				5658	c = byts[n];
				5659	if (c == 0)
				5660	{
				5661	/* Sound-fold the word. */
				5662	tword[depth] = NUL;
				5663	spell_soundfold(slang, tword, TRUE, tsalword);
				5664
				5665	/* We use the "flags" field for the MSB of the wordnr,
				5666	* "region" for the LSB of the wordnr. */
				5667	if (tree_add_word(spin, tsalword, spin->si_foldroot,
				5668	words_done >> 16, words_done & 0xffff,
				5669	0) == FAIL)
				5670	return FAIL;
				5671
				5672	++words_done;
				5673	++wordcount[depth];
				5674
				5675	/* Reset the block count each time to avoid compression
				5676	* kicking in. */
				5677	spin->si_blocks_cnt = 0;
				5678
				5679	/* Skip over any other NUL bytes (same word with different
				5680	* flags). */
				5681	while (byts[n + 1] == 0)
				5682	{
				5683	++n;
				5684	++curi[depth];
				5685	}
				5686	}
				5687	else
				5688	{
				5689	/* Normal char, go one level deeper. */
				5690	tword[depth++] = c;
				5691	arridx[depth] = idxs[n];
				5692	curi[depth] = 1;
				5693	wordcount[depth] = 0;
				5694	}
				5695	}
				5696	}
				5697
				5698	smsg((char_u *)_("Total number of words: %d"), words_done);
				5699
				5700	return OK;
				5701	}
				5702
				5703	/*
				5704	* Make the table that links each word in the soundfold trie to the words it
				5705	* can be produced from.
				5706	* This is not unlike lines in a file, thus use a memfile to be able to access
				5707	* the table efficiently.
				5708	* Returns FAIL when out of memory.
				5709	*/
				5710	static int
				5711	sug_maketable(spellinfo_T *spin)
				5712	{
				5713	garray_T ga;
				5714	int res = OK;
				5715
				5716	/* Allocate a buffer, open a memline for it and create the swap file
				5717	* (uses a temp file, not a .swp file). */
				5718	spin->si_spellbuf = open_spellbuf();
				5719	if (spin->si_spellbuf == NULL)
				5720	return FAIL;
				5721
				5722	/* Use a buffer to store the line info, avoids allocating many small
				5723	* pieces of memory. */
				5724	ga_init2(&ga, 1, 100);
				5725
				5726	/* recursively go through the tree */
				5727	if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
				5728	res = FAIL;
				5729
				5730	ga_clear(&ga);
				5731	return res;
				5732	}
				5733
				5734	/*
				5735	* Fill the table for one node and its children.
				5736	* Returns the wordnr at the start of the node.
				5737	* Returns -1 when out of memory.
				5738	*/
				5739	static int
				5740	sug_filltable(
				5741	spellinfo_T *spin,
				5742	wordnode_T *node,
				5743	int startwordnr,
				5744	garray_T gap) / place to store line of numbers */
				5745	{
				5746	wordnode_T p, np;
				5747	int wordnr = startwordnr;
				5748	int nr;
				5749	int prev_nr;
				5750
				5751	for (p = node; p != NULL; p = p->wn_sibling)
				5752	{
				5753	if (p->wn_byte == NUL)
				5754	{
				5755	gap->ga_len = 0;
				5756	prev_nr = 0;
				5757	for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
				5758	{
				5759	if (ga_grow(gap, 10) == FAIL)
				5760	return -1;
				5761
				5762	nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
				5763	/* Compute the offset from the previous nr and store the
				5764	* offset in a way that it takes a minimum number of bytes.
				5765	* It's a bit like utf-8, but without the need to mark
				5766	* following bytes. */
				5767	nr -= prev_nr;
				5768	prev_nr += nr;
				5769	gap->ga_len += offset2bytes(nr,
				5770	(char_u *)gap->ga_data + gap->ga_len);
				5771	}
				5772
				5773	/* add the NUL byte */
				5774	((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
				5775
				5776	if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
				5777	gap->ga_data, gap->ga_len, TRUE) == FAIL)
				5778	return -1;
				5779	++wordnr;
				5780
				5781	/* Remove extra NUL entries, we no longer need them. We don't
				5782	* bother freeing the nodes, the won't be reused anyway. */
				5783	while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
				5784	p->wn_sibling = p->wn_sibling->wn_sibling;
				5785
				5786	/* Clear the flags on the remaining NUL node, so that compression
				5787	* works a lot better. */
				5788	p->wn_flags = 0;
				5789	p->wn_region = 0;
				5790	}
				5791	else
				5792	{
				5793	wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
				5794	if (wordnr == -1)
				5795	return -1;
				5796	}
				5797	}
				5798	return wordnr;
				5799	}
				5800
				5801	/*
				5802	* Convert an offset into a minimal number of bytes.
				5803	* Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
				5804	* bytes.
				5805	*/
				5806	static int
				5807	offset2bytes(int nr, char_u *buf)
				5808	{
				5809	int rem;
				5810	int b1, b2, b3, b4;
				5811
				5812	/* Split the number in parts of base 255. We need to avoid NUL bytes. */
				5813	b1 = nr % 255 + 1;
				5814	rem = nr / 255;
				5815	b2 = rem % 255 + 1;
				5816	rem = rem / 255;
				5817	b3 = rem % 255 + 1;
				5818	b4 = rem / 255 + 1;
				5819
				5820	if (b4 > 1 \|\| b3 > 0x1f) /* 4 bytes */
				5821	{
				5822	buf[0] = 0xe0 + b4;
				5823	buf[1] = b3;
				5824	buf[2] = b2;
				5825	buf[3] = b1;
				5826	return 4;
				5827	}
				5828	if (b3 > 1 \|\| b2 > 0x3f ) /* 3 bytes */
				5829	{
				5830	buf[0] = 0xc0 + b3;
				5831	buf[1] = b2;
				5832	buf[2] = b1;
				5833	return 3;
				5834	}
				5835	if (b2 > 1 \|\| b1 > 0x7f ) /* 2 bytes */
				5836	{
				5837	buf[0] = 0x80 + b2;
				5838	buf[1] = b1;
				5839	return 2;
				5840	}
				5841	/* 1 byte */
				5842	buf[0] = b1;
				5843	return 1;
				5844	}
				5845
				5846	/*
				5847	* Write the .sug file in "fname".
				5848	*/
				5849	static void
				5850	sug_write(spellinfo_T spin, char_u fname)
				5851	{
				5852	FILE *fd;
				5853	wordnode_T *tree;
				5854	int nodecount;
				5855	int wcount;
				5856	char_u *line;
				5857	linenr_T lnum;
				5858	int len;
				5859
				5860	/* Create the file. Note that an existing file is silently overwritten! */
				5861	fd = mch_fopen((char *)fname, "w");
				5862	if (fd == NULL)
				5863	{
				5864	EMSG2(_(e_notopen), fname);
				5865	return;
				5866	}
				5867
				5868	vim_snprintf((char *)IObuff, IOSIZE,
				5869	_("Writing suggestion file %s ..."), fname);
				5870	spell_message(spin, IObuff);
				5871
				5872	/*
				5873	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				5874	*/
				5875	if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
				5876	{
				5877	EMSG(_(e_write));
				5878	goto theend;
				5879	}
				5880	putc(VIMSUGVERSION, fd); /* <versionnr> */
				5881
				5882	/* Write si_sugtime to the file. */
				5883	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5884
				5885	/*
				5886	* <SUGWORDTREE>
				5887	*/
				5888	spin->si_memtot = 0;
				5889	tree = spin->si_foldroot->wn_sibling;
				5890
				5891	/* Clear the index and wnode fields in the tree. */
				5892	clear_node(tree);
				5893
				5894	/* Count the number of nodes. Needed to be able to allocate the
				5895	* memory when reading the nodes. Also fills in index for shared
				5896	* nodes. */
				5897	nodecount = put_node(NULL, tree, 0, 0, FALSE);
				5898
				5899	/* number of nodes in 4 bytes */
				5900	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5901	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5902
				5903	/* Write the nodes. */
				5904	(void)put_node(fd, tree, 0, 0, FALSE);
				5905
				5906	/*
				5907	* <SUGTABLE>: <sugwcount> <sugline> ...
				5908	*/
				5909	wcount = spin->si_spellbuf->b_ml.ml_line_count;
				5910	put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
				5911
				5912	for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
				5913	{
				5914	/* <sugline>: <sugnr> ... NUL */
				5915	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
				5916	len = (int)STRLEN(line) + 1;
				5917	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
				5918	{
				5919	EMSG(_(e_write));
				5920	goto theend;
				5921	}
				5922	spin->si_memtot += len;
				5923	}
				5924
				5925	/* Write another byte to check for errors. */
				5926	if (putc(0, fd) == EOF)
				5927	EMSG(_(e_write));
				5928
				5929	vim_snprintf((char *)IObuff, IOSIZE,
				5930	_("Estimated runtime memory use: %d bytes"), spin->si_memtot);
				5931	spell_message(spin, IObuff);
				5932
				5933	theend:
				5934	/* close the file */
				5935	fclose(fd);
				5936	}
				5937
				5938
				5939	/*
				5940	* Create a Vim spell file from one or more word lists.
				5941	* "fnames[0]" is the output file name.
				5942	* "fnames[fcount - 1]" is the last input file name.
				5943	* Exception: when "fnames[0]" ends in ".add" it's used as the input file name
				5944	* and ".spl" is appended to make the output file name.
				5945	*/
				5946	void
				5947	mkspell(
				5948	int fcount,
				5949	char_u **fnames,
				5950	int ascii, /* -ascii argument given */
				5951	int over_write, /* overwrite existing output file */
				5952	int added_word) /* invoked through "zg" */
				5953	{
				5954	char_u *fname = NULL;
				5955	char_u *wfname;
				5956	char_u **innames;
				5957	int incount;
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	5958	afffile_T *(afile[MAXREGIONS]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5959	int i;
				5960	int len;
				5961	stat_T st;
				5962	int error = FALSE;
				5963	spellinfo_T spin;
				5964
				5965	vim_memset(&spin, 0, sizeof(spin));
				5966	spin.si_verbose = !added_word;
				5967	spin.si_ascii = ascii;
				5968	spin.si_followup = TRUE;
				5969	spin.si_rem_accents = TRUE;
				5970	ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
				5971	ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
				5972	ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
				5973	ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
				5974	ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
				5975	ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
				5976	hash_init(&spin.si_commonwords);
				5977	spin.si_newcompID = 127; /* start compound ID at first maximum */
				5978
				5979	/* default: fnames[0] is output file, following are input files */
				5980	innames = &fnames[1];
				5981	incount = fcount - 1;
				5982
				5983	wfname = alloc(MAXPATHL);
				5984	if (wfname == NULL)
				5985	return;
				5986
				5987	if (fcount >= 1)
				5988	{
				5989	len = (int)STRLEN(fnames[0]);
				5990	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
				5991	{
				5992	/* For ":mkspell path/en.latin1.add" output file is
				5993	* "path/en.latin1.add.spl". */
				5994	innames = &fnames[0];
				5995	incount = 1;
				5996	vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
				5997	}
				5998	else if (fcount == 1)
				5999	{
				6000	/* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
				6001	innames = &fnames[0];
				6002	incount = 1;
				6003	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6004	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6005	}
				6006	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
				6007	{
				6008	/* Name ends in ".spl", use as the file name. */
				6009	vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
				6010	}
				6011	else
				6012	/* Name should be language, make the file name from it. */
				6013	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				6014	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				6015
				6016	/* Check for .ascii.spl. */
				6017	if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
				6018	spin.si_ascii = TRUE;
				6019
				6020	/* Check for .add.spl. */
				6021	if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
				6022	spin.si_add = TRUE;
				6023	}
				6024
				6025	if (incount <= 0)
				6026	EMSG(_(e_invarg)); /* need at least output and input names */
				6027	else if (vim_strchr(gettail(wfname), '_') != NULL)
				6028	EMSG(_("E751: Output file name must not have region name"));
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	6029	else if (incount > MAXREGIONS)
				6030	EMSGN(_("E754: Only up to %ld regions supported"), MAXREGIONS);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6031	else
				6032	{
				6033	/* Check for overwriting before doing things that may take a lot of
				6034	* time. */
				6035	if (!over_write && mch_stat((char *)wfname, &st) >= 0)
				6036	{
				6037	EMSG(_(e_exists));
				6038	goto theend;
				6039	}
				6040	if (mch_isdir(wfname))
				6041	{
				6042	EMSG2(_(e_isadir2), wfname);
				6043	goto theend;
				6044	}
				6045
				6046	fname = alloc(MAXPATHL);
				6047	if (fname == NULL)
				6048	goto theend;
				6049
				6050	/*
				6051	* Init the aff and dic pointers.
				6052	* Get the region names if there are more than 2 arguments.
				6053	*/
				6054	for (i = 0; i < incount; ++i)
				6055	{
				6056	afile[i] = NULL;
				6057
				6058	if (incount > 1)
				6059	{
				6060	len = (int)STRLEN(innames[i]);
				6061	if (STRLEN(gettail(innames[i])) < 5
				6062	\|\| innames[i][len - 3] != '_')
				6063	{
				6064	EMSG2(_("E755: Invalid region in %s"), innames[i]);
				6065	goto theend;
				6066	}
				6067	spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
				6068	spin.si_region_name[i * 2 + 1] =
				6069	TOLOWER_ASC(innames[i][len - 1]);
				6070	}
				6071	}
				6072	spin.si_region_count = incount;
				6073
				6074	spin.si_foldroot = wordtree_alloc(&spin);
				6075	spin.si_keeproot = wordtree_alloc(&spin);
				6076	spin.si_prefroot = wordtree_alloc(&spin);
				6077	if (spin.si_foldroot == NULL
				6078	\|\| spin.si_keeproot == NULL
				6079	\|\| spin.si_prefroot == NULL)
				6080	{
				6081	free_blocks(spin.si_blocks);
				6082	goto theend;
				6083	}
				6084
				6085	/* When not producing a .add.spl file clear the character table when
				6086	* we encounter one in the .aff file. This means we dump the current
				6087	* one in the .spl file if the .aff file doesn't define one. That's
				6088	* better than guessing the contents, the table will match a
				6089	* previously loaded spell file. */
				6090	if (!spin.si_add)
				6091	spin.si_clear_chartab = TRUE;
				6092
				6093	/*
				6094	* Read all the .aff and .dic files.
				6095	* Text is converted to 'encoding'.
				6096	* Words are stored in the case-folded and keep-case trees.
				6097	*/
				6098	for (i = 0; i < incount && !error; ++i)
				6099	{
				6100	spin.si_conv.vc_type = CONV_NONE;
				6101	spin.si_region = 1 << i;
				6102
				6103	vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
				6104	if (mch_stat((char *)fname, &st) >= 0)
				6105	{
				6106	/* Read the .aff file. Will init "spin->si_conv" based on the
				6107	* "SET" line. */
				6108	afile[i] = spell_read_aff(&spin, fname);
				6109	if (afile[i] == NULL)
				6110	error = TRUE;
				6111	else
				6112	{
				6113	/* Read the .dic file and store the words in the trees. */
				6114	vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
				6115	innames[i]);
				6116	if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
				6117	error = TRUE;
				6118	}
				6119	}
				6120	else
				6121	{
				6122	/* No .aff file, try reading the file as a word list. Store
				6123	* the words in the trees. */
				6124	if (spell_read_wordfile(&spin, innames[i]) == FAIL)
				6125	error = TRUE;
				6126	}
				6127
				6128	#ifdef FEAT_MBYTE
				6129	/* Free any conversion stuff. */
				6130	convert_setup(&spin.si_conv, NULL, NULL);
				6131	#endif
				6132	}
				6133
				6134	if (spin.si_compflags != NULL && spin.si_nobreak)
				6135	MSG(_("Warning: both compounding and NOBREAK specified"));
				6136
				6137	if (!error && !got_int)
				6138	{
				6139	/*
				6140	* Combine tails in the tree.
				6141	*/
				6142	spell_message(&spin, (char_u *)_(msg_compressing));
				6143	wordtree_compress(&spin, spin.si_foldroot);
				6144	wordtree_compress(&spin, spin.si_keeproot);
				6145	wordtree_compress(&spin, spin.si_prefroot);
				6146	}
				6147
				6148	if (!error && !got_int)
				6149	{
				6150	/*
				6151	* Write the info in the spell file.
				6152	*/
				6153	vim_snprintf((char *)IObuff, IOSIZE,
				6154	_("Writing spell file %s ..."), wfname);
				6155	spell_message(&spin, IObuff);
				6156
				6157	error = write_vim_spell(&spin, wfname) == FAIL;
				6158
				6159	spell_message(&spin, (char_u *)_("Done!"));
				6160	vim_snprintf((char *)IObuff, IOSIZE,
				6161	_("Estimated runtime memory use: %d bytes"), spin.si_memtot);
				6162	spell_message(&spin, IObuff);
				6163
				6164	/*
				6165	* If the file is loaded need to reload it.
				6166	*/
				6167	if (!error)
				6168	spell_reload_one(wfname, added_word);
				6169	}
				6170
				6171	/* Free the allocated memory. */
				6172	ga_clear(&spin.si_rep);
				6173	ga_clear(&spin.si_repsal);
				6174	ga_clear(&spin.si_sal);
				6175	ga_clear(&spin.si_map);
				6176	ga_clear(&spin.si_comppat);
				6177	ga_clear(&spin.si_prefcond);
				6178	hash_clear_all(&spin.si_commonwords, 0);
				6179
				6180	/* Free the .aff file structures. */
				6181	for (i = 0; i < incount; ++i)
				6182	if (afile[i] != NULL)
				6183	spell_free_aff(afile[i]);
				6184
				6185	/* Free all the bits and pieces at once. */
				6186	free_blocks(spin.si_blocks);
				6187
				6188	/*
				6189	* If there is soundfolding info and no NOSUGFILE item create the
				6190	* .sug file with the soundfolded word trie.
				6191	*/
				6192	if (spin.si_sugtime != 0 && !error && !got_int)
				6193	spell_make_sugfile(&spin, wfname);
				6194
				6195	}
				6196
				6197	theend:
				6198	vim_free(fname);
				6199	vim_free(wfname);
				6200	}
				6201
				6202	/*
				6203	* Display a message for spell file processing when 'verbose' is set or using
				6204	* ":mkspell". "str" can be IObuff.
				6205	*/
				6206	static void
				6207	spell_message(spellinfo_T spin, char_u str)
				6208	{
				6209	if (spin->si_verbose \|\| p_verbose > 2)
				6210	{
				6211	if (!spin->si_verbose)
				6212	verbose_enter();
				6213	MSG(str);
				6214	out_flush();
				6215	if (!spin->si_verbose)
				6216	verbose_leave();
				6217	}
				6218	}
				6219
				6220	/*
				6221	* ":[count]spellgood {word}"
				6222	* ":[count]spellwrong {word}"
				6223	* ":[count]spellundo {word}"
				6224	*/
				6225	void
				6226	ex_spell(exarg_T *eap)
				6227	{
				6228	spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
				6229	eap->forceit ? 0 : (int)eap->line2,
				6230	eap->cmdidx == CMD_spellundo);
				6231	}
				6232
				6233	/*
				6234	* Add "word[len]" to 'spellfile' as a good or bad word.
				6235	*/
				6236	void
				6237	spell_add_word(
				6238	char_u *word,
				6239	int len,
				6240	int bad,
				6241	int idx, /* "zG" and "zW": zero, otherwise index in
				6242	'spellfile' */
				6243	int undo) /* TRUE for "zug", "zuG", "zuw" and "zuW" */
				6244	{
				6245	FILE *fd = NULL;
				6246	buf_T *buf = NULL;
				6247	int new_spf = FALSE;
				6248	char_u *fname;
				6249	char_u *fnamebuf = NULL;
				6250	char_u line[MAXWLEN * 2];
				6251	long fpos, fpos_next = 0;
				6252	int i;
				6253	char_u *spf;
				6254
				6255	if (idx == 0) /* use internal wordlist */
				6256	{
				6257	if (int_wordlist == NULL)
				6258	{
				6259	int_wordlist = vim_tempname('s', FALSE);
				6260	if (int_wordlist == NULL)
				6261	return;
				6262	}
				6263	fname = int_wordlist;
				6264	}
				6265	else
				6266	{
				6267	/* If 'spellfile' isn't set figure out a good default value. */
				6268	if (*curwin->w_s->b_p_spf == NUL)
				6269	{
				6270	init_spellfile();
				6271	new_spf = TRUE;
				6272	}
				6273
				6274	if (*curwin->w_s->b_p_spf == NUL)
				6275	{
				6276	EMSG2(_(e_notset), "spellfile");
				6277	return;
				6278	}
				6279	fnamebuf = alloc(MAXPATHL);
				6280	if (fnamebuf == NULL)
				6281	return;
				6282
				6283	for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
				6284	{
				6285	copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
				6286	if (i == idx)
				6287	break;
				6288	if (*spf == NUL)
				6289	{
				6290	EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
				6291	vim_free(fnamebuf);
				6292	return;
				6293	}
				6294	}
				6295
				6296	/* Check that the user isn't editing the .add file somewhere. */
				6297	buf = buflist_findname_exp(fnamebuf);
				6298	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
				6299	buf = NULL;
				6300	if (buf != NULL && bufIsChanged(buf))
				6301	{
				6302	EMSG(_(e_bufloaded));
				6303	vim_free(fnamebuf);
				6304	return;
				6305	}
				6306
				6307	fname = fnamebuf;
				6308	}
				6309
				6310	if (bad \|\| undo)
				6311	{
				6312	/* When the word appears as good word we need to remove that one,
				6313	* since its flags sort before the one with WF_BANNED. */
				6314	fd = mch_fopen((char *)fname, "r");
				6315	if (fd != NULL)
				6316	{
				6317	while (!vim_fgets(line, MAXWLEN * 2, fd))
				6318	{
				6319	fpos = fpos_next;
				6320	fpos_next = ftell(fd);
				6321	if (STRNCMP(word, line, len) == 0
				6322	&& (line[len] == '/' \|\| line[len] < ' '))
				6323	{
				6324	/* Found duplicate word. Remove it by writing a '#' at
				6325	* the start of the line. Mixing reading and writing
				6326	* doesn't work for all systems, close the file first. */
				6327	fclose(fd);
				6328	fd = mch_fopen((char *)fname, "r+");
				6329	if (fd == NULL)
				6330	break;
				6331	if (fseek(fd, fpos, SEEK_SET) == 0)
				6332	{
				6333	fputc('#', fd);
				6334	if (undo)
				6335	{
				6336	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6337	smsg((char_u )_("Word '%.s' removed from %s"),
				6338	len, word, NameBuff);
				6339	}
				6340	}
				6341	fseek(fd, fpos_next, SEEK_SET);
				6342	}
				6343	}
				6344	if (fd != NULL)
				6345	fclose(fd);
				6346	}
				6347	}
				6348
				6349	if (!undo)
				6350	{
				6351	fd = mch_fopen((char *)fname, "a");
				6352	if (fd == NULL && new_spf)
				6353	{
				6354	char_u *p;
				6355
				6356	/* We just initialized the 'spellfile' option and can't open the
				6357	* file. We may need to create the "spell" directory first. We
				6358	* already checked the runtime directory is writable in
				6359	* init_spellfile(). */
				6360	if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
				6361	{
				6362	int c = *p;
				6363
				6364	/* The directory doesn't exist. Try creating it and opening
				6365	* the file again. */
				6366	*p = NUL;
				6367	vim_mkdir(fname, 0755);
				6368	*p = c;
				6369	fd = mch_fopen((char *)fname, "a");
				6370	}
				6371	}
				6372
				6373	if (fd == NULL)
				6374	EMSG2(_(e_notopen), fname);
				6375	else
				6376	{
				6377	if (bad)
				6378	fprintf(fd, "%.*s/!\n", len, word);
				6379	else
				6380	fprintf(fd, "%.*s\n", len, word);
				6381	fclose(fd);
				6382
				6383	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
				6384	smsg((char_u )_("Word '%.s' added to %s"), len, word, NameBuff);
				6385	}
				6386	}
				6387
				6388	if (fd != NULL)
				6389	{
				6390	/* Update the .add.spl file. */
				6391	mkspell(1, &fname, FALSE, TRUE, TRUE);
				6392
				6393	/* If the .add file is edited somewhere, reload it. */
				6394	if (buf != NULL)
				6395	buf_reload(buf, buf->b_orig_mode);
				6396
				6397	redraw_all_later(SOME_VALID);
				6398	}
				6399	vim_free(fnamebuf);
				6400	}
				6401
				6402	/*
				6403	* Initialize 'spellfile' for the current buffer.
				6404	*/
				6405	static void
				6406	init_spellfile(void)
				6407	{
				6408	char_u *buf;
				6409	int l;
				6410	char_u *fname;
				6411	char_u *rtp;
				6412	char_u *lend;
				6413	int aspath = FALSE;
				6414	char_u *lstart = curbuf->b_s.b_p_spl;
				6415
				6416	if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
				6417	{
				6418	buf = alloc(MAXPATHL);
				6419	if (buf == NULL)
				6420	return;
				6421
				6422	/* Find the end of the language name. Exclude the region. If there
				6423	* is a path separator remember the start of the tail. */
				6424	for (lend = curwin->w_s->b_p_spl; *lend != NUL
				6425	&& vim_strchr((char_u )",._", lend) == NULL; ++lend)
				6426	if (vim_ispathsep(*lend))
				6427	{
				6428	aspath = TRUE;
				6429	lstart = lend + 1;
				6430	}
				6431
				6432	/* Loop over all entries in 'runtimepath'. Use the first one where we
				6433	* are allowed to write. */
				6434	rtp = p_rtp;
				6435	while (*rtp != NUL)
				6436	{
				6437	if (aspath)
				6438	/* Use directory of an entry with path, e.g., for
				6439	* "/dir/lg.utf-8.spl" use "/dir". */
				6440	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6441	lstart - curbuf->b_s.b_p_spl - 1);
				6442	else
				6443	/* Copy the path from 'runtimepath' to buf[]. */
				6444	copy_option_part(&rtp, buf, MAXPATHL, ",");
				6445	if (filewritable(buf) == 2)
				6446	{
				6447	/* Use the first language name from 'spelllang' and the
				6448	* encoding used in the first loaded .spl file. */
				6449	if (aspath)
				6450	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6451	lend - curbuf->b_s.b_p_spl);
				6452	else
				6453	{
				6454	/* Create the "spell" directory if it doesn't exist yet. */
				6455	l = (int)STRLEN(buf);
				6456	vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
				6457	if (filewritable(buf) != 2)
				6458	vim_mkdir(buf, 0755);
				6459
				6460	l = (int)STRLEN(buf);
				6461	vim_snprintf((char *)buf + l, MAXPATHL - l,
				6462	"/%.*s", (int)(lend - lstart), lstart);
				6463	}
				6464	l = (int)STRLEN(buf);
				6465	fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
				6466	->lp_slang->sl_fname;
				6467	vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
				6468	fname != NULL
				6469	&& strstr((char *)gettail(fname), ".ascii.") != NULL
				6470	? (char_u *)"ascii" : spell_enc());
				6471	set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
				6472	break;
				6473	}
				6474	aspath = FALSE;
				6475	}
				6476
				6477	vim_free(buf);
				6478	}
				6479	}
				6480
				6481
				6482
				6483	/*
				6484	* Set the spell character tables from strings in the affix file.
				6485	*/
				6486	static int
				6487	set_spell_chartab(char_u fol, char_u low, char_u *upp)
				6488	{
				6489	/* We build the new tables here first, so that we can compare with the
				6490	* previous one. */
				6491	spelltab_T new_st;
				6492	char_u pf = fol, pl = low, *pu = upp;
				6493	int f, l, u;
				6494
				6495	clear_spell_chartab(&new_st);
				6496
				6497	while (*pf != NUL)
				6498	{
				6499	if (pl == NUL \|\| pu == NUL)
				6500	{
				6501	EMSG(_(e_affform));
				6502	return FAIL;
				6503	}
				6504	#ifdef FEAT_MBYTE
				6505	f = mb_ptr2char_adv(&pf);
				6506	l = mb_ptr2char_adv(&pl);
				6507	u = mb_ptr2char_adv(&pu);
				6508	#else
				6509	f = *pf++;
				6510	l = *pl++;
				6511	u = *pu++;
				6512	#endif
				6513	/* Every character that appears is a word character. */
				6514	if (f < 256)
				6515	new_st.st_isw[f] = TRUE;
				6516	if (l < 256)
				6517	new_st.st_isw[l] = TRUE;
				6518	if (u < 256)
				6519	new_st.st_isw[u] = TRUE;
				6520
				6521	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				6522	* case-folding */
				6523	if (l < 256 && l != f)
				6524	{
				6525	if (f >= 256)
				6526	{
				6527	EMSG(_(e_affrange));
				6528	return FAIL;
				6529	}
				6530	new_st.st_fold[l] = f;
				6531	}
				6532
				6533	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				6534	* case-folding, it's upper case and the "UPP" is the upper case of
				6535	* "FOL" . */
				6536	if (u < 256 && u != f)
				6537	{
				6538	if (f >= 256)
				6539	{
				6540	EMSG(_(e_affrange));
				6541	return FAIL;
				6542	}
				6543	new_st.st_fold[u] = f;
				6544	new_st.st_isu[u] = TRUE;
				6545	new_st.st_upper[f] = u;
				6546	}
				6547	}
				6548
				6549	if (pl != NUL \|\| pu != NUL)
				6550	{
				6551	EMSG(_(e_affform));
				6552	return FAIL;
				6553	}
				6554
				6555	return set_spell_finish(&new_st);
				6556	}
				6557
				6558	/*
				6559	* Set the spell character tables from strings in the .spl file.
				6560	*/
				6561	static void
				6562	set_spell_charflags(
				6563	char_u *flags,
				6564	int cnt, /* length of "flags" */
				6565	char_u *fol)
				6566	{
				6567	/* We build the new tables here first, so that we can compare with the
				6568	* previous one. */
				6569	spelltab_T new_st;
				6570	int i;
				6571	char_u *p = fol;
				6572	int c;
				6573
				6574	clear_spell_chartab(&new_st);
				6575
				6576	for (i = 0; i < 128; ++i)
				6577	{
				6578	if (i < cnt)
				6579	{
				6580	new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
				6581	new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
				6582	}
				6583
				6584	if (*p != NUL)
				6585	{
				6586	#ifdef FEAT_MBYTE
				6587	c = mb_ptr2char_adv(&p);
				6588	#else
				6589	c = *p++;
				6590	#endif
				6591	new_st.st_fold[i + 128] = c;
				6592	if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
				6593	new_st.st_upper[c] = i + 128;
				6594	}
				6595	}
				6596
				6597	(void)set_spell_finish(&new_st);
				6598	}
				6599
				6600	static int
				6601	set_spell_finish(spelltab_T *new_st)
				6602	{
				6603	int i;
				6604
				6605	if (did_set_spelltab)
				6606	{
				6607	/* check that it's the same table */
				6608	for (i = 0; i < 256; ++i)
				6609	{
				6610	if (spelltab.st_isw[i] != new_st->st_isw[i]
				6611	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				6612	\|\| spelltab.st_fold[i] != new_st->st_fold[i]
				6613	\|\| spelltab.st_upper[i] != new_st->st_upper[i])
				6614	{
				6615	EMSG(_("E763: Word characters differ between spell files"));
				6616	return FAIL;
				6617	}
				6618	}
				6619	}
				6620	else
				6621	{
				6622	/* copy the new spelltab into the one being used */
				6623	spelltab = *new_st;
				6624	did_set_spelltab = TRUE;
				6625	}
				6626
				6627	return OK;
				6628	}
				6629
				6630	/*
				6631	* Write the table with prefix conditions to the .spl file.
				6632	* When "fd" is NULL only count the length of what is written.
				6633	*/
				6634	static int
				6635	write_spell_prefcond(FILE fd, garray_T gap)
				6636	{
				6637	int i;
				6638	char_u *p;
				6639	int len;
				6640	int totlen;
				6641	size_t x = 1; /* collect return value of fwrite() */
				6642
				6643	if (fd != NULL)
				6644	put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
				6645
				6646	totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
				6647
				6648	for (i = 0; i < gap->ga_len; ++i)
				6649	{
				6650	/* <prefcond> : <condlen> <condstr> */
				6651	p = ((char_u **)gap->ga_data)[i];
				6652	if (p != NULL)
				6653	{
				6654	len = (int)STRLEN(p);
				6655	if (fd != NULL)
				6656	{
				6657	fputc(len, fd);
				6658	x &= fwrite(p, (size_t)len, (size_t)1, fd);
				6659	}
				6660	totlen += len;
				6661	}
				6662	else if (fd != NULL)
				6663	fputc(0, fd);
				6664	}
				6665
				6666	return totlen;
				6667	}
				6668
				6669
				6670	/*
				6671	* Use map string "map" for languages "lp".
				6672	*/
				6673	static void
				6674	set_map_str(slang_T lp, char_u map)
				6675	{
				6676	char_u *p;
				6677	int headc = 0;
				6678	int c;
				6679	int i;
				6680
				6681	if (*map == NUL)
				6682	{
				6683	lp->sl_has_map = FALSE;
				6684	return;
				6685	}
				6686	lp->sl_has_map = TRUE;
				6687
				6688	/* Init the array and hash tables empty. */
				6689	for (i = 0; i < 256; ++i)
				6690	lp->sl_map_array[i] = 0;
				6691	#ifdef FEAT_MBYTE
				6692	hash_init(&lp->sl_map_hash);
				6693	#endif
				6694
				6695	/*
				6696	* The similar characters are stored separated with slashes:
				6697	* "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
				6698	* before the same slash. For characters above 255 sl_map_hash is used.
				6699	*/
				6700	for (p = map; *p != NUL; )
				6701	{
				6702	#ifdef FEAT_MBYTE
				6703	c = mb_cptr2char_adv(&p);
				6704	#else
				6705	c = *p++;
				6706	#endif
				6707	if (c == '/')
				6708	headc = 0;
				6709	else
				6710	{
				6711	if (headc == 0)
				6712	headc = c;
				6713
				6714	#ifdef FEAT_MBYTE
				6715	/* Characters above 255 don't fit in sl_map_array[], put them in
				6716	* the hash table. Each entry is the char, a NUL the headchar and
				6717	* a NUL. */
				6718	if (c >= 256)
				6719	{
				6720	int cl = mb_char2len(c);
				6721	int headcl = mb_char2len(headc);
				6722	char_u *b;
				6723	hash_T hash;
				6724	hashitem_T *hi;
				6725
				6726	b = alloc((unsigned)(cl + headcl + 2));
				6727	if (b == NULL)
				6728	return;
				6729	mb_char2bytes(c, b);
				6730	b[cl] = NUL;
				6731	mb_char2bytes(headc, b + cl + 1);
				6732	b[cl + 1 + headcl] = NUL;
				6733	hash = hash_hash(b);
				6734	hi = hash_lookup(&lp->sl_map_hash, b, hash);
				6735	if (HASHITEM_EMPTY(hi))
				6736	hash_add_item(&lp->sl_map_hash, hi, b, hash);
				6737	else
				6738	{
				6739	/* This should have been checked when generating the .spl
				6740	* file. */
				6741	EMSG(_("E783: duplicate char in MAP entry"));
				6742	vim_free(b);
				6743	}
				6744	}
				6745	else
				6746	#endif
				6747	lp->sl_map_array[c] = headc;
				6748	}
				6749	}
				6750	}
				6751
				6752
				6753	#endif /* FEAT_SPELL */