Blame - src/spellfile.c - android_external_vim

blob: 4b364421d4686eb1461c1c40a0a36b955177cb86 [file] [log] [blame]

Bram Moolenaar	edf3f97	2016-08-29 22:49:24 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* spellfile.c: code for reading and writing spell files.
				12	*
				13	* See spell.c for information about spell checking.
				14	*/
				15
				16	/*
				17	* Vim spell file format: <HEADER>
				18	* <SECTIONS>
				19	* <LWORDTREE>
				20	* <KWORDTREE>
				21	* <PREFIXTREE>
				22	*
				23	* <HEADER>: <fileID> <versionnr>
				24	*
				25	* <fileID> 8 bytes "VIMspell"
				26	* <versionnr> 1 byte VIMSPELLVERSION
				27	*
				28	*
				29	* Sections make it possible to add information to the .spl file without
				30	* making it incompatible with previous versions. There are two kinds of
				31	* sections:
				32	* 1. Not essential for correct spell checking. E.g. for making suggestions.
				33	* These are skipped when not supported.
				34	* 2. Optional information, but essential for spell checking when present.
				35	* E.g. conditions for affixes. When this section is present but not
				36	* supported an error message is given.
				37	*
				38	* <SECTIONS>: <section> ... <sectionend>
				39	*
				40	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				41	*
				42	* <sectionID> 1 byte number from 0 to 254 identifying the section
				43	*
				44	* <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct
				45	* spell checking
				46	*
				47	* <sectionlen> 4 bytes length of section contents, MSB first
				48	*
				49	* <sectionend> 1 byte SN_END
				50	*
				51	*
				52	* sectionID == SN_INFO: <infotext>
				53	* <infotext> N bytes free format text with spell file info (version,
				54	* website, etc)
				55	*
				56	* sectionID == SN_REGION: <regionname> ...
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	57	* <regionname> 2 bytes Up to MAXREGIONS region names: ca, au, etc. Lower
				58	* case. First <regionname> is region 1.
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	59	*
				60	* sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
				61	* <folcharslen> <folchars>
				62	* <charflagslen> 1 byte Number of bytes in <charflags> (should be 128).
				63	* <charflags> N bytes List of flags (first one is for character 128):
				64	* 0x01 word character CF_WORD
				65	* 0x02 upper-case character CF_UPPER
				66	* <folcharslen> 2 bytes Number of bytes in <folchars>.
				67	* <folchars> N bytes Folded characters, first one is for character 128.
				68	*
				69	* sectionID == SN_MIDWORD: <midword>
				70	* <midword> N bytes Characters that are word characters only when used
				71	* in the middle of a word.
				72	*
				73	* sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
				74	* <prefcondcnt> 2 bytes Number of <prefcond> items following.
				75	* <prefcond> : <condlen> <condstr>
				76	* <condlen> 1 byte Length of <condstr>.
				77	* <condstr> N bytes Condition for the prefix.
				78	*
				79	* sectionID == SN_REP: <repcount> <rep> ...
				80	* <repcount> 2 bytes number of <rep> items, MSB first.
				81	* <rep> : <repfromlen> <repfrom> <reptolen> <repto>
				82	* <repfromlen> 1 byte length of <repfrom>
				83	* <repfrom> N bytes "from" part of replacement
				84	* <reptolen> 1 byte length of <repto>
				85	* <repto> N bytes "to" part of replacement
				86	*
				87	* sectionID == SN_REPSAL: <repcount> <rep> ...
				88	* just like SN_REP but for soundfolded words
				89	*
				90	* sectionID == SN_SAL: <salflags> <salcount> <sal> ...
				91	* <salflags> 1 byte flags for soundsalike conversion:
				92	* SAL_F0LLOWUP
				93	* SAL_COLLAPSE
				94	* SAL_REM_ACCENTS
				95	* <salcount> 2 bytes number of <sal> items following
				96	* <sal> : <salfromlen> <salfrom> <saltolen> <salto>
				97	* <salfromlen> 1 byte length of <salfrom>
				98	* <salfrom> N bytes "from" part of soundsalike
				99	* <saltolen> 1 byte length of <salto>
				100	* <salto> N bytes "to" part of soundsalike
				101	*
				102	* sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				103	* <sofofromlen> 2 bytes length of <sofofrom>
				104	* <sofofrom> N bytes "from" part of soundfold
				105	* <sofotolen> 2 bytes length of <sofoto>
				106	* <sofoto> N bytes "to" part of soundfold
				107	*
				108	* sectionID == SN_SUGFILE: <timestamp>
				109	* <timestamp> 8 bytes time in seconds that must match with .sug file
				110	*
				111	* sectionID == SN_NOSPLITSUGS: nothing
				112	*
				113	* sectionID == SN_NOCOMPOUNDSUGS: nothing
				114	*
				115	* sectionID == SN_WORDS: <word> ...
				116	* <word> N bytes NUL terminated common word
				117	*
				118	* sectionID == SN_MAP: <mapstr>
				119	* <mapstr> N bytes String with sequences of similar characters,
				120	* separated by slashes.
				121	*
				122	* sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
				123	* <comppatcount> <comppattern> ... <compflags>
				124	* <compmax> 1 byte Maximum nr of words in compound word.
				125	* <compminlen> 1 byte Minimal word length for compounding.
				126	* <compsylmax> 1 byte Maximum nr of syllables in compound word.
				127	* <compoptions> 2 bytes COMP_ flags.
				128	* <comppatcount> 2 bytes number of <comppattern> following
				129	* <compflags> N bytes Flags from COMPOUNDRULE items, separated by
				130	* slashes.
				131	*
				132	* <comppattern>: <comppatlen> <comppattext>
				133	* <comppatlen> 1 byte length of <comppattext>
				134	* <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN
				135	*
				136	* sectionID == SN_NOBREAK: (empty, its presence is what matters)
				137	*
				138	* sectionID == SN_SYLLABLE: <syllable>
				139	* <syllable> N bytes String from SYLLABLE item.
				140	*
				141	* <LWORDTREE>: <wordtree>
				142	*
				143	* <KWORDTREE>: <wordtree>
				144	*
				145	* <PREFIXTREE>: <wordtree>
				146	*
				147	*
				148	* <wordtree>: <nodecount> <nodedata> ...
				149	*
				150	* <nodecount> 4 bytes Number of nodes following. MSB first.
				151	*
				152	* <nodedata>: <siblingcount> <sibling> ...
				153	*
				154	* <siblingcount> 1 byte Number of siblings in this node. The siblings
				155	* follow in sorted order.
				156	*
				157	* <sibling>: <byte> [ <nodeidx> <xbyte>
				158	* | <flags> [<flags2>] [<region>] [<affixID>]
				159	* | [<pflags>] <affixID> <prefcondnr> ]
				160	*
				161	* <byte> 1 byte Byte value of the sibling. Special cases:
				162	* BY_NOFLAGS: End of word without flags and for all
				163	* regions.
				164	* For PREFIXTREE <affixID> and
				165	* <prefcondnr> follow.
				166	* BY_FLAGS: End of word, <flags> follow.
				167	* For PREFIXTREE <pflags>, <affixID>
				168	* and <prefcondnr> follow.
				169	* BY_FLAGS2: End of word, <flags> and <flags2>
				170	* follow. Not used in PREFIXTREE.
				171	* BY_INDEX: Child of sibling is shared, <nodeidx>
				172	* and <xbyte> follow.
				173	*
				174	* <nodeidx> 3 bytes Index of child for this sibling, MSB first.
				175	*
				176	* <xbyte> 1 byte byte value of the sibling.
				177	*
				178	* <flags> 1 byte bitmask of:
				179	* WF_ALLCAP word must have only capitals
				180	* WF_ONECAP first char of word must be capital
				181	* WF_KEEPCAP keep-case word
				182	* WF_FIXCAP keep-case word, all caps not allowed
				183	* WF_RARE rare word
				184	* WF_BANNED bad word
				185	* WF_REGION <region> follows
				186	* WF_AFX <affixID> follows
				187	*
				188	* <flags2> 1 byte Bitmask of:
				189	* WF_HAS_AFF >> 8 word includes affix
				190	* WF_NEEDCOMP >> 8 word only valid in compound
				191	* WF_NOSUGGEST >> 8 word not used for suggestions
				192	* WF_COMPROOT >> 8 word already a compound
				193	* WF_NOCOMPBEF >> 8 no compounding before this word
				194	* WF_NOCOMPAFT >> 8 no compounding after this word
				195	*
				196	* <pflags> 1 byte bitmask of:
				197	* WFP_RARE rare prefix
				198	* WFP_NC non-combining prefix
				199	* WFP_UP letter after prefix made upper case
				200	*
				201	* <region> 1 byte Bitmask for regions in which word is valid. When
				202	* omitted it's valid in all regions.
				203	* Lowest bit is for region 1.
				204	*
				205	* <affixID> 1 byte ID of affix that can be used with this word. In
				206	* PREFIXTREE used for the required prefix ID.
				207	*
				208	* <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list
				209	* from HEADER.
				210	*
				211	* All text characters are in 'encoding', but stored as single bytes.
				212	*/
				213
				214	/*
				215	* Vim .sug file format: <SUGHEADER>
				216	* <SUGWORDTREE>
				217	* <SUGTABLE>
				218	*
				219	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				220	*
				221	* <fileID> 6 bytes "VIMsug"
				222	* <versionnr> 1 byte VIMSUGVERSION
				223	* <timestamp> 8 bytes timestamp that must match with .spl file
				224	*
				225	*
				226	* <SUGWORDTREE>: <wordtree> (see above, no flags or region used)
				227	*
				228	*
				229	* <SUGTABLE>: <sugwcount> <sugline> ...
				230	*
				231	* <sugwcount> 4 bytes number of <sugline> following
				232	*
				233	* <sugline>: <sugnr> ... NUL
				234	*
				235	* <sugnr>: X bytes word number that results in this soundfolded word,
				236	* stored as an offset to the previous number in as
				237	* few bytes as possible, see offset2bytes())
				238	*/
				239
				240	#include "vim.h"
				241
				242	#if defined(FEAT_SPELL) \|\| defined(PROTO)
				243
				244	#ifndef UNIX /* it's in os_unix.h for Unix */
				245	# include <time.h> /* for time_t */
				246	#endif
				247
				248	#ifndef UNIX /* it's in os_unix.h for Unix */
				249	# include <time.h> /* for time_t */
				250	#endif
				251
				252	/* Special byte values for <byte>. Some are only used in the tree for
				253	* postponed prefixes, some only in the other trees. This is a bit messy... */
				254	#define BY_NOFLAGS 0 /* end of word without flags or region; for
				255	* postponed prefix: no <pflags> */
				256	#define BY_INDEX 1 /* child is shared, index follows */
				257	#define BY_FLAGS 2 /* end of word, <flags> byte follows; for
				258	* postponed prefix: <pflags> follows */
				259	#define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes
				260	* follow; never used in prefix tree */
				261	#define BY_SPECIAL BY_FLAGS2 /* highest special byte value */
				262
Bram Moolenaar	3d2a47c	2019-11-07 20:48:42 +0100	[diff] [blame]	263	#define ZERO_FLAG 65009 // used when flag is zero: "0"
				264
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	265	/* Flags used in .spl file for soundsalike flags. */
				266	#define SAL_F0LLOWUP 1
				267	#define SAL_COLLAPSE 2
				268	#define SAL_REM_ACCENTS 4
				269
				270	#define VIMSPELLMAGIC "VIMspell" /* string at start of Vim spell file */
				271	#define VIMSPELLMAGICL 8
				272	#define VIMSPELLVERSION 50
				273
				274	/* Section IDs. Only renumber them when VIMSPELLVERSION changes! */
				275	#define SN_REGION 0 /* <regionname> section */
				276	#define SN_CHARFLAGS 1 /* charflags section */
				277	#define SN_MIDWORD 2 /* <midword> section */
				278	#define SN_PREFCOND 3 /* <prefcond> section */
				279	#define SN_REP 4 /* REP items section */
				280	#define SN_SAL 5 /* SAL items section */
				281	#define SN_SOFO 6 /* soundfolding section */
				282	#define SN_MAP 7 /* MAP items section */
				283	#define SN_COMPOUND 8 /* compound words section */
				284	#define SN_SYLLABLE 9 /* syllable section */
				285	#define SN_NOBREAK 10 /* NOBREAK section */
				286	#define SN_SUGFILE 11 /* timestamp for .sug file */
				287	#define SN_REPSAL 12 /* REPSAL items section */
				288	#define SN_WORDS 13 /* common words */
				289	#define SN_NOSPLITSUGS 14 /* don't split word for suggestions */
				290	#define SN_INFO 15 /* info section */
				291	#define SN_NOCOMPOUNDSUGS 16 /* don't compound for suggestions */
				292	#define SN_END 255 /* end of sections */
				293
				294	#define SNF_REQUIRED 1 /* <sectionflags>: required section */
				295
				296	#define CF_WORD 0x01
				297	#define CF_UPPER 0x02
				298
				299	static int set_spell_finish(spelltab_T *new_st);
				300	static int write_spell_prefcond(FILE fd, garray_T gap);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	301	static int read_region_section(FILE fd, slang_T slang, int len);
				302	static int read_charflags_section(FILE *fd);
				303	static int read_prefcond_section(FILE fd, slang_T lp);
				304	static int read_rep_section(FILE fd, garray_T gap, short *first);
				305	static int read_sal_section(FILE fd, slang_T slang);
				306	static int read_words_section(FILE fd, slang_T lp, int len);
				307	static int read_sofo_section(FILE fd, slang_T slang);
				308	static int read_compound(FILE fd, slang_T slang, int len);
				309	static int set_sofo(slang_T lp, char_u from, char_u *to);
				310	static void set_sal_first(slang_T *lp);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	311	static int mb_str2wide(char_u s);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	312	static int spell_read_tree(FILE fd, char_u bytsp, idx_T *idxsp, int prefixtree, int prefixcnt);
				313	static idx_T read_tree_node(FILE fd, char_u byts, idx_T *idxs, int maxidx, idx_T startidx, int prefixtree, int maxprefcondnr);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	314	static void set_spell_charflags(char_u flags, int cnt, char_u upp);
				315	static int set_spell_chartab(char_u fol, char_u low, char_u *upp);
				316	static void set_map_str(slang_T lp, char_u map);
				317
				318
				319	static char *e_spell_trunc = N_("E758: Truncated spell file");
				320	static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
				321	static char *e_affname = N_("Affix name too long in %s line %d: %s");
				322	static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
				323	static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
				324	static char *msg_compressing = N_("Compressing word tree...");
				325
				326	/*
				327	* Load one spell file and store the info into a slang_T.
				328	*
				329	* This is invoked in three ways:
				330	* - From spell_load_cb() to load a spell file for the first time. "lang" is
				331	* the language name, "old_lp" is NULL. Will allocate an slang_T.
				332	* - To reload a spell file that was changed. "lang" is NULL and "old_lp"
				333	* points to the existing slang_T.
				334	* - Just after writing a .spl file; it's read back to produce the .sug file.
				335	* "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T.
				336	*
				337	* Returns the slang_T the spell file was loaded into. NULL for error.
				338	*/
				339	slang_T *
				340	spell_load_file(
				341	char_u *fname,
				342	char_u *lang,
				343	slang_T *old_lp,
				344	int silent) /* no error if file doesn't exist */
				345	{
				346	FILE *fd;
				347	char_u buf[VIMSPELLMAGICL];
				348	char_u *p;
				349	int i;
				350	int n;
				351	int len;
				352	char_u *save_sourcing_name = sourcing_name;
				353	linenr_T save_sourcing_lnum = sourcing_lnum;
				354	slang_T *lp = NULL;
				355	int c = 0;
				356	int res;
				357
				358	fd = mch_fopen((char *)fname, "r");
				359	if (fd == NULL)
				360	{
				361	if (!silent)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	362	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	363	else if (p_verbose > 2)
				364	{
				365	verbose_enter();
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	366	smsg((const char *)e_notopen, fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	367	verbose_leave();
				368	}
				369	goto endFAIL;
				370	}
				371	if (p_verbose > 2)
				372	{
				373	verbose_enter();
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	374	smsg(_("Reading spell file \"%s\""), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	375	verbose_leave();
				376	}
				377
				378	if (old_lp == NULL)
				379	{
				380	lp = slang_alloc(lang);
				381	if (lp == NULL)
				382	goto endFAIL;
				383
				384	/* Remember the file name, used to reload the file when it's updated. */
				385	lp->sl_fname = vim_strsave(fname);
				386	if (lp->sl_fname == NULL)
				387	goto endFAIL;
				388
				389	/* Check for .add.spl (_add.spl for VMS). */
				390	lp->sl_add = strstr((char *)gettail(fname), SPL_FNAME_ADD) != NULL;
				391	}
				392	else
				393	lp = old_lp;
				394
				395	/* Set sourcing_name, so that error messages mention the file name. */
				396	sourcing_name = fname;
				397	sourcing_lnum = 0;
				398
				399	/*
				400	* <HEADER>: <fileID>
				401	*/
				402	for (i = 0; i < VIMSPELLMAGICL; ++i)
				403	buf[i] = getc(fd); /* <fileID> */
				404	if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
				405	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	406	emsg(_("E757: This does not look like a spell file"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	407	goto endFAIL;
				408	}
				409	c = getc(fd); /* <versionnr> */
				410	if (c < VIMSPELLVERSION)
				411	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	412	emsg(_("E771: Old spell file, needs to be updated"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	413	goto endFAIL;
				414	}
				415	else if (c > VIMSPELLVERSION)
				416	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	417	emsg(_("E772: Spell file is for newer version of Vim"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	418	goto endFAIL;
				419	}
				420
				421
				422	/*
				423	* <SECTIONS>: <section> ... <sectionend>
				424	* <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
				425	*/
				426	for (;;)
				427	{
				428	n = getc(fd); /* <sectionID> or <sectionend> */
				429	if (n == SN_END)
				430	break;
				431	c = getc(fd); /* <sectionflags> */
				432	len = get4c(fd); /* <sectionlen> */
				433	if (len < 0)
				434	goto truncerr;
				435
				436	res = 0;
				437	switch (n)
				438	{
				439	case SN_INFO:
				440	lp->sl_info = read_string(fd, len); /* <infotext> */
				441	if (lp->sl_info == NULL)
				442	goto endFAIL;
				443	break;
				444
				445	case SN_REGION:
				446	res = read_region_section(fd, lp, len);
				447	break;
				448
				449	case SN_CHARFLAGS:
				450	res = read_charflags_section(fd);
				451	break;
				452
				453	case SN_MIDWORD:
				454	lp->sl_midword = read_string(fd, len); /* <midword> */
				455	if (lp->sl_midword == NULL)
				456	goto endFAIL;
				457	break;
				458
				459	case SN_PREFCOND:
				460	res = read_prefcond_section(fd, lp);
				461	break;
				462
				463	case SN_REP:
				464	res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
				465	break;
				466
				467	case SN_REPSAL:
				468	res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
				469	break;
				470
				471	case SN_SAL:
				472	res = read_sal_section(fd, lp);
				473	break;
				474
				475	case SN_SOFO:
				476	res = read_sofo_section(fd, lp);
				477	break;
				478
				479	case SN_MAP:
				480	p = read_string(fd, len); /* <mapstr> */
				481	if (p == NULL)
				482	goto endFAIL;
				483	set_map_str(lp, p);
				484	vim_free(p);
				485	break;
				486
				487	case SN_WORDS:
				488	res = read_words_section(fd, lp, len);
				489	break;
				490
				491	case SN_SUGFILE:
				492	lp->sl_sugtime = get8ctime(fd); /* <timestamp> */
				493	break;
				494
				495	case SN_NOSPLITSUGS:
				496	lp->sl_nosplitsugs = TRUE;
				497	break;
				498
				499	case SN_NOCOMPOUNDSUGS:
				500	lp->sl_nocompoundsugs = TRUE;
				501	break;
				502
				503	case SN_COMPOUND:
				504	res = read_compound(fd, lp, len);
				505	break;
				506
				507	case SN_NOBREAK:
				508	lp->sl_nobreak = TRUE;
				509	break;
				510
				511	case SN_SYLLABLE:
				512	lp->sl_syllable = read_string(fd, len); /* <syllable> */
				513	if (lp->sl_syllable == NULL)
				514	goto endFAIL;
				515	if (init_syl_tab(lp) == FAIL)
				516	goto endFAIL;
				517	break;
				518
				519	default:
				520	/* Unsupported section. When it's required give an error
				521	* message. When it's not required skip the contents. */
				522	if (c & SNF_REQUIRED)
				523	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	524	emsg(_("E770: Unsupported section in spell file"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	525	goto endFAIL;
				526	}
				527	while (--len >= 0)
				528	if (getc(fd) < 0)
				529	goto truncerr;
				530	break;
				531	}
				532	someerror:
				533	if (res == SP_FORMERROR)
				534	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	535	emsg(_(e_format));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	536	goto endFAIL;
				537	}
				538	if (res == SP_TRUNCERROR)
				539	{
				540	truncerr:
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	541	emsg(_(e_spell_trunc));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	542	goto endFAIL;
				543	}
				544	if (res == SP_OTHERERROR)
				545	goto endFAIL;
				546	}
				547
				548	/* <LWORDTREE> */
				549	res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
				550	if (res != 0)
				551	goto someerror;
				552
				553	/* <KWORDTREE> */
				554	res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
				555	if (res != 0)
				556	goto someerror;
				557
				558	/* <PREFIXTREE> */
				559	res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
				560	lp->sl_prefixcnt);
				561	if (res != 0)
				562	goto someerror;
				563
				564	/* For a new file link it in the list of spell files. */
				565	if (old_lp == NULL && lang != NULL)
				566	{
				567	lp->sl_next = first_lang;
				568	first_lang = lp;
				569	}
				570
				571	goto endOK;
				572
				573	endFAIL:
				574	if (lang != NULL)
				575	/* truncating the name signals the error to spell_load_lang() */
				576	*lang = NUL;
				577	if (lp != NULL && old_lp == NULL)
				578	slang_free(lp);
				579	lp = NULL;
				580
				581	endOK:
				582	if (fd != NULL)
				583	fclose(fd);
				584	sourcing_name = save_sourcing_name;
				585	sourcing_lnum = save_sourcing_lnum;
				586
				587	return lp;
				588	}
				589
				590	/*
				591	* Fill in the wordcount fields for a trie.
				592	* Returns the total number of words.
				593	*/
				594	static void
				595	tree_count_words(char_u byts, idx_T idxs)
				596	{
				597	int depth;
				598	idx_T arridx[MAXWLEN];
				599	int curi[MAXWLEN];
				600	int c;
				601	idx_T n;
				602	int wordcount[MAXWLEN];
				603
				604	arridx[0] = 0;
				605	curi[0] = 1;
				606	wordcount[0] = 0;
				607	depth = 0;
				608	while (depth >= 0 && !got_int)
				609	{
				610	if (curi[depth] > byts[arridx[depth]])
				611	{
				612	/* Done all bytes at this node, go up one level. */
				613	idxs[arridx[depth]] = wordcount[depth];
				614	if (depth > 0)
				615	wordcount[depth - 1] += wordcount[depth];
				616
				617	--depth;
				618	fast_breakcheck();
				619	}
				620	else
				621	{
				622	/* Do one more byte at this node. */
				623	n = arridx[depth] + curi[depth];
				624	++curi[depth];
				625
				626	c = byts[n];
				627	if (c == 0)
				628	{
				629	/* End of word, count it. */
				630	++wordcount[depth];
				631
				632	/* Skip over any other NUL bytes (same word with different
				633	* flags). */
				634	while (byts[n + 1] == 0)
				635	{
				636	++n;
				637	++curi[depth];
				638	}
				639	}
				640	else
				641	{
				642	/* Normal char, go one level deeper to count the words. */
				643	++depth;
				644	arridx[depth] = idxs[n];
				645	curi[depth] = 1;
				646	wordcount[depth] = 0;
				647	}
				648	}
				649	}
				650	}
				651
				652	/*
				653	* Load the .sug files for languages that have one and weren't loaded yet.
				654	*/
				655	void
				656	suggest_load_files(void)
				657	{
				658	langp_T *lp;
				659	int lpi;
				660	slang_T *slang;
				661	char_u *dotp;
				662	FILE *fd;
				663	char_u buf[MAXWLEN];
				664	int i;
				665	time_t timestamp;
				666	int wcount;
				667	int wordnr;
				668	garray_T ga;
				669	int c;
				670
				671	/* Do this for all languages that support sound folding. */
				672	for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
				673	{
				674	lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
				675	slang = lp->lp_slang;
				676	if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
				677	{
				678	/* Change ".spl" to ".sug" and open the file. When the file isn't
				679	* found silently skip it. Do set "sl_sugloaded" so that we
				680	* don't try again and again. */
				681	slang->sl_sugloaded = TRUE;
				682
				683	dotp = vim_strrchr(slang->sl_fname, '.');
				684	if (dotp == NULL \|\| fnamecmp(dotp, ".spl") != 0)
				685	continue;
				686	STRCPY(dotp, ".sug");
				687	fd = mch_fopen((char *)slang->sl_fname, "r");
				688	if (fd == NULL)
				689	goto nextone;
				690
				691	/*
				692	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				693	*/
				694	for (i = 0; i < VIMSUGMAGICL; ++i)
				695	buf[i] = getc(fd); /* <fileID> */
				696	if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
				697	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	698	semsg(_("E778: This does not look like a .sug file: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	699	slang->sl_fname);
				700	goto nextone;
				701	}
				702	c = getc(fd); /* <versionnr> */
				703	if (c < VIMSUGVERSION)
				704	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	705	semsg(_("E779: Old .sug file, needs to be updated: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	706	slang->sl_fname);
				707	goto nextone;
				708	}
				709	else if (c > VIMSUGVERSION)
				710	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	711	semsg(_("E780: .sug file is for newer version of Vim: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	712	slang->sl_fname);
				713	goto nextone;
				714	}
				715
				716	/* Check the timestamp, it must be exactly the same as the one in
				717	* the .spl file. Otherwise the word numbers won't match. */
				718	timestamp = get8ctime(fd); /* <timestamp> */
				719	if (timestamp != slang->sl_sugtime)
				720	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	721	semsg(_("E781: .sug file doesn't match .spl file: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	722	slang->sl_fname);
				723	goto nextone;
				724	}
				725
				726	/*
				727	* <SUGWORDTREE>: <wordtree>
				728	* Read the trie with the soundfolded words.
				729	*/
				730	if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
				731	FALSE, 0) != 0)
				732	{
				733	someerror:
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	734	semsg(_("E782: error while reading .sug file: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	735	slang->sl_fname);
				736	slang_clear_sug(slang);
				737	goto nextone;
				738	}
				739
				740	/*
				741	* <SUGTABLE>: <sugwcount> <sugline> ...
				742	*
				743	* Read the table with word numbers. We use a file buffer for
				744	* this, because it's so much like a file with lines. Makes it
				745	* possible to swap the info and save on memory use.
				746	*/
				747	slang->sl_sugbuf = open_spellbuf();
				748	if (slang->sl_sugbuf == NULL)
				749	goto someerror;
				750	/* <sugwcount> */
				751	wcount = get4c(fd);
				752	if (wcount < 0)
				753	goto someerror;
				754
				755	/* Read all the wordnr lists into the buffer, one NUL terminated
				756	* list per line. */
				757	ga_init2(&ga, 1, 100);
				758	for (wordnr = 0; wordnr < wcount; ++wordnr)
				759	{
				760	ga.ga_len = 0;
				761	for (;;)
				762	{
				763	c = getc(fd); /* <sugline> */
				764	if (c < 0 \|\| ga_grow(&ga, 1) == FAIL)
				765	goto someerror;
				766	((char_u *)ga.ga_data)[ga.ga_len++] = c;
				767	if (c == NUL)
				768	break;
				769	}
				770	if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
				771	ga.ga_data, ga.ga_len, TRUE) == FAIL)
				772	goto someerror;
				773	}
				774	ga_clear(&ga);
				775
				776	/*
				777	* Need to put word counts in the word tries, so that we can find
				778	* a word by its number.
				779	*/
				780	tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
				781	tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
				782
				783	nextone:
				784	if (fd != NULL)
				785	fclose(fd);
				786	STRCPY(dotp, ".spl");
				787	}
				788	}
				789	}
				790
				791
				792	/*
				793	* Read a length field from "fd" in "cnt_bytes" bytes.
				794	* Allocate memory, read the string into it and add a NUL at the end.
				795	* Returns NULL when the count is zero.
				796	* Sets "cntp" to SP_ERROR when there is an error, length of the result
				797	* otherwise.
				798	*/
				799	static char_u *
				800	read_cnt_string(FILE fd, int cnt_bytes, int cntp)
				801	{
				802	int cnt = 0;
				803	int i;
				804	char_u *str;
				805
				806	/* read the length bytes, MSB first */
				807	for (i = 0; i < cnt_bytes; ++i)
				808	cnt = (cnt << 8) + getc(fd);
				809	if (cnt < 0)
				810	{
				811	*cntp = SP_TRUNCERROR;
				812	return NULL;
				813	}
				814	*cntp = cnt;
				815	if (cnt == 0)
				816	return NULL; /* nothing to read, return NULL */
				817
				818	str = read_string(fd, cnt);
				819	if (str == NULL)
				820	*cntp = SP_OTHERERROR;
				821	return str;
				822	}
				823
				824	/*
				825	* Read SN_REGION: <regionname> ...
				826	* Return SP_*ERROR flags.
				827	*/
				828	static int
				829	read_region_section(FILE fd, slang_T lp, int len)
				830	{
				831	int i;
				832
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	833	if (len > MAXREGIONS * 2)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	834	return SP_FORMERROR;
				835	for (i = 0; i < len; ++i)
				836	lp->sl_regions[i] = getc(fd); /* <regionname> */
				837	lp->sl_regions[len] = NUL;
				838	return 0;
				839	}
				840
				841	/*
				842	* Read SN_CHARFLAGS section: <charflagslen> <charflags>
				843	* <folcharslen> <folchars>
				844	* Return SP_*ERROR flags.
				845	*/
				846	static int
				847	read_charflags_section(FILE *fd)
				848	{
				849	char_u *flags;
				850	char_u *fol;
				851	int flagslen, follen;
				852
				853	/* <charflagslen> <charflags> */
				854	flags = read_cnt_string(fd, 1, &flagslen);
				855	if (flagslen < 0)
				856	return flagslen;
				857
				858	/* <folcharslen> <folchars> */
				859	fol = read_cnt_string(fd, 2, &follen);
				860	if (follen < 0)
				861	{
				862	vim_free(flags);
				863	return follen;
				864	}
				865
				866	/* Set the word-char flags and fill SPELL_ISUPPER() table. */
				867	if (flags != NULL && fol != NULL)
				868	set_spell_charflags(flags, flagslen, fol);
				869
				870	vim_free(flags);
				871	vim_free(fol);
				872
				873	/* When <charflagslen> is zero then <fcharlen> must also be zero. */
				874	if ((flags == NULL) != (fol == NULL))
				875	return SP_FORMERROR;
				876	return 0;
				877	}
				878
				879	/*
				880	* Read SN_PREFCOND section.
				881	* Return SP_*ERROR flags.
				882	*/
				883	static int
				884	read_prefcond_section(FILE fd, slang_T lp)
				885	{
				886	int cnt;
				887	int i;
				888	int n;
				889	char_u *p;
				890	char_u buf[MAXWLEN + 1];
				891
				892	/* <prefcondcnt> <prefcond> ... */
				893	cnt = get2c(fd); /* <prefcondcnt> */
				894	if (cnt <= 0)
				895	return SP_FORMERROR;
				896
Bram Moolenaar	c799fe2	2019-05-28 23:08:19 +0200	[diff] [blame]	897	lp->sl_prefprog = ALLOC_CLEAR_MULT(regprog_T *, cnt);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	898	if (lp->sl_prefprog == NULL)
				899	return SP_OTHERERROR;
				900	lp->sl_prefixcnt = cnt;
				901
				902	for (i = 0; i < cnt; ++i)
				903	{
				904	/* <prefcond> : <condlen> <condstr> */
				905	n = getc(fd); /* <condlen> */
				906	if (n < 0 \|\| n >= MAXWLEN)
				907	return SP_FORMERROR;
				908
				909	/* When <condlen> is zero we have an empty condition. Otherwise
				910	* compile the regexp program used to check for the condition. */
				911	if (n > 0)
				912	{
				913	buf[0] = '^'; /* always match at one position only */
				914	p = buf + 1;
				915	while (n-- > 0)
				916	p++ = getc(fd); / <condstr> */
				917	*p = NUL;
				918	lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
				919	}
				920	}
				921	return 0;
				922	}
				923
				924	/*
				925	* Read REP or REPSAL items section from "fd": <repcount> <rep> ...
				926	* Return SP_*ERROR flags.
				927	*/
				928	static int
				929	read_rep_section(FILE fd, garray_T gap, short *first)
				930	{
				931	int cnt;
				932	fromto_T *ftp;
				933	int i;
				934
				935	cnt = get2c(fd); /* <repcount> */
				936	if (cnt < 0)
				937	return SP_TRUNCERROR;
				938
				939	if (ga_grow(gap, cnt) == FAIL)
				940	return SP_OTHERERROR;
				941
				942	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				943	for (; gap->ga_len < cnt; ++gap->ga_len)
				944	{
				945	ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
				946	ftp->ft_from = read_cnt_string(fd, 1, &i);
				947	if (i < 0)
				948	return i;
				949	if (i == 0)
				950	return SP_FORMERROR;
				951	ftp->ft_to = read_cnt_string(fd, 1, &i);
				952	if (i <= 0)
				953	{
				954	vim_free(ftp->ft_from);
				955	if (i < 0)
				956	return i;
				957	return SP_FORMERROR;
				958	}
				959	}
				960
				961	/* Fill the first-index table. */
				962	for (i = 0; i < 256; ++i)
				963	first[i] = -1;
				964	for (i = 0; i < gap->ga_len; ++i)
				965	{
				966	ftp = &((fromto_T *)gap->ga_data)[i];
				967	if (first[*ftp->ft_from] == -1)
				968	first[*ftp->ft_from] = i;
				969	}
				970	return 0;
				971	}
				972
				973	/*
				974	* Read SN_SAL section: <salflags> <salcount> <sal> ...
				975	* Return SP_*ERROR flags.
				976	*/
				977	static int
				978	read_sal_section(FILE fd, slang_T slang)
				979	{
				980	int i;
				981	int cnt;
				982	garray_T *gap;
				983	salitem_T *smp;
				984	int ccnt;
				985	char_u *p;
				986	int c = NUL;
				987
				988	slang->sl_sofo = FALSE;
				989
				990	i = getc(fd); /* <salflags> */
				991	if (i & SAL_F0LLOWUP)
				992	slang->sl_followup = TRUE;
				993	if (i & SAL_COLLAPSE)
				994	slang->sl_collapse = TRUE;
				995	if (i & SAL_REM_ACCENTS)
				996	slang->sl_rem_accents = TRUE;
				997
				998	cnt = get2c(fd); /* <salcount> */
				999	if (cnt < 0)
				1000	return SP_TRUNCERROR;
				1001
				1002	gap = &slang->sl_sal;
				1003	ga_init2(gap, sizeof(salitem_T), 10);
				1004	if (ga_grow(gap, cnt + 1) == FAIL)
				1005	return SP_OTHERERROR;
				1006
				1007	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				1008	for (; gap->ga_len < cnt; ++gap->ga_len)
				1009	{
				1010	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1011	ccnt = getc(fd); /* <salfromlen> */
				1012	if (ccnt < 0)
				1013	return SP_TRUNCERROR;
				1014	if ((p = alloc(ccnt + 2)) == NULL)
				1015	return SP_OTHERERROR;
				1016	smp->sm_lead = p;
				1017
				1018	/* Read up to the first special char into sm_lead. */
				1019	for (i = 0; i < ccnt; ++i)
				1020	{
				1021	c = getc(fd); /* <salfrom> */
				1022	if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
				1023	break;
				1024	*p++ = c;
				1025	}
				1026	smp->sm_leadlen = (int)(p - smp->sm_lead);
				1027	*p++ = NUL;
				1028
				1029	/* Put (abc) chars in sm_oneof, if any. */
				1030	if (c == '(')
				1031	{
				1032	smp->sm_oneof = p;
				1033	for (++i; i < ccnt; ++i)
				1034	{
				1035	c = getc(fd); /* <salfrom> */
				1036	if (c == ')')
				1037	break;
				1038	*p++ = c;
				1039	}
				1040	*p++ = NUL;
				1041	if (++i < ccnt)
				1042	c = getc(fd);
				1043	}
				1044	else
				1045	smp->sm_oneof = NULL;
				1046
				1047	/* Any following chars go in sm_rules. */
				1048	smp->sm_rules = p;
				1049	if (i < ccnt)
				1050	/* store the char we got while checking for end of sm_lead */
				1051	*p++ = c;
				1052	for (++i; i < ccnt; ++i)
				1053	p++ = getc(fd); / <salfrom> */
				1054	*p++ = NUL;
				1055
				1056	/* <saltolen> <salto> */
				1057	smp->sm_to = read_cnt_string(fd, 1, &ccnt);
				1058	if (ccnt < 0)
				1059	{
				1060	vim_free(smp->sm_lead);
				1061	return ccnt;
				1062	}
				1063
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1064	if (has_mbyte)
				1065	{
				1066	/* convert the multi-byte strings to wide char strings */
				1067	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1068	smp->sm_leadlen = mb_charlen(smp->sm_lead);
				1069	if (smp->sm_oneof == NULL)
				1070	smp->sm_oneof_w = NULL;
				1071	else
				1072	smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
				1073	if (smp->sm_to == NULL)
				1074	smp->sm_to_w = NULL;
				1075	else
				1076	smp->sm_to_w = mb_str2wide(smp->sm_to);
				1077	if (smp->sm_lead_w == NULL
				1078	\|\| (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
				1079	\|\| (smp->sm_to_w == NULL && smp->sm_to != NULL))
				1080	{
				1081	vim_free(smp->sm_lead);
				1082	vim_free(smp->sm_to);
				1083	vim_free(smp->sm_lead_w);
				1084	vim_free(smp->sm_oneof_w);
				1085	vim_free(smp->sm_to_w);
				1086	return SP_OTHERERROR;
				1087	}
				1088	}
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1089	}
				1090
				1091	if (gap->ga_len > 0)
				1092	{
				1093	/* Add one extra entry to mark the end with an empty sm_lead. Avoids
				1094	* that we need to check the index every time. */
				1095	smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
				1096	if ((p = alloc(1)) == NULL)
				1097	return SP_OTHERERROR;
				1098	p[0] = NUL;
				1099	smp->sm_lead = p;
				1100	smp->sm_leadlen = 0;
				1101	smp->sm_oneof = NULL;
				1102	smp->sm_rules = p;
				1103	smp->sm_to = NULL;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1104	if (has_mbyte)
				1105	{
				1106	smp->sm_lead_w = mb_str2wide(smp->sm_lead);
				1107	smp->sm_leadlen = 0;
				1108	smp->sm_oneof_w = NULL;
				1109	smp->sm_to_w = NULL;
				1110	}
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1111	++gap->ga_len;
				1112	}
				1113
				1114	/* Fill the first-index table. */
				1115	set_sal_first(slang);
				1116
				1117	return 0;
				1118	}
				1119
				1120	/*
				1121	* Read SN_WORDS: <word> ...
				1122	* Return SP_*ERROR flags.
				1123	*/
				1124	static int
				1125	read_words_section(FILE fd, slang_T lp, int len)
				1126	{
				1127	int done = 0;
				1128	int i;
				1129	int c;
				1130	char_u word[MAXWLEN];
				1131
				1132	while (done < len)
				1133	{
				1134	/* Read one word at a time. */
				1135	for (i = 0; ; ++i)
				1136	{
				1137	c = getc(fd);
				1138	if (c == EOF)
				1139	return SP_TRUNCERROR;
				1140	word[i] = c;
				1141	if (word[i] == NUL)
				1142	break;
				1143	if (i == MAXWLEN - 1)
				1144	return SP_FORMERROR;
				1145	}
				1146
				1147	/* Init the count to 10. */
				1148	count_common_word(lp, word, -1, 10);
				1149	done += i + 1;
				1150	}
				1151	return 0;
				1152	}
				1153
				1154	/*
				1155	* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				1156	* Return SP_*ERROR flags.
				1157	*/
				1158	static int
				1159	read_sofo_section(FILE fd, slang_T slang)
				1160	{
				1161	int cnt;
				1162	char_u from, to;
				1163	int res;
				1164
				1165	slang->sl_sofo = TRUE;
				1166
				1167	/* <sofofromlen> <sofofrom> */
				1168	from = read_cnt_string(fd, 2, &cnt);
				1169	if (cnt < 0)
				1170	return cnt;
				1171
				1172	/* <sofotolen> <sofoto> */
				1173	to = read_cnt_string(fd, 2, &cnt);
				1174	if (cnt < 0)
				1175	{
				1176	vim_free(from);
				1177	return cnt;
				1178	}
				1179
				1180	/* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
				1181	if (from != NULL && to != NULL)
				1182	res = set_sofo(slang, from, to);
				1183	else if (from != NULL \|\| to != NULL)
				1184	res = SP_FORMERROR; /* only one of two strings is an error */
				1185	else
				1186	res = 0;
				1187
				1188	vim_free(from);
				1189	vim_free(to);
				1190	return res;
				1191	}
				1192
				1193	/*
				1194	* Read the compound section from the .spl file:
				1195	* <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
				1196	* Returns SP_*ERROR flags.
				1197	*/
				1198	static int
				1199	read_compound(FILE fd, slang_T slang, int len)
				1200	{
				1201	int todo = len;
				1202	int c;
				1203	int atstart;
				1204	char_u *pat;
				1205	char_u *pp;
				1206	char_u *cp;
				1207	char_u *ap;
				1208	char_u *crp;
				1209	int cnt;
				1210	garray_T *gap;
				1211
				1212	if (todo < 2)
				1213	return SP_FORMERROR; /* need at least two bytes */
				1214
				1215	--todo;
				1216	c = getc(fd); /* <compmax> */
				1217	if (c < 2)
				1218	c = MAXWLEN;
				1219	slang->sl_compmax = c;
				1220
				1221	--todo;
				1222	c = getc(fd); /* <compminlen> */
				1223	if (c < 1)
				1224	c = 0;
				1225	slang->sl_compminlen = c;
				1226
				1227	--todo;
				1228	c = getc(fd); /* <compsylmax> */
				1229	if (c < 1)
				1230	c = MAXWLEN;
				1231	slang->sl_compsylmax = c;
				1232
				1233	c = getc(fd); /* <compoptions> */
				1234	if (c != 0)
				1235	ungetc(c, fd); /* be backwards compatible with Vim 7.0b */
				1236	else
				1237	{
				1238	--todo;
				1239	c = getc(fd); /* only use the lower byte for now */
				1240	--todo;
				1241	slang->sl_compoptions = c;
				1242
				1243	gap = &slang->sl_comppat;
				1244	c = get2c(fd); /* <comppatcount> */
				1245	todo -= 2;
				1246	ga_init2(gap, sizeof(char_u *), c);
				1247	if (ga_grow(gap, c) == OK)
				1248	while (--c >= 0)
				1249	{
				1250	((char_u **)(gap->ga_data))[gap->ga_len++] =
				1251	read_cnt_string(fd, 1, &cnt);
				1252	/* <comppatlen> <comppattext> */
				1253	if (cnt < 0)
				1254	return cnt;
				1255	todo -= cnt + 1;
				1256	}
				1257	}
				1258	if (todo < 0)
				1259	return SP_FORMERROR;
				1260
				1261	/* Turn the COMPOUNDRULE items into a regexp pattern:
				1262	* "a[bc]/ab+" -> "^$a[bc]\\|ab\+$$".
				1263	* Inserting backslashes may double the length, "^$<Nul>" is 7 bytes.
				1264	* Conversion to utf-8 may double the size. */
				1265	c = todo * 2 + 7;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1266	if (enc_utf8)
				1267	c += todo * 2;
Bram Moolenaar	964b374	2019-05-24 18:54:09 +0200	[diff] [blame]	1268	pat = alloc(c);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1269	if (pat == NULL)
				1270	return SP_OTHERERROR;
				1271
				1272	/* We also need a list of all flags that can appear at the start and one
				1273	* for all flags. */
				1274	cp = alloc(todo + 1);
				1275	if (cp == NULL)
				1276	{
				1277	vim_free(pat);
				1278	return SP_OTHERERROR;
				1279	}
				1280	slang->sl_compstartflags = cp;
				1281	*cp = NUL;
				1282
				1283	ap = alloc(todo + 1);
				1284	if (ap == NULL)
				1285	{
				1286	vim_free(pat);
				1287	return SP_OTHERERROR;
				1288	}
				1289	slang->sl_compallflags = ap;
				1290	*ap = NUL;
				1291
				1292	/* And a list of all patterns in their original form, for checking whether
				1293	* compounding may work in match_compoundrule(). This is freed when we
				1294	* encounter a wildcard, the check doesn't work then. */
				1295	crp = alloc(todo + 1);
				1296	slang->sl_comprules = crp;
				1297
				1298	pp = pat;
				1299	*pp++ = '^';
				1300	*pp++ = '\\';
				1301	*pp++ = '(';
				1302
				1303	atstart = 1;
				1304	while (todo-- > 0)
				1305	{
				1306	c = getc(fd); /* <compflags> */
				1307	if (c == EOF)
				1308	{
				1309	vim_free(pat);
				1310	return SP_TRUNCERROR;
				1311	}
				1312
				1313	/* Add all flags to "sl_compallflags". */
				1314	if (vim_strchr((char_u )"?+[]/", c) == NULL
				1315	&& !byte_in_str(slang->sl_compallflags, c))
				1316	{
				1317	*ap++ = c;
				1318	*ap = NUL;
				1319	}
				1320
				1321	if (atstart != 0)
				1322	{
				1323	/* At start of item: copy flags to "sl_compstartflags". For a
				1324	* [abc] item set "atstart" to 2 and copy up to the ']'. */
				1325	if (c == '[')
				1326	atstart = 2;
				1327	else if (c == ']')
				1328	atstart = 0;
				1329	else
				1330	{
				1331	if (!byte_in_str(slang->sl_compstartflags, c))
				1332	{
				1333	*cp++ = c;
				1334	*cp = NUL;
				1335	}
				1336	if (atstart == 1)
				1337	atstart = 0;
				1338	}
				1339	}
				1340
				1341	/* Copy flag to "sl_comprules", unless we run into a wildcard. */
				1342	if (crp != NULL)
				1343	{
				1344	if (c == '?' \|\| c == '+' \|\| c == '*')
				1345	{
Bram Moolenaar	d23a823	2018-02-10 18:45:26 +0100	[diff] [blame]	1346	VIM_CLEAR(slang->sl_comprules);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1347	crp = NULL;
				1348	}
				1349	else
				1350	*crp++ = c;
				1351	}
				1352
				1353	if (c == '/') /* slash separates two items */
				1354	{
				1355	*pp++ = '\\';
				1356	*pp++ = '\|';
				1357	atstart = 1;
				1358	}
				1359	else /* normal char, "[abc]" and '' are copied as-is /
				1360	{
				1361	if (c == '?' \|\| c == '+' \|\| c == '~')
				1362	pp++ = '\\'; / "a?" becomes "a\?", "a+" becomes "a\+" */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1363	if (enc_utf8)
				1364	pp += mb_char2bytes(c, pp);
				1365	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1366	*pp++ = c;
				1367	}
				1368	}
				1369
				1370	*pp++ = '\\';
				1371	*pp++ = ')';
				1372	*pp++ = '$';
				1373	*pp = NUL;
				1374
				1375	if (crp != NULL)
				1376	*crp = NUL;
				1377
				1378	slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
				1379	vim_free(pat);
				1380	if (slang->sl_compprog == NULL)
				1381	return SP_FORMERROR;
				1382
				1383	return 0;
				1384	}
				1385
				1386	/*
				1387	* Set the SOFOFROM and SOFOTO items in language "lp".
				1388	* Returns SP_*ERROR flags when there is something wrong.
				1389	*/
				1390	static int
				1391	set_sofo(slang_T lp, char_u from, char_u *to)
				1392	{
				1393	int i;
				1394
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1395	garray_T *gap;
				1396	char_u *s;
				1397	char_u *p;
				1398	int c;
				1399	int *inp;
				1400
				1401	if (has_mbyte)
				1402	{
				1403	/* Use "sl_sal" as an array with 256 pointers to a list of wide
				1404	* characters. The index is the low byte of the character.
				1405	* The list contains from-to pairs with a terminating NUL.
				1406	* sl_sal_first[] is used for latin1 "from" characters. */
				1407	gap = &lp->sl_sal;
				1408	ga_init2(gap, sizeof(int *), 1);
				1409	if (ga_grow(gap, 256) == FAIL)
				1410	return SP_OTHERERROR;
				1411	vim_memset(gap->ga_data, 0, sizeof(int ) 256);
				1412	gap->ga_len = 256;
				1413
				1414	/* First count the number of items for each list. Temporarily use
				1415	* sl_sal_first[] for this. */
				1416	for (p = from, s = to; p != NUL && s != NUL; )
				1417	{
				1418	c = mb_cptr2char_adv(&p);
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	1419	MB_CPTR_ADV(s);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1420	if (c >= 256)
				1421	++lp->sl_sal_first[c & 0xff];
				1422	}
				1423	if (p != NUL \|\| s != NUL) /* lengths differ */
				1424	return SP_FORMERROR;
				1425
				1426	/* Allocate the lists. */
				1427	for (i = 0; i < 256; ++i)
				1428	if (lp->sl_sal_first[i] > 0)
				1429	{
				1430	p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
				1431	if (p == NULL)
				1432	return SP_OTHERERROR;
				1433	((int *)gap->ga_data)[i] = (int )p;
				1434	(int )p = 0;
				1435	}
				1436
				1437	/* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
				1438	* list. */
				1439	vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
				1440	for (p = from, s = to; p != NUL && s != NUL; )
				1441	{
				1442	c = mb_cptr2char_adv(&p);
				1443	i = mb_cptr2char_adv(&s);
				1444	if (c >= 256)
				1445	{
				1446	/* Append the from-to chars at the end of the list with
				1447	* the low byte. */
				1448	inp = ((int **)gap->ga_data)[c & 0xff];
				1449	while (*inp != 0)
				1450	++inp;
				1451	inp++ = c; / from char */
				1452	inp++ = i; / to char */
				1453	inp++ = NUL; / NUL at the end */
				1454	}
				1455	else
				1456	/* mapping byte to char is done in sl_sal_first[] */
				1457	lp->sl_sal_first[c] = i;
				1458	}
				1459	}
				1460	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1461	{
				1462	/* mapping bytes to bytes is done in sl_sal_first[] */
				1463	if (STRLEN(from) != STRLEN(to))
				1464	return SP_FORMERROR;
				1465
				1466	for (i = 0; to[i] != NUL; ++i)
				1467	lp->sl_sal_first[from[i]] = to[i];
				1468	lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */
				1469	}
				1470
				1471	return 0;
				1472	}
				1473
				1474	/*
				1475	* Fill the first-index table for "lp".
				1476	*/
				1477	static void
				1478	set_sal_first(slang_T *lp)
				1479	{
				1480	salfirst_T *sfirst;
				1481	int i;
				1482	salitem_T *smp;
				1483	int c;
				1484	garray_T *gap = &lp->sl_sal;
				1485
				1486	sfirst = lp->sl_sal_first;
				1487	for (i = 0; i < 256; ++i)
				1488	sfirst[i] = -1;
				1489	smp = (salitem_T *)gap->ga_data;
				1490	for (i = 0; i < gap->ga_len; ++i)
				1491	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1492	if (has_mbyte)
				1493	/* Use the lowest byte of the first character. For latin1 it's
				1494	* the character, for other encodings it should differ for most
				1495	* characters. */
				1496	c = *smp[i].sm_lead_w & 0xff;
				1497	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1498	c = *smp[i].sm_lead;
				1499	if (sfirst[c] == -1)
				1500	{
				1501	sfirst[c] = i;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1502	if (has_mbyte)
				1503	{
				1504	int n;
				1505
				1506	/* Make sure all entries with this byte are following each
				1507	* other. Move the ones that are in the wrong position. Do
				1508	* keep the same ordering! */
				1509	while (i + 1 < gap->ga_len
				1510	&& (*smp[i + 1].sm_lead_w & 0xff) == c)
				1511	/* Skip over entry with same index byte. */
				1512	++i;
				1513
				1514	for (n = 1; i + n < gap->ga_len; ++n)
				1515	if ((*smp[i + n].sm_lead_w & 0xff) == c)
				1516	{
				1517	salitem_T tsal;
				1518
				1519	/* Move entry with same index byte after the entries
				1520	* we already found. */
				1521	++i;
				1522	--n;
				1523	tsal = smp[i + n];
				1524	mch_memmove(smp + i + 1, smp + i,
				1525	sizeof(salitem_T) * n);
				1526	smp[i] = tsal;
				1527	}
				1528	}
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1529	}
				1530	}
				1531	}
				1532
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1533	/*
				1534	* Turn a multi-byte string into a wide character string.
				1535	* Return it in allocated memory (NULL for out-of-memory)
				1536	*/
				1537	static int *
				1538	mb_str2wide(char_u *s)
				1539	{
				1540	int *res;
				1541	char_u *p;
				1542	int i = 0;
				1543
Bram Moolenaar	c799fe2	2019-05-28 23:08:19 +0200	[diff] [blame]	1544	res = ALLOC_MULT(int, mb_charlen(s) + 1);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1545	if (res != NULL)
				1546	{
				1547	for (p = s; *p != NUL; )
				1548	res[i++] = mb_ptr2char_adv(&p);
				1549	res[i] = NUL;
				1550	}
				1551	return res;
				1552	}
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1553
				1554	/*
				1555	* Read a tree from the .spl or .sug file.
				1556	* Allocates the memory and stores pointers in "bytsp" and "idxsp".
				1557	* This is skipped when the tree has zero length.
				1558	* Returns zero when OK, SP_ value for an error.
				1559	*/
				1560	static int
				1561	spell_read_tree(
				1562	FILE *fd,
				1563	char_u **bytsp,
				1564	idx_T **idxsp,
				1565	int prefixtree, /* TRUE for the prefix tree */
				1566	int prefixcnt) /* when "prefixtree" is TRUE: prefix count */
				1567	{
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1568	long len;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1569	int idx;
				1570	char_u *bp;
				1571	idx_T *ip;
				1572
				1573	/* The tree size was computed when writing the file, so that we can
				1574	* allocate it as one long block. <nodecount> */
				1575	len = get4c(fd);
				1576	if (len < 0)
				1577	return SP_TRUNCERROR;
Bram Moolenaar	6d3c858	2017-02-26 15:27:23 +0100	[diff] [blame]	1578	if (len >= LONG_MAX / (long)sizeof(int))
Bram Moolenaar	399c297	2017-02-09 21:07:12 +0100	[diff] [blame]	1579	/* Invalid length, multiply with sizeof(int) would overflow. */
				1580	return SP_FORMERROR;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1581	if (len > 0)
				1582	{
				1583	/* Allocate the byte array. */
Bram Moolenaar	18a4ba2	2019-05-24 19:39:03 +0200	[diff] [blame]	1584	bp = alloc(len);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1585	if (bp == NULL)
				1586	return SP_OTHERERROR;
				1587	*bytsp = bp;
				1588
				1589	/* Allocate the index array. */
Bram Moolenaar	c799fe2	2019-05-28 23:08:19 +0200	[diff] [blame]	1590	ip = lalloc_clear(len * sizeof(int), TRUE);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1591	if (ip == NULL)
				1592	return SP_OTHERERROR;
				1593	*idxsp = ip;
				1594
				1595	/* Recursively read the tree and store it in the array. */
				1596	idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
				1597	if (idx < 0)
				1598	return idx;
				1599	}
				1600	return 0;
				1601	}
				1602
				1603	/*
				1604	* Read one row of siblings from the spell file and store it in the byte array
				1605	* "byts" and index array "idxs". Recursively read the children.
				1606	*
				1607	* NOTE: The code here must match put_node()!
				1608	*
				1609	* Returns the index (>= 0) following the siblings.
				1610	* Returns SP_TRUNCERROR if the file is shorter than expected.
				1611	* Returns SP_FORMERROR if there is a format error.
				1612	*/
				1613	static idx_T
				1614	read_tree_node(
				1615	FILE *fd,
				1616	char_u *byts,
				1617	idx_T *idxs,
				1618	int maxidx, /* size of arrays */
				1619	idx_T startidx, /* current index in "byts" and "idxs" */
				1620	int prefixtree, /* TRUE for reading PREFIXTREE */
				1621	int maxprefcondnr) /* maximum for <prefcondnr> */
				1622	{
				1623	int len;
				1624	int i;
				1625	int n;
				1626	idx_T idx = startidx;
				1627	int c;
				1628	int c2;
				1629	#define SHARED_MASK 0x8000000
				1630
				1631	len = getc(fd); /* <siblingcount> */
				1632	if (len <= 0)
				1633	return SP_TRUNCERROR;
				1634
				1635	if (startidx + len >= maxidx)
				1636	return SP_FORMERROR;
				1637	byts[idx++] = len;
				1638
				1639	/* Read the byte values, flag/region bytes and shared indexes. */
				1640	for (i = 1; i <= len; ++i)
				1641	{
				1642	c = getc(fd); /* <byte> */
				1643	if (c < 0)
				1644	return SP_TRUNCERROR;
				1645	if (c <= BY_SPECIAL)
				1646	{
				1647	if (c == BY_NOFLAGS && !prefixtree)
				1648	{
				1649	/* No flags, all regions. */
				1650	idxs[idx] = 0;
				1651	c = 0;
				1652	}
				1653	else if (c != BY_INDEX)
				1654	{
				1655	if (prefixtree)
				1656	{
				1657	/* Read the optional pflags byte, the prefix ID and the
				1658	* condition nr. In idxs[] store the prefix ID in the low
				1659	* byte, the condition index shifted up 8 bits, the flags
				1660	* shifted up 24 bits. */
				1661	if (c == BY_FLAGS)
				1662	c = getc(fd) << 24; /* <pflags> */
				1663	else
				1664	c = 0;
				1665
				1666	c \|= getc(fd); /* <affixID> */
				1667
				1668	n = get2c(fd); /* <prefcondnr> */
				1669	if (n >= maxprefcondnr)
				1670	return SP_FORMERROR;
				1671	c \|= (n << 8);
				1672	}
				1673	else /* c must be BY_FLAGS or BY_FLAGS2 */
				1674	{
				1675	/* Read flags and optional region and prefix ID. In
				1676	* idxs[] the flags go in the low two bytes, region above
				1677	* that and prefix ID above the region. */
				1678	c2 = c;
				1679	c = getc(fd); /* <flags> */
				1680	if (c2 == BY_FLAGS2)
				1681	c = (getc(fd) << 8) + c; /* <flags2> */
				1682	if (c & WF_REGION)
				1683	c = (getc(fd) << 16) + c; /* <region> */
				1684	if (c & WF_AFX)
				1685	c = (getc(fd) << 24) + c; /* <affixID> */
				1686	}
				1687
				1688	idxs[idx] = c;
				1689	c = 0;
				1690	}
				1691	else /* c == BY_INDEX */
				1692	{
				1693	/* <nodeidx> */
				1694	n = get3c(fd);
				1695	if (n < 0 \|\| n >= maxidx)
				1696	return SP_FORMERROR;
				1697	idxs[idx] = n + SHARED_MASK;
				1698	c = getc(fd); /* <xbyte> */
				1699	}
				1700	}
				1701	byts[idx++] = c;
				1702	}
				1703
				1704	/* Recursively read the children for non-shared siblings.
				1705	* Skip the end-of-word ones (zero byte value) and the shared ones (and
				1706	* remove SHARED_MASK) */
				1707	for (i = 1; i <= len; ++i)
				1708	if (byts[startidx + i] != 0)
				1709	{
				1710	if (idxs[startidx + i] & SHARED_MASK)
				1711	idxs[startidx + i] &= ~SHARED_MASK;
				1712	else
				1713	{
				1714	idxs[startidx + i] = idx;
				1715	idx = read_tree_node(fd, byts, idxs, maxidx, idx,
				1716	prefixtree, maxprefcondnr);
				1717	if (idx < 0)
				1718	break;
				1719	}
				1720	}
				1721
				1722	return idx;
				1723	}
				1724
				1725	/*
				1726	* Reload the spell file "fname" if it's loaded.
				1727	*/
				1728	static void
				1729	spell_reload_one(
				1730	char_u *fname,
				1731	int added_word) /* invoked through "zg" */
				1732	{
				1733	slang_T *slang;
				1734	int didit = FALSE;
				1735
				1736	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
				1737	{
Bram Moolenaar	99499b1	2019-05-23 21:35:48 +0200	[diff] [blame]	1738	if (fullpathcmp(fname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1739	{
				1740	slang_clear(slang);
				1741	if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
				1742	/* reloading failed, clear the language */
				1743	slang_clear(slang);
				1744	redraw_all_later(SOME_VALID);
				1745	didit = TRUE;
				1746	}
				1747	}
				1748
				1749	/* When "zg" was used and the file wasn't loaded yet, should redo
				1750	* 'spelllang' to load it now. */
				1751	if (added_word && !didit)
				1752	did_set_spelllang(curwin);
				1753	}
				1754
				1755
				1756	/*
				1757	* Functions for ":mkspell".
				1758	*/
				1759
				1760	#define MAXLINELEN 500 /* Maximum length in bytes of a line in a .aff
				1761	and .dic file. */
				1762	/*
				1763	* Main structure to store the contents of a ".aff" file.
				1764	*/
				1765	typedef struct afffile_S
				1766	{
				1767	char_u af_enc; / "SET", normalized, alloc'ed string or NULL */
				1768	int af_flagtype; /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
				1769	unsigned af_rare; /* RARE ID for rare word */
				1770	unsigned af_keepcase; /* KEEPCASE ID for keep-case word */
				1771	unsigned af_bad; /* BAD ID for banned word */
				1772	unsigned af_needaffix; /* NEEDAFFIX ID */
				1773	unsigned af_circumfix; /* CIRCUMFIX ID */
				1774	unsigned af_needcomp; /* NEEDCOMPOUND ID */
				1775	unsigned af_comproot; /* COMPOUNDROOT ID */
				1776	unsigned af_compforbid; /* COMPOUNDFORBIDFLAG ID */
				1777	unsigned af_comppermit; /* COMPOUNDPERMITFLAG ID */
				1778	unsigned af_nosuggest; /* NOSUGGEST ID */
				1779	int af_pfxpostpone; /* postpone prefixes without chop string and
				1780	without flags */
				1781	int af_ignoreextra; /* IGNOREEXTRA present */
				1782	hashtab_T af_pref; /* hashtable for prefixes, affheader_T */
				1783	hashtab_T af_suff; /* hashtable for suffixes, affheader_T */
				1784	hashtab_T af_comp; /* hashtable for compound flags, compitem_T */
				1785	} afffile_T;
				1786
				1787	#define AFT_CHAR 0 /* flags are one character */
				1788	#define AFT_LONG 1 /* flags are two characters */
				1789	#define AFT_CAPLONG 2 /* flags are one or two characters */
				1790	#define AFT_NUM 3 /* flags are numbers, comma separated */
				1791
				1792	typedef struct affentry_S affentry_T;
				1793	/* Affix entry from ".aff" file. Used for prefixes and suffixes. */
				1794	struct affentry_S
				1795	{
				1796	affentry_T ae_next; / next affix with same name/number */
				1797	char_u ae_chop; / text to chop off basic word (can be NULL) */
				1798	char_u ae_add; / text to add to basic word (can be NULL) */
				1799	char_u ae_flags; / flags on the affix (can be NULL) */
				1800	char_u ae_cond; / condition (NULL for ".") */
				1801	regprog_T ae_prog; / regexp program for ae_cond or NULL */
				1802	char ae_compforbid; /* COMPOUNDFORBIDFLAG found */
				1803	char ae_comppermit; /* COMPOUNDPERMITFLAG found */
				1804	};
				1805
Bram Moolenaar	264b74f	2019-01-24 17:18:42 +0100	[diff] [blame]	1806	#define AH_KEY_LEN 17 /* 2 x 8 bytes + NUL */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1807
				1808	/* Affix header from ".aff" file. Used for af_pref and af_suff. */
				1809	typedef struct affheader_S
				1810	{
				1811	char_u ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
				1812	unsigned ah_flag; /* affix name as number, uses "af_flagtype" */
				1813	int ah_newID; /* prefix ID after renumbering; 0 if not used */
				1814	int ah_combine; /* suffix may combine with prefix */
				1815	int ah_follows; /* another affix block should be following */
				1816	affentry_T ah_first; / first affix entry */
				1817	} affheader_T;
				1818
				1819	#define HI2AH(hi) ((affheader_T *)(hi)->hi_key)
				1820
				1821	/* Flag used in compound items. */
				1822	typedef struct compitem_S
				1823	{
				1824	char_u ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
				1825	unsigned ci_flag; /* affix name as number, uses "af_flagtype" */
				1826	int ci_newID; /* affix ID after renumbering. */
				1827	} compitem_T;
				1828
				1829	#define HI2CI(hi) ((compitem_T *)(hi)->hi_key)
				1830
				1831	/*
				1832	* Structure that is used to store the items in the word tree. This avoids
				1833	* the need to keep track of each allocated thing, everything is freed all at
				1834	* once after ":mkspell" is done.
				1835	* Note: "sb_next" must be just before "sb_data" to make sure the alignment of
				1836	* "sb_data" is correct for systems where pointers must be aligned on
				1837	* pointer-size boundaries and sizeof(pointer) > sizeof(int) (e.g., Sparc).
				1838	*/
				1839	#define SBLOCKSIZE 16000 /* size of sb_data */
				1840	typedef struct sblock_S sblock_T;
				1841	struct sblock_S
				1842	{
				1843	int sb_used; /* nr of bytes already in use */
				1844	sblock_T sb_next; / next block in list */
				1845	char_u sb_data[1]; /* data, actually longer */
				1846	};
				1847
				1848	/*
				1849	* A node in the tree.
				1850	*/
				1851	typedef struct wordnode_S wordnode_T;
				1852	struct wordnode_S
				1853	{
				1854	union /* shared to save space */
				1855	{
				1856	char_u hashkey[6]; /* the hash key, only used while compressing */
				1857	int index; /* index in written nodes (valid after first
				1858	round) */
				1859	} wn_u1;
				1860	union /* shared to save space */
				1861	{
				1862	wordnode_T next; / next node with same hash key */
				1863	wordnode_T wnode; / parent node that will write this node */
				1864	} wn_u2;
				1865	wordnode_T wn_child; / child (next byte in word) */
				1866	wordnode_T wn_sibling; / next sibling (alternate byte in word,
				1867	always sorted) */
				1868	int wn_refs; /* Nr. of references to this node. Only
				1869	relevant for first node in a list of
				1870	siblings, in following siblings it is
				1871	always one. */
				1872	char_u wn_byte; /* Byte for this node. NUL for word end */
				1873
				1874	/* Info for when "wn_byte" is NUL.
				1875	* In PREFIXTREE "wn_region" is used for the prefcondnr.
				1876	* In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
				1877	* "wn_region" the LSW of the wordnr. */
				1878	char_u wn_affixID; /* supported/required prefix ID or 0 */
				1879	short_u wn_flags; /* WF_ flags */
				1880	short wn_region; /* region mask */
				1881
				1882	#ifdef SPELL_PRINTTREE
				1883	int wn_nr; /* sequence nr for printing */
				1884	#endif
				1885	};
				1886
				1887	#define WN_MASK 0xffff /* mask relevant bits of "wn_flags" */
				1888
				1889	#define HI2WN(hi) (wordnode_T *)((hi)->hi_key)
				1890
				1891	/*
				1892	* Info used while reading the spell files.
				1893	*/
				1894	typedef struct spellinfo_S
				1895	{
				1896	wordnode_T si_foldroot; / tree with case-folded words */
				1897	long si_foldwcount; /* nr of words in si_foldroot */
				1898
				1899	wordnode_T si_keeproot; / tree with keep-case words */
				1900	long si_keepwcount; /* nr of words in si_keeproot */
				1901
				1902	wordnode_T si_prefroot; / tree with postponed prefixes */
				1903
				1904	long si_sugtree; /* creating the soundfolding trie */
				1905
				1906	sblock_T si_blocks; / memory blocks used */
				1907	long si_blocks_cnt; /* memory blocks allocated */
				1908	int si_did_emsg; /* TRUE when ran out of memory */
				1909
				1910	long si_compress_cnt; /* words to add before lowering
				1911	compression limit */
				1912	wordnode_T si_first_free; / List of nodes that have been freed during
				1913	compression, linked by "wn_child" field. */
				1914	long si_free_count; /* number of nodes in si_first_free */
				1915	#ifdef SPELL_PRINTTREE
				1916	int si_wordnode_nr; /* sequence nr for nodes */
				1917	#endif
				1918	buf_T si_spellbuf; / buffer used to store soundfold word table */
				1919
				1920	int si_ascii; /* handling only ASCII words */
				1921	int si_add; /* addition file */
				1922	int si_clear_chartab; /* when TRUE clear char tables */
				1923	int si_region; /* region mask */
				1924	vimconv_T si_conv; /* for conversion to 'encoding' */
				1925	int si_memtot; /* runtime memory used */
				1926	int si_verbose; /* verbose messages */
				1927	int si_msg_count; /* number of words added since last message */
				1928	char_u si_info; / info text chars or NULL */
				1929	int si_region_count; /* number of regions supported (1 when there
				1930	are no regions) */
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	1931	char_u si_region_name[MAXREGIONS * 2 + 1];
				1932	/* region names; used only if
				1933	* si_region_count > 1) */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1934
				1935	garray_T si_rep; /* list of fromto_T entries from REP lines */
				1936	garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */
				1937	garray_T si_sal; /* list of fromto_T entries from SAL lines */
				1938	char_u si_sofofr; / SOFOFROM text */
				1939	char_u si_sofoto; / SOFOTO text */
				1940	int si_nosugfile; /* NOSUGFILE item found */
				1941	int si_nosplitsugs; /* NOSPLITSUGS item found */
				1942	int si_nocompoundsugs; /* NOCOMPOUNDSUGS item found */
				1943	int si_followup; /* soundsalike: ? */
				1944	int si_collapse; /* soundsalike: ? */
				1945	hashtab_T si_commonwords; /* hashtable for common words */
				1946	time_t si_sugtime; /* timestamp for .sug file */
				1947	int si_rem_accents; /* soundsalike: remove accents */
				1948	garray_T si_map; /* MAP info concatenated */
				1949	char_u si_midword; / MIDWORD chars or NULL */
				1950	int si_compmax; /* max nr of words for compounding */
				1951	int si_compminlen; /* minimal length for compounding */
				1952	int si_compsylmax; /* max nr of syllables for compounding */
				1953	int si_compoptions; /* COMP_ flags */
				1954	garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as
				1955	a string */
				1956	char_u si_compflags; / flags used for compounding */
				1957	char_u si_nobreak; /* NOBREAK */
				1958	char_u si_syllable; / syllable string */
				1959	garray_T si_prefcond; /* table with conditions for postponed
				1960	* prefixes, each stored as a string */
				1961	int si_newprefID; /* current value for ah_newID */
				1962	int si_newcompID; /* current value for compound ID */
				1963	} spellinfo_T;
				1964
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1965	static int is_aff_rule(char_u *items, int itemcnt, char rulename, int mincount);
				1966	static void aff_process_flags(afffile_T affile, affentry_T entry);
				1967	static int spell_info_item(char_u *s);
				1968	static unsigned affitem2flag(int flagtype, char_u item, char_u fname, int lnum);
				1969	static unsigned get_affitem(int flagtype, char_u **pp);
				1970	static void process_compflags(spellinfo_T spin, afffile_T aff, char_u *compflags);
				1971	static void check_renumber(spellinfo_T *spin);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1972	static void aff_check_number(int spinval, int affval, char *name);
				1973	static void aff_check_string(char_u spinval, char_u affval, char *name);
				1974	static int str_equal(char_u s1, char_u s2);
				1975	static void add_fromto(spellinfo_T spin, garray_T gap, char_u from, char_u to);
				1976	static int sal_to_bool(char_u *s);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1977	static int get_affix_flags(afffile_T affile, char_u afflist);
				1978	static int get_pfxlist(afffile_T affile, char_u afflist, char_u *store_afflist);
				1979	static void get_compflags(afffile_T affile, char_u afflist, char_u *store_afflist);
				1980	static int store_aff_word(spellinfo_T spin, char_u word, char_u afflist, afffile_T affile, hashtab_T ht, hashtab_T xht, int condit, int flags, char_u *pfxlist, int pfxlen);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1981	static void getroom(spellinfo_T spin, size_t len, int align);
				1982	static char_u getroom_save(spellinfo_T spin, char_u *s);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1983	static int store_word(spellinfo_T spin, char_u word, int flags, int region, char_u *pfxlist, int need_affix);
				1984	static int tree_add_word(spellinfo_T spin, char_u word, wordnode_T *tree, int flags, int region, int affixID);
				1985	static wordnode_T get_wordnode(spellinfo_T spin);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1986	static void free_wordnode(spellinfo_T spin, wordnode_T n);
				1987	static void wordtree_compress(spellinfo_T spin, wordnode_T root);
				1988	static int node_compress(spellinfo_T spin, wordnode_T node, hashtab_T ht, int tot);
				1989	static int node_equal(wordnode_T n1, wordnode_T n2);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1990	static void clear_node(wordnode_T *node);
				1991	static int put_node(FILE fd, wordnode_T node, int idx, int regionmask, int prefixtree);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	1992	static int sug_filltree(spellinfo_T spin, slang_T slang);
				1993	static int sug_maketable(spellinfo_T *spin);
				1994	static int sug_filltable(spellinfo_T spin, wordnode_T node, int startwordnr, garray_T *gap);
				1995	static int offset2bytes(int nr, char_u *buf);
				1996	static void sug_write(spellinfo_T spin, char_u fname);
				1997	static void spell_message(spellinfo_T spin, char_u str);
				1998	static void init_spellfile(void);
				1999
				2000	/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
				2001	* but it must be negative to indicate the prefix tree to tree_add_word().
				2002	* Use a negative number with the lower 8 bits zero. */
				2003	#define PFX_FLAGS -256
				2004
				2005	/* flags for "condit" argument of store_aff_word() */
				2006	#define CONDIT_COMB 1 /* affix must combine */
				2007	#define CONDIT_CFIX 2 /* affix must have CIRCUMFIX flag */
				2008	#define CONDIT_SUF 4 /* add a suffix for matching flags */
				2009	#define CONDIT_AFF 8 /* word already has an affix */
				2010
				2011	/*
				2012	* Tunable parameters for when the tree is compressed. See 'mkspellmem'.
				2013	*/
				2014	static long compress_start = 30000; /* memory / SBLOCKSIZE */
				2015	static long compress_inc = 100; /* memory / SBLOCKSIZE */
				2016	static long compress_added = 500000; /* word count */
				2017
				2018	/*
				2019	* Check the 'mkspellmem' option. Return FAIL if it's wrong.
				2020	* Sets "sps_flags".
				2021	*/
				2022	int
				2023	spell_check_msm(void)
				2024	{
				2025	char_u *p = p_msm;
				2026	long start = 0;
				2027	long incr = 0;
				2028	long added = 0;
				2029
				2030	if (!VIM_ISDIGIT(*p))
				2031	return FAIL;
				2032	/* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
				2033	start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
				2034	if (*p != ',')
				2035	return FAIL;
				2036	++p;
				2037	if (!VIM_ISDIGIT(*p))
				2038	return FAIL;
				2039	incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
				2040	if (*p != ',')
				2041	return FAIL;
				2042	++p;
				2043	if (!VIM_ISDIGIT(*p))
				2044	return FAIL;
				2045	added = getdigits(&p) * 1024;
				2046	if (*p != NUL)
				2047	return FAIL;
				2048
				2049	if (start == 0 \|\| incr == 0 \|\| added == 0 \|\| incr > start)
				2050	return FAIL;
				2051
				2052	compress_start = start;
				2053	compress_inc = incr;
				2054	compress_added = added;
				2055	return OK;
				2056	}
				2057
				2058	#ifdef SPELL_PRINTTREE
				2059	/*
				2060	* For debugging the tree code: print the current tree in a (more or less)
				2061	* readable format, so that we can see what happens when adding a word and/or
				2062	* compressing the tree.
				2063	* Based on code from Olaf Seibert.
				2064	*/
				2065	#define PRINTLINESIZE 1000
				2066	#define PRINTWIDTH 6
				2067
				2068	#define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
				2069	PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
				2070
				2071	static char line1[PRINTLINESIZE];
				2072	static char line2[PRINTLINESIZE];
				2073	static char line3[PRINTLINESIZE];
				2074
				2075	static void
				2076	spell_clear_flags(wordnode_T *node)
				2077	{
				2078	wordnode_T *np;
				2079
				2080	for (np = node; np != NULL; np = np->wn_sibling)
				2081	{
				2082	np->wn_u1.index = FALSE;
				2083	spell_clear_flags(np->wn_child);
				2084	}
				2085	}
				2086
				2087	static void
				2088	spell_print_node(wordnode_T *node, int depth)
				2089	{
				2090	if (node->wn_u1.index)
				2091	{
				2092	/* Done this node before, print the reference. */
				2093	PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
				2094	PRINTSOME(line2, depth, " ", 0, 0);
				2095	PRINTSOME(line3, depth, " ", 0, 0);
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	2096	msg(line1);
				2097	msg(line2);
				2098	msg(line3);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2099	}
				2100	else
				2101	{
				2102	node->wn_u1.index = TRUE;
				2103
				2104	if (node->wn_byte != NUL)
				2105	{
				2106	if (node->wn_child != NULL)
				2107	PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
				2108	else
				2109	/* Cannot happen? */
				2110	PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
				2111	}
				2112	else
				2113	PRINTSOME(line1, depth, " $ ", 0, 0);
				2114
				2115	PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs);
				2116
				2117	if (node->wn_sibling != NULL)
				2118	PRINTSOME(line3, depth, " \| ", 0, 0);
				2119	else
				2120	PRINTSOME(line3, depth, " ", 0, 0);
				2121
				2122	if (node->wn_byte == NUL)
				2123	{
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	2124	msg(line1);
				2125	msg(line2);
				2126	msg(line3);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2127	}
				2128
				2129	/* do the children */
				2130	if (node->wn_byte != NUL && node->wn_child != NULL)
				2131	spell_print_node(node->wn_child, depth + 1);
				2132
				2133	/* do the siblings */
				2134	if (node->wn_sibling != NULL)
				2135	{
				2136	/* get rid of all parent details except \| */
				2137	STRCPY(line1, line3);
				2138	STRCPY(line2, line3);
				2139	spell_print_node(node->wn_sibling, depth);
				2140	}
				2141	}
				2142	}
				2143
				2144	static void
				2145	spell_print_tree(wordnode_T *root)
				2146	{
				2147	if (root != NULL)
				2148	{
				2149	/* Clear the "wn_u1.index" fields, used to remember what has been
				2150	* done. */
				2151	spell_clear_flags(root);
				2152
				2153	/* Recursively print the tree. */
				2154	spell_print_node(root, 0);
				2155	}
				2156	}
				2157	#endif /* SPELL_PRINTTREE */
				2158
				2159	/*
				2160	* Read the affix file "fname".
				2161	* Returns an afffile_T, NULL for complete failure.
				2162	*/
				2163	static afffile_T *
				2164	spell_read_aff(spellinfo_T spin, char_u fname)
				2165	{
				2166	FILE *fd;
				2167	afffile_T *aff;
				2168	char_u rline[MAXLINELEN];
				2169	char_u *line;
				2170	char_u *pc = NULL;
				2171	#define MAXITEMCNT 30
				2172	char_u *(items[MAXITEMCNT]);
				2173	int itemcnt;
				2174	char_u *p;
				2175	int lnum = 0;
				2176	affheader_T *cur_aff = NULL;
				2177	int did_postpone_prefix = FALSE;
				2178	int aff_todo = 0;
				2179	hashtab_T *tp;
				2180	char_u *low = NULL;
				2181	char_u *fol = NULL;
				2182	char_u *upp = NULL;
				2183	int do_rep;
				2184	int do_repsal;
				2185	int do_sal;
				2186	int do_mapline;
				2187	int found_map = FALSE;
				2188	hashitem_T *hi;
				2189	int l;
				2190	int compminlen = 0; /* COMPOUNDMIN value */
				2191	int compsylmax = 0; /* COMPOUNDSYLMAX value */
				2192	int compoptions = 0; /* COMP_ flags */
				2193	int compmax = 0; /* COMPOUNDWORDMAX value */
				2194	char_u compflags = NULL; / COMPOUNDFLAG and COMPOUNDRULE
				2195	concatenated */
				2196	char_u midword = NULL; / MIDWORD value */
				2197	char_u syllable = NULL; / SYLLABLE value */
				2198	char_u sofofrom = NULL; / SOFOFROM value */
				2199	char_u sofoto = NULL; / SOFOTO value */
				2200
				2201	/*
				2202	* Open the file.
				2203	*/
				2204	fd = mch_fopen((char *)fname, "r");
				2205	if (fd == NULL)
				2206	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2207	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2208	return NULL;
				2209	}
				2210
Bram Moolenaar	c166927	2018-06-19 14:23:53 +0200	[diff] [blame]	2211	vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s..."), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2212	spell_message(spin, IObuff);
				2213
				2214	/* Only do REP lines when not done in another .aff file already. */
				2215	do_rep = spin->si_rep.ga_len == 0;
				2216
				2217	/* Only do REPSAL lines when not done in another .aff file already. */
				2218	do_repsal = spin->si_repsal.ga_len == 0;
				2219
				2220	/* Only do SAL lines when not done in another .aff file already. */
				2221	do_sal = spin->si_sal.ga_len == 0;
				2222
				2223	/* Only do MAP lines when not done in another .aff file already. */
				2224	do_mapline = spin->si_map.ga_len == 0;
				2225
				2226	/*
				2227	* Allocate and init the afffile_T structure.
				2228	*/
				2229	aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
				2230	if (aff == NULL)
				2231	{
				2232	fclose(fd);
				2233	return NULL;
				2234	}
				2235	hash_init(&aff->af_pref);
				2236	hash_init(&aff->af_suff);
				2237	hash_init(&aff->af_comp);
				2238
				2239	/*
				2240	* Read all the lines in the file one by one.
				2241	*/
				2242	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				2243	{
				2244	line_breakcheck();
				2245	++lnum;
				2246
				2247	/* Skip comment lines. */
				2248	if (*rline == '#')
				2249	continue;
				2250
				2251	/* Convert from "SET" to 'encoding' when needed. */
				2252	vim_free(pc);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2253	if (spin->si_conv.vc_type != CONV_NONE)
				2254	{
				2255	pc = string_convert(&spin->si_conv, rline, NULL);
				2256	if (pc == NULL)
				2257	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2258	smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2259	fname, lnum, rline);
				2260	continue;
				2261	}
				2262	line = pc;
				2263	}
				2264	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2265	{
				2266	pc = NULL;
				2267	line = rline;
				2268	}
				2269
				2270	/* Split the line up in white separated items. Put a NUL after each
				2271	* item. */
				2272	itemcnt = 0;
				2273	for (p = line; ; )
				2274	{
				2275	while (p != NUL && p <= ' ') /* skip white space and CR/NL */
				2276	++p;
				2277	if (*p == NUL)
				2278	break;
				2279	if (itemcnt == MAXITEMCNT) /* too many items */
				2280	break;
				2281	items[itemcnt++] = p;
				2282	/* A few items have arbitrary text argument, don't split them. */
				2283	if (itemcnt == 2 && spell_info_item(items[0]))
				2284	while (p >= ' ' \|\| p == TAB) /* skip until CR/NL */
				2285	++p;
				2286	else
				2287	while (p > ' ') / skip until white space or CR/NL */
				2288	++p;
				2289	if (*p == NUL)
				2290	break;
				2291	*p++ = NUL;
				2292	}
				2293
				2294	/* Handle non-empty lines. */
				2295	if (itemcnt > 0)
				2296	{
				2297	if (is_aff_rule(items, itemcnt, "SET", 2) && aff->af_enc == NULL)
				2298	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2299	/* Setup for conversion from "ENC" to 'encoding'. */
				2300	aff->af_enc = enc_canonize(items[1]);
				2301	if (aff->af_enc != NULL && !spin->si_ascii
				2302	&& convert_setup(&spin->si_conv, aff->af_enc,
				2303	p_enc) == FAIL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2304	smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2305	fname, aff->af_enc, p_enc);
				2306	spin->si_conv.vc_fail = TRUE;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2307	}
				2308	else if (is_aff_rule(items, itemcnt, "FLAG", 2)
				2309	&& aff->af_flagtype == AFT_CHAR)
				2310	{
				2311	if (STRCMP(items[1], "long") == 0)
				2312	aff->af_flagtype = AFT_LONG;
				2313	else if (STRCMP(items[1], "num") == 0)
				2314	aff->af_flagtype = AFT_NUM;
				2315	else if (STRCMP(items[1], "caplong") == 0)
				2316	aff->af_flagtype = AFT_CAPLONG;
				2317	else
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2318	smsg(_("Invalid value for FLAG in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2319	fname, lnum, items[1]);
				2320	if (aff->af_rare != 0
				2321	\|\| aff->af_keepcase != 0
				2322	\|\| aff->af_bad != 0
				2323	\|\| aff->af_needaffix != 0
				2324	\|\| aff->af_circumfix != 0
				2325	\|\| aff->af_needcomp != 0
				2326	\|\| aff->af_comproot != 0
				2327	\|\| aff->af_nosuggest != 0
				2328	\|\| compflags != NULL
				2329	\|\| aff->af_suff.ht_used > 0
				2330	\|\| aff->af_pref.ht_used > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2331	smsg(_("FLAG after using flags in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2332	fname, lnum, items[1]);
				2333	}
				2334	else if (spell_info_item(items[0]))
				2335	{
				2336	p = (char_u *)getroom(spin,
				2337	(spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
				2338	+ STRLEN(items[0])
				2339	+ STRLEN(items[1]) + 3, FALSE);
				2340	if (p != NULL)
				2341	{
				2342	if (spin->si_info != NULL)
				2343	{
				2344	STRCPY(p, spin->si_info);
				2345	STRCAT(p, "\n");
				2346	}
				2347	STRCAT(p, items[0]);
				2348	STRCAT(p, " ");
				2349	STRCAT(p, items[1]);
				2350	spin->si_info = p;
				2351	}
				2352	}
				2353	else if (is_aff_rule(items, itemcnt, "MIDWORD", 2)
				2354	&& midword == NULL)
				2355	{
				2356	midword = getroom_save(spin, items[1]);
				2357	}
				2358	else if (is_aff_rule(items, itemcnt, "TRY", 2))
				2359	{
				2360	/* ignored, we look in the tree for what chars may appear */
				2361	}
				2362	/* TODO: remove "RAR" later */
				2363	else if ((is_aff_rule(items, itemcnt, "RAR", 2)
				2364	\|\| is_aff_rule(items, itemcnt, "RARE", 2))
				2365	&& aff->af_rare == 0)
				2366	{
				2367	aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
				2368	fname, lnum);
				2369	}
				2370	/* TODO: remove "KEP" later */
				2371	else if ((is_aff_rule(items, itemcnt, "KEP", 2)
				2372	\|\| is_aff_rule(items, itemcnt, "KEEPCASE", 2))
				2373	&& aff->af_keepcase == 0)
				2374	{
				2375	aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
				2376	fname, lnum);
				2377	}
				2378	else if ((is_aff_rule(items, itemcnt, "BAD", 2)
				2379	\|\| is_aff_rule(items, itemcnt, "FORBIDDENWORD", 2))
				2380	&& aff->af_bad == 0)
				2381	{
				2382	aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
				2383	fname, lnum);
				2384	}
				2385	else if (is_aff_rule(items, itemcnt, "NEEDAFFIX", 2)
				2386	&& aff->af_needaffix == 0)
				2387	{
				2388	aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
				2389	fname, lnum);
				2390	}
				2391	else if (is_aff_rule(items, itemcnt, "CIRCUMFIX", 2)
				2392	&& aff->af_circumfix == 0)
				2393	{
				2394	aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
				2395	fname, lnum);
				2396	}
				2397	else if (is_aff_rule(items, itemcnt, "NOSUGGEST", 2)
				2398	&& aff->af_nosuggest == 0)
				2399	{
				2400	aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
				2401	fname, lnum);
				2402	}
				2403	else if ((is_aff_rule(items, itemcnt, "NEEDCOMPOUND", 2)
				2404	\|\| is_aff_rule(items, itemcnt, "ONLYINCOMPOUND", 2))
				2405	&& aff->af_needcomp == 0)
				2406	{
				2407	aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
				2408	fname, lnum);
				2409	}
				2410	else if (is_aff_rule(items, itemcnt, "COMPOUNDROOT", 2)
				2411	&& aff->af_comproot == 0)
				2412	{
				2413	aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
				2414	fname, lnum);
				2415	}
				2416	else if (is_aff_rule(items, itemcnt, "COMPOUNDFORBIDFLAG", 2)
				2417	&& aff->af_compforbid == 0)
				2418	{
				2419	aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
				2420	fname, lnum);
				2421	if (aff->af_pref.ht_used > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2422	smsg(_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2423	fname, lnum);
				2424	}
				2425	else if (is_aff_rule(items, itemcnt, "COMPOUNDPERMITFLAG", 2)
				2426	&& aff->af_comppermit == 0)
				2427	{
				2428	aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
				2429	fname, lnum);
				2430	if (aff->af_pref.ht_used > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2431	smsg(_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2432	fname, lnum);
				2433	}
				2434	else if (is_aff_rule(items, itemcnt, "COMPOUNDFLAG", 2)
				2435	&& compflags == NULL)
				2436	{
				2437	/* Turn flag "c" into COMPOUNDRULE compatible string "c+",
				2438	* "Na" into "Na+", "1234" into "1234+". */
				2439	p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
				2440	if (p != NULL)
				2441	{
				2442	STRCPY(p, items[1]);
				2443	STRCAT(p, "+");
				2444	compflags = p;
				2445	}
				2446	}
				2447	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULES", 2))
				2448	{
				2449	/* We don't use the count, but do check that it's a number and
				2450	* not COMPOUNDRULE mistyped. */
				2451	if (atoi((char *)items[1]) == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2452	smsg(_("Wrong COMPOUNDRULES value in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2453	fname, lnum, items[1]);
				2454	}
				2455	else if (is_aff_rule(items, itemcnt, "COMPOUNDRULE", 2))
				2456	{
				2457	/* Don't use the first rule if it is a number. */
				2458	if (compflags != NULL \|\| *skipdigits(items[1]) != NUL)
				2459	{
				2460	/* Concatenate this string to previously defined ones,
				2461	* using a slash to separate them. */
				2462	l = (int)STRLEN(items[1]) + 1;
				2463	if (compflags != NULL)
				2464	l += (int)STRLEN(compflags) + 1;
				2465	p = getroom(spin, l, FALSE);
				2466	if (p != NULL)
				2467	{
				2468	if (compflags != NULL)
				2469	{
				2470	STRCPY(p, compflags);
				2471	STRCAT(p, "/");
				2472	}
				2473	STRCAT(p, items[1]);
				2474	compflags = p;
				2475	}
				2476	}
				2477	}
				2478	else if (is_aff_rule(items, itemcnt, "COMPOUNDWORDMAX", 2)
				2479	&& compmax == 0)
				2480	{
				2481	compmax = atoi((char *)items[1]);
				2482	if (compmax == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2483	smsg(_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2484	fname, lnum, items[1]);
				2485	}
				2486	else if (is_aff_rule(items, itemcnt, "COMPOUNDMIN", 2)
				2487	&& compminlen == 0)
				2488	{
				2489	compminlen = atoi((char *)items[1]);
				2490	if (compminlen == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2491	smsg(_("Wrong COMPOUNDMIN value in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2492	fname, lnum, items[1]);
				2493	}
				2494	else if (is_aff_rule(items, itemcnt, "COMPOUNDSYLMAX", 2)
				2495	&& compsylmax == 0)
				2496	{
				2497	compsylmax = atoi((char *)items[1]);
				2498	if (compsylmax == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2499	smsg(_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2500	fname, lnum, items[1]);
				2501	}
				2502	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDDUP", 1))
				2503	{
				2504	compoptions \|= COMP_CHECKDUP;
				2505	}
				2506	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDREP", 1))
				2507	{
				2508	compoptions \|= COMP_CHECKREP;
				2509	}
				2510	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDCASE", 1))
				2511	{
				2512	compoptions \|= COMP_CHECKCASE;
				2513	}
				2514	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDTRIPLE", 1))
				2515	{
				2516	compoptions \|= COMP_CHECKTRIPLE;
				2517	}
				2518	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 2))
				2519	{
				2520	if (atoi((char *)items[1]) == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2521	smsg(_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2522	fname, lnum, items[1]);
				2523	}
				2524	else if (is_aff_rule(items, itemcnt, "CHECKCOMPOUNDPATTERN", 3))
				2525	{
				2526	garray_T *gap = &spin->si_comppat;
				2527	int i;
				2528
				2529	/* Only add the couple if it isn't already there. */
				2530	for (i = 0; i < gap->ga_len - 1; i += 2)
				2531	if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
				2532	&& STRCMP(((char_u **)(gap->ga_data))[i + 1],
				2533	items[2]) == 0)
				2534	break;
				2535	if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
				2536	{
				2537	((char_u **)(gap->ga_data))[gap->ga_len++]
				2538	= getroom_save(spin, items[1]);
				2539	((char_u **)(gap->ga_data))[gap->ga_len++]
				2540	= getroom_save(spin, items[2]);
				2541	}
				2542	}
				2543	else if (is_aff_rule(items, itemcnt, "SYLLABLE", 2)
				2544	&& syllable == NULL)
				2545	{
				2546	syllable = getroom_save(spin, items[1]);
				2547	}
				2548	else if (is_aff_rule(items, itemcnt, "NOBREAK", 1))
				2549	{
				2550	spin->si_nobreak = TRUE;
				2551	}
				2552	else if (is_aff_rule(items, itemcnt, "NOSPLITSUGS", 1))
				2553	{
				2554	spin->si_nosplitsugs = TRUE;
				2555	}
				2556	else if (is_aff_rule(items, itemcnt, "NOCOMPOUNDSUGS", 1))
				2557	{
				2558	spin->si_nocompoundsugs = TRUE;
				2559	}
				2560	else if (is_aff_rule(items, itemcnt, "NOSUGFILE", 1))
				2561	{
				2562	spin->si_nosugfile = TRUE;
				2563	}
				2564	else if (is_aff_rule(items, itemcnt, "PFXPOSTPONE", 1))
				2565	{
				2566	aff->af_pfxpostpone = TRUE;
				2567	}
				2568	else if (is_aff_rule(items, itemcnt, "IGNOREEXTRA", 1))
				2569	{
				2570	aff->af_ignoreextra = TRUE;
				2571	}
				2572	else if ((STRCMP(items[0], "PFX") == 0
				2573	\|\| STRCMP(items[0], "SFX") == 0)
				2574	&& aff_todo == 0
				2575	&& itemcnt >= 4)
				2576	{
				2577	int lasti = 4;
				2578	char_u key[AH_KEY_LEN];
				2579
				2580	if (*items[0] == 'P')
				2581	tp = &aff->af_pref;
				2582	else
				2583	tp = &aff->af_suff;
				2584
				2585	/* Myspell allows the same affix name to be used multiple
				2586	* times. The affix files that do this have an undocumented
				2587	* "S" flag on all but the last block, thus we check for that
				2588	* and store it in ah_follows. */
				2589	vim_strncpy(key, items[1], AH_KEY_LEN - 1);
				2590	hi = hash_find(tp, key);
				2591	if (!HASHITEM_EMPTY(hi))
				2592	{
				2593	cur_aff = HI2AH(hi);
				2594	if (cur_aff->ah_combine != (*items[2] == 'Y'))
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2595	smsg(_("Different combining flag in continued affix block in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2596	fname, lnum, items[1]);
				2597	if (!cur_aff->ah_follows)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2598	smsg(_("Duplicate affix in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2599	fname, lnum, items[1]);
				2600	}
				2601	else
				2602	{
				2603	/* New affix letter. */
				2604	cur_aff = (affheader_T *)getroom(spin,
				2605	sizeof(affheader_T), TRUE);
				2606	if (cur_aff == NULL)
				2607	break;
				2608	cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
				2609	fname, lnum);
				2610	if (cur_aff->ah_flag == 0 \|\| STRLEN(items[1]) >= AH_KEY_LEN)
				2611	break;
				2612	if (cur_aff->ah_flag == aff->af_bad
				2613	\|\| cur_aff->ah_flag == aff->af_rare
				2614	\|\| cur_aff->ah_flag == aff->af_keepcase
				2615	\|\| cur_aff->ah_flag == aff->af_needaffix
				2616	\|\| cur_aff->ah_flag == aff->af_circumfix
				2617	\|\| cur_aff->ah_flag == aff->af_nosuggest
				2618	\|\| cur_aff->ah_flag == aff->af_needcomp
				2619	\|\| cur_aff->ah_flag == aff->af_comproot)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2620	smsg(_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2621	fname, lnum, items[1]);
				2622	STRCPY(cur_aff->ah_key, items[1]);
				2623	hash_add(tp, cur_aff->ah_key);
				2624
				2625	cur_aff->ah_combine = (*items[2] == 'Y');
				2626	}
				2627
				2628	/* Check for the "S" flag, which apparently means that another
				2629	* block with the same affix name is following. */
				2630	if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
				2631	{
				2632	++lasti;
				2633	cur_aff->ah_follows = TRUE;
				2634	}
				2635	else
				2636	cur_aff->ah_follows = FALSE;
				2637
				2638	/* Myspell allows extra text after the item, but that might
				2639	* mean mistakes go unnoticed. Require a comment-starter. */
				2640	if (itemcnt > lasti && *items[lasti] != '#')
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2641	smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2642
				2643	if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2644	smsg(_("Expected Y or N in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2645	fname, lnum, items[2]);
				2646
				2647	if (*items[0] == 'P' && aff->af_pfxpostpone)
				2648	{
				2649	if (cur_aff->ah_newID == 0)
				2650	{
				2651	/* Use a new number in the .spl file later, to be able
				2652	* to handle multiple .aff files. */
				2653	check_renumber(spin);
				2654	cur_aff->ah_newID = ++spin->si_newprefID;
				2655
				2656	/* We only really use ah_newID if the prefix is
				2657	* postponed. We know that only after handling all
				2658	* the items. */
				2659	did_postpone_prefix = FALSE;
				2660	}
				2661	else
				2662	/* Did use the ID in a previous block. */
				2663	did_postpone_prefix = TRUE;
				2664	}
				2665
				2666	aff_todo = atoi((char *)items[3]);
				2667	}
				2668	else if ((STRCMP(items[0], "PFX") == 0
				2669	\|\| STRCMP(items[0], "SFX") == 0)
				2670	&& aff_todo > 0
				2671	&& STRCMP(cur_aff->ah_key, items[1]) == 0
				2672	&& itemcnt >= 5)
				2673	{
				2674	affentry_T *aff_entry;
				2675	int upper = FALSE;
				2676	int lasti = 5;
				2677
				2678	/* Myspell allows extra text after the item, but that might
				2679	* mean mistakes go unnoticed. Require a comment-starter,
				2680	* unless IGNOREEXTRA is used. Hunspell uses a "-" item. */
				2681	if (itemcnt > lasti
				2682	&& !aff->af_ignoreextra
				2683	&& *items[lasti] != '#'
				2684	&& (STRCMP(items[lasti], "-") != 0
				2685	\|\| itemcnt != lasti + 1))
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2686	smsg(_(e_afftrailing), fname, lnum, items[lasti]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2687
				2688	/* New item for an affix letter. */
				2689	--aff_todo;
				2690	aff_entry = (affentry_T *)getroom(spin,
				2691	sizeof(affentry_T), TRUE);
				2692	if (aff_entry == NULL)
				2693	break;
				2694
				2695	if (STRCMP(items[2], "0") != 0)
				2696	aff_entry->ae_chop = getroom_save(spin, items[2]);
				2697	if (STRCMP(items[3], "0") != 0)
				2698	{
				2699	aff_entry->ae_add = getroom_save(spin, items[3]);
				2700
				2701	/* Recognize flags on the affix: abcd/XYZ */
				2702	aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
				2703	if (aff_entry->ae_flags != NULL)
				2704	{
				2705	*aff_entry->ae_flags++ = NUL;
				2706	aff_process_flags(aff, aff_entry);
				2707	}
				2708	}
				2709
				2710	/* Don't use an affix entry with non-ASCII characters when
				2711	* "spin->si_ascii" is TRUE. */
				2712	if (!spin->si_ascii \|\| !(has_non_ascii(aff_entry->ae_chop)
				2713	\|\| has_non_ascii(aff_entry->ae_add)))
				2714	{
				2715	aff_entry->ae_next = cur_aff->ah_first;
				2716	cur_aff->ah_first = aff_entry;
				2717
				2718	if (STRCMP(items[4], ".") != 0)
				2719	{
				2720	char_u buf[MAXLINELEN];
				2721
				2722	aff_entry->ae_cond = getroom_save(spin, items[4]);
				2723	if (*items[0] == 'P')
				2724	sprintf((char *)buf, "^%s", items[4]);
				2725	else
				2726	sprintf((char *)buf, "%s$", items[4]);
				2727	aff_entry->ae_prog = vim_regcomp(buf,
				2728	RE_MAGIC + RE_STRING + RE_STRICT);
				2729	if (aff_entry->ae_prog == NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2730	smsg(_("Broken condition in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2731	fname, lnum, items[4]);
				2732	}
				2733
				2734	/* For postponed prefixes we need an entry in si_prefcond
				2735	* for the condition. Use an existing one if possible.
				2736	* Can't be done for an affix with flags, ignoring
				2737	* COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
				2738	if (*items[0] == 'P' && aff->af_pfxpostpone
				2739	&& aff_entry->ae_flags == NULL)
				2740	{
				2741	/* When the chop string is one lower-case letter and
				2742	* the add string ends in the upper-case letter we set
				2743	* the "upper" flag, clear "ae_chop" and remove the
				2744	* letters from "ae_add". The condition must either
				2745	* be empty or start with the same letter. */
				2746	if (aff_entry->ae_chop != NULL
				2747	&& aff_entry->ae_add != NULL
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2748	&& aff_entry->ae_chop[(*mb_ptr2len)(
Bram Moolenaar	264b74f	2019-01-24 17:18:42 +0100	[diff] [blame]	2749	aff_entry->ae_chop)] == NUL)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2750	{
				2751	int c, c_up;
				2752
				2753	c = PTR2CHAR(aff_entry->ae_chop);
				2754	c_up = SPELL_TOUPPER(c);
				2755	if (c_up != c
				2756	&& (aff_entry->ae_cond == NULL
				2757	\|\| PTR2CHAR(aff_entry->ae_cond) == c))
				2758	{
				2759	p = aff_entry->ae_add
				2760	+ STRLEN(aff_entry->ae_add);
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2761	MB_PTR_BACK(aff_entry->ae_add, p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2762	if (PTR2CHAR(p) == c_up)
				2763	{
				2764	upper = TRUE;
				2765	aff_entry->ae_chop = NULL;
				2766	*p = NUL;
				2767
				2768	/* The condition is matched with the
				2769	* actual word, thus must check for the
				2770	* upper-case letter. */
				2771	if (aff_entry->ae_cond != NULL)
				2772	{
				2773	char_u buf[MAXLINELEN];
Bram Moolenaar	264b74f	2019-01-24 17:18:42 +0100	[diff] [blame]	2774
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2775	if (has_mbyte)
				2776	{
				2777	onecap_copy(items[4], buf, TRUE);
				2778	aff_entry->ae_cond = getroom_save(
				2779	spin, buf);
				2780	}
				2781	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2782	*aff_entry->ae_cond = c_up;
				2783	if (aff_entry->ae_cond != NULL)
				2784	{
				2785	sprintf((char *)buf, "^%s",
				2786	aff_entry->ae_cond);
				2787	vim_regfree(aff_entry->ae_prog);
				2788	aff_entry->ae_prog = vim_regcomp(
				2789	buf, RE_MAGIC + RE_STRING);
				2790	}
				2791	}
				2792	}
				2793	}
				2794	}
				2795
				2796	if (aff_entry->ae_chop == NULL
				2797	&& aff_entry->ae_flags == NULL)
				2798	{
				2799	int idx;
				2800	char_u **pp;
				2801	int n;
				2802
				2803	/* Find a previously used condition. */
				2804	for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
				2805	--idx)
				2806	{
				2807	p = ((char_u **)spin->si_prefcond.ga_data)[idx];
				2808	if (str_equal(p, aff_entry->ae_cond))
				2809	break;
				2810	}
				2811	if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
				2812	{
				2813	/* Not found, add a new condition. */
				2814	idx = spin->si_prefcond.ga_len++;
				2815	pp = ((char_u **)spin->si_prefcond.ga_data)
				2816	+ idx;
				2817	if (aff_entry->ae_cond == NULL)
				2818	*pp = NULL;
				2819	else
				2820	*pp = getroom_save(spin,
				2821	aff_entry->ae_cond);
				2822	}
				2823
				2824	/* Add the prefix to the prefix tree. */
				2825	if (aff_entry->ae_add == NULL)
				2826	p = (char_u *)"";
				2827	else
				2828	p = aff_entry->ae_add;
				2829
				2830	/* PFX_FLAGS is a negative number, so that
				2831	* tree_add_word() knows this is the prefix tree. */
				2832	n = PFX_FLAGS;
				2833	if (!cur_aff->ah_combine)
				2834	n \|= WFP_NC;
				2835	if (upper)
				2836	n \|= WFP_UP;
				2837	if (aff_entry->ae_comppermit)
				2838	n \|= WFP_COMPPERMIT;
				2839	if (aff_entry->ae_compforbid)
				2840	n \|= WFP_COMPFORBID;
				2841	tree_add_word(spin, p, spin->si_prefroot, n,
				2842	idx, cur_aff->ah_newID);
				2843	did_postpone_prefix = TRUE;
				2844	}
				2845
				2846	/* Didn't actually use ah_newID, backup si_newprefID. */
				2847	if (aff_todo == 0 && !did_postpone_prefix)
				2848	{
				2849	--spin->si_newprefID;
				2850	cur_aff->ah_newID = 0;
				2851	}
				2852	}
				2853	}
				2854	}
				2855	else if (is_aff_rule(items, itemcnt, "FOL", 2) && fol == NULL)
				2856	{
				2857	fol = vim_strsave(items[1]);
				2858	}
				2859	else if (is_aff_rule(items, itemcnt, "LOW", 2) && low == NULL)
				2860	{
				2861	low = vim_strsave(items[1]);
				2862	}
				2863	else if (is_aff_rule(items, itemcnt, "UPP", 2) && upp == NULL)
				2864	{
				2865	upp = vim_strsave(items[1]);
				2866	}
				2867	else if (is_aff_rule(items, itemcnt, "REP", 2)
				2868	\|\| is_aff_rule(items, itemcnt, "REPSAL", 2))
				2869	{
				2870	/* Ignore REP/REPSAL count */;
				2871	if (!isdigit(*items[1]))
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2872	smsg(_("Expected REP(SAL) count in %s line %d"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2873	fname, lnum);
				2874	}
				2875	else if ((STRCMP(items[0], "REP") == 0
				2876	\|\| STRCMP(items[0], "REPSAL") == 0)
				2877	&& itemcnt >= 3)
				2878	{
				2879	/* REP/REPSAL item */
				2880	/* Myspell ignores extra arguments, we require it starts with
				2881	* # to detect mistakes. */
				2882	if (itemcnt > 3 && items[3][0] != '#')
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2883	smsg(_(e_afftrailing), fname, lnum, items[3]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2884	if (items[0][3] == 'S' ? do_repsal : do_rep)
				2885	{
				2886	/* Replace underscore with space (can't include a space
				2887	* directly). */
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2888	for (p = items[1]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2889	if (*p == '_')
				2890	*p = ' ';
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	2891	for (p = items[2]; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2892	if (*p == '_')
				2893	*p = ' ';
				2894	add_fromto(spin, items[0][3] == 'S'
				2895	? &spin->si_repsal
				2896	: &spin->si_rep, items[1], items[2]);
				2897	}
				2898	}
				2899	else if (is_aff_rule(items, itemcnt, "MAP", 2))
				2900	{
				2901	/* MAP item or count */
				2902	if (!found_map)
				2903	{
				2904	/* First line contains the count. */
				2905	found_map = TRUE;
				2906	if (!isdigit(*items[1]))
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2907	smsg(_("Expected MAP count in %s line %d"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2908	fname, lnum);
				2909	}
				2910	else if (do_mapline)
				2911	{
				2912	int c;
				2913
				2914	/* Check that every character appears only once. */
				2915	for (p = items[1]; *p != NUL; )
				2916	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2917	c = mb_ptr2char_adv(&p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2918	if ((spin->si_map.ga_len > 0
				2919	&& vim_strchr(spin->si_map.ga_data, c)
				2920	!= NULL)
				2921	\|\| vim_strchr(p, c) != NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2922	smsg(_("Duplicate character in MAP in %s line %d"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2923	fname, lnum);
				2924	}
				2925
				2926	/* We simply concatenate all the MAP strings, separated by
				2927	* slashes. */
				2928	ga_concat(&spin->si_map, items[1]);
				2929	ga_append(&spin->si_map, '/');
				2930	}
				2931	}
				2932	/* Accept "SAL from to" and "SAL from to #comment". */
				2933	else if (is_aff_rule(items, itemcnt, "SAL", 3))
				2934	{
				2935	if (do_sal)
				2936	{
				2937	/* SAL item (sounds-a-like)
				2938	* Either one of the known keys or a from-to pair. */
				2939	if (STRCMP(items[1], "followup") == 0)
				2940	spin->si_followup = sal_to_bool(items[2]);
				2941	else if (STRCMP(items[1], "collapse_result") == 0)
				2942	spin->si_collapse = sal_to_bool(items[2]);
				2943	else if (STRCMP(items[1], "remove_accents") == 0)
				2944	spin->si_rem_accents = sal_to_bool(items[2]);
				2945	else
				2946	/* when "to" is "_" it means empty */
				2947	add_fromto(spin, &spin->si_sal, items[1],
				2948	STRCMP(items[2], "_") == 0 ? (char_u *)""
				2949	: items[2]);
				2950	}
				2951	}
				2952	else if (is_aff_rule(items, itemcnt, "SOFOFROM", 2)
				2953	&& sofofrom == NULL)
				2954	{
				2955	sofofrom = getroom_save(spin, items[1]);
				2956	}
				2957	else if (is_aff_rule(items, itemcnt, "SOFOTO", 2)
				2958	&& sofoto == NULL)
				2959	{
				2960	sofoto = getroom_save(spin, items[1]);
				2961	}
				2962	else if (STRCMP(items[0], "COMMON") == 0)
				2963	{
				2964	int i;
				2965
				2966	for (i = 1; i < itemcnt; ++i)
				2967	{
				2968	if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
				2969	items[i])))
				2970	{
				2971	p = vim_strsave(items[i]);
				2972	if (p == NULL)
				2973	break;
				2974	hash_add(&spin->si_commonwords, p);
				2975	}
				2976	}
				2977	}
				2978	else
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	2979	smsg(_("Unrecognized or duplicate item in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	2980	fname, lnum, items[0]);
				2981	}
				2982	}
				2983
				2984	if (fol != NULL \|\| low != NULL \|\| upp != NULL)
				2985	{
				2986	if (spin->si_clear_chartab)
				2987	{
				2988	/* Clear the char type tables, don't want to use any of the
				2989	* currently used spell properties. */
				2990	init_spell_chartab();
				2991	spin->si_clear_chartab = FALSE;
				2992	}
				2993
				2994	/*
				2995	* Don't write a word table for an ASCII file, so that we don't check
				2996	* for conflicts with a word table that matches 'encoding'.
				2997	* Don't write one for utf-8 either, we use utf_*() and
				2998	* mb_get_class(), the list of chars in the file will be incomplete.
				2999	*/
Bram Moolenaar	264b74f	2019-01-24 17:18:42 +0100	[diff] [blame]	3000	if (!spin->si_ascii && !enc_utf8)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3001	{
				3002	if (fol == NULL \|\| low == NULL \|\| upp == NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3003	smsg(_("Missing FOL/LOW/UPP line in %s"), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3004	else
				3005	(void)set_spell_chartab(fol, low, upp);
				3006	}
				3007
				3008	vim_free(fol);
				3009	vim_free(low);
				3010	vim_free(upp);
				3011	}
				3012
				3013	/* Use compound specifications of the .aff file for the spell info. */
				3014	if (compmax != 0)
				3015	{
				3016	aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
				3017	spin->si_compmax = compmax;
				3018	}
				3019
				3020	if (compminlen != 0)
				3021	{
				3022	aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
				3023	spin->si_compminlen = compminlen;
				3024	}
				3025
				3026	if (compsylmax != 0)
				3027	{
				3028	if (syllable == NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3029	smsg(_("COMPOUNDSYLMAX used without SYLLABLE"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3030	aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
				3031	spin->si_compsylmax = compsylmax;
				3032	}
				3033
				3034	if (compoptions != 0)
				3035	{
				3036	aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
				3037	spin->si_compoptions \|= compoptions;
				3038	}
				3039
				3040	if (compflags != NULL)
				3041	process_compflags(spin, aff, compflags);
				3042
				3043	/* Check that we didn't use too many renumbered flags. */
				3044	if (spin->si_newcompID < spin->si_newprefID)
				3045	{
				3046	if (spin->si_newcompID == 127 \|\| spin->si_newcompID == 255)
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	3047	msg(_("Too many postponed prefixes"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3048	else if (spin->si_newprefID == 0 \|\| spin->si_newprefID == 127)
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	3049	msg(_("Too many compound flags"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3050	else
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	3051	msg(_("Too many postponed prefixes and/or compound flags"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3052	}
				3053
				3054	if (syllable != NULL)
				3055	{
				3056	aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
				3057	spin->si_syllable = syllable;
				3058	}
				3059
				3060	if (sofofrom != NULL \|\| sofoto != NULL)
				3061	{
				3062	if (sofofrom == NULL \|\| sofoto == NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3063	smsg(_("Missing SOFO%s line in %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3064	sofofrom == NULL ? "FROM" : "TO", fname);
				3065	else if (spin->si_sal.ga_len > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3066	smsg(_("Both SAL and SOFO lines in %s"), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3067	else
				3068	{
				3069	aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
				3070	aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
				3071	spin->si_sofofr = sofofrom;
				3072	spin->si_sofoto = sofoto;
				3073	}
				3074	}
				3075
				3076	if (midword != NULL)
				3077	{
				3078	aff_check_string(spin->si_midword, midword, "MIDWORD");
				3079	spin->si_midword = midword;
				3080	}
				3081
				3082	vim_free(pc);
				3083	fclose(fd);
				3084	return aff;
				3085	}
				3086
				3087	/*
				3088	* Return TRUE when items[0] equals "rulename", there are "mincount" items or
				3089	* a comment is following after item "mincount".
				3090	*/
				3091	static int
				3092	is_aff_rule(
				3093	char_u **items,
				3094	int itemcnt,
				3095	char *rulename,
				3096	int mincount)
				3097	{
				3098	return (STRCMP(items[0], rulename) == 0
				3099	&& (itemcnt == mincount
				3100	\|\| (itemcnt > mincount && items[mincount][0] == '#')));
				3101	}
				3102
				3103	/*
				3104	* For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
				3105	* ae_flags to ae_comppermit and ae_compforbid.
				3106	*/
				3107	static void
				3108	aff_process_flags(afffile_T affile, affentry_T entry)
				3109	{
				3110	char_u *p;
				3111	char_u *prevp;
				3112	unsigned flag;
				3113
				3114	if (entry->ae_flags != NULL
				3115	&& (affile->af_compforbid != 0 \|\| affile->af_comppermit != 0))
				3116	{
				3117	for (p = entry->ae_flags; *p != NUL; )
				3118	{
				3119	prevp = p;
				3120	flag = get_affitem(affile->af_flagtype, &p);
				3121	if (flag == affile->af_comppermit \|\| flag == affile->af_compforbid)
				3122	{
				3123	STRMOVE(prevp, p);
				3124	p = prevp;
				3125	if (flag == affile->af_comppermit)
				3126	entry->ae_comppermit = TRUE;
				3127	else
				3128	entry->ae_compforbid = TRUE;
				3129	}
				3130	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3131	++p;
				3132	}
				3133	if (*entry->ae_flags == NUL)
				3134	entry->ae_flags = NULL; /* nothing left */
				3135	}
				3136	}
				3137
				3138	/*
				3139	* Return TRUE if "s" is the name of an info item in the affix file.
				3140	*/
				3141	static int
				3142	spell_info_item(char_u *s)
				3143	{
				3144	return STRCMP(s, "NAME") == 0
				3145	\|\| STRCMP(s, "HOME") == 0
				3146	\|\| STRCMP(s, "VERSION") == 0
				3147	\|\| STRCMP(s, "AUTHOR") == 0
				3148	\|\| STRCMP(s, "EMAIL") == 0
				3149	\|\| STRCMP(s, "COPYRIGHT") == 0;
				3150	}
				3151
				3152	/*
				3153	* Turn an affix flag name into a number, according to the FLAG type.
				3154	* returns zero for failure.
				3155	*/
				3156	static unsigned
				3157	affitem2flag(
				3158	int flagtype,
				3159	char_u *item,
				3160	char_u *fname,
				3161	int lnum)
				3162	{
				3163	unsigned res;
				3164	char_u *p = item;
				3165
				3166	res = get_affitem(flagtype, &p);
				3167	if (res == 0)
				3168	{
				3169	if (flagtype == AFT_NUM)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3170	smsg(_("Flag is not a number in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3171	fname, lnum, item);
				3172	else
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3173	smsg(_("Illegal flag in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3174	fname, lnum, item);
				3175	}
				3176	if (*p != NUL)
				3177	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3178	smsg(_(e_affname), fname, lnum, item);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3179	return 0;
				3180	}
				3181
				3182	return res;
				3183	}
				3184
				3185	/*
				3186	* Get one affix name from "*pp" and advance the pointer.
Bram Moolenaar	3d2a47c	2019-11-07 20:48:42 +0100	[diff] [blame]	3187	* Returns ZERO_FLAG for "0".
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3188	* Returns zero for an error, still advances the pointer then.
				3189	*/
				3190	static unsigned
				3191	get_affitem(int flagtype, char_u **pp)
				3192	{
				3193	int res;
				3194
				3195	if (flagtype == AFT_NUM)
				3196	{
				3197	if (!VIM_ISDIGIT(**pp))
				3198	{
				3199	++pp; / always advance, avoid getting stuck */
				3200	return 0;
				3201	}
				3202	res = getdigits(pp);
Bram Moolenaar	3d2a47c	2019-11-07 20:48:42 +0100	[diff] [blame]	3203	if (res == 0)
				3204	res = ZERO_FLAG;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3205	}
				3206	else
				3207	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3208	res = mb_ptr2char_adv(pp);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3209	if (flagtype == AFT_LONG \|\| (flagtype == AFT_CAPLONG
				3210	&& res >= 'A' && res <= 'Z'))
				3211	{
				3212	if (**pp == NUL)
				3213	return 0;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3214	res = mb_ptr2char_adv(pp) + (res << 16);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3215	}
				3216	}
				3217	return res;
				3218	}
				3219
				3220	/*
				3221	* Process the "compflags" string used in an affix file and append it to
				3222	* spin->si_compflags.
				3223	* The processing involves changing the affix names to ID numbers, so that
				3224	* they fit in one byte.
				3225	*/
				3226	static void
				3227	process_compflags(
				3228	spellinfo_T *spin,
				3229	afffile_T *aff,
				3230	char_u *compflags)
				3231	{
				3232	char_u *p;
				3233	char_u *prevp;
				3234	unsigned flag;
				3235	compitem_T *ci;
				3236	int id;
				3237	int len;
				3238	char_u *tp;
				3239	char_u key[AH_KEY_LEN];
				3240	hashitem_T *hi;
				3241
				3242	/* Make room for the old and the new compflags, concatenated with a / in
				3243	* between. Processing it makes it shorter, but we don't know by how
				3244	* much, thus allocate the maximum. */
				3245	len = (int)STRLEN(compflags) + 1;
				3246	if (spin->si_compflags != NULL)
				3247	len += (int)STRLEN(spin->si_compflags) + 1;
				3248	p = getroom(spin, len, FALSE);
				3249	if (p == NULL)
				3250	return;
				3251	if (spin->si_compflags != NULL)
				3252	{
				3253	STRCPY(p, spin->si_compflags);
				3254	STRCAT(p, "/");
				3255	}
				3256	spin->si_compflags = p;
				3257	tp = p + STRLEN(p);
				3258
				3259	for (p = compflags; *p != NUL; )
				3260	{
				3261	if (vim_strchr((char_u )"/?+[]", *p) != NULL)
				3262	/* Copy non-flag characters directly. */
				3263	tp++ = p++;
				3264	else
				3265	{
				3266	/* First get the flag number, also checks validity. */
				3267	prevp = p;
				3268	flag = get_affitem(aff->af_flagtype, &p);
				3269	if (flag != 0)
				3270	{
				3271	/* Find the flag in the hashtable. If it was used before, use
				3272	* the existing ID. Otherwise add a new entry. */
				3273	vim_strncpy(key, prevp, p - prevp);
				3274	hi = hash_find(&aff->af_comp, key);
				3275	if (!HASHITEM_EMPTY(hi))
				3276	id = HI2CI(hi)->ci_newID;
				3277	else
				3278	{
				3279	ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
				3280	if (ci == NULL)
				3281	break;
				3282	STRCPY(ci->ci_key, key);
				3283	ci->ci_flag = flag;
				3284	/* Avoid using a flag ID that has a special meaning in a
				3285	* regexp (also inside []). */
				3286	do
				3287	{
				3288	check_renumber(spin);
				3289	id = spin->si_newcompID--;
				3290	} while (vim_strchr((char_u )"/?+[]\\-^", id) != NULL);
				3291	ci->ci_newID = id;
				3292	hash_add(&aff->af_comp, ci->ci_key);
				3293	}
				3294	*tp++ = id;
				3295	}
				3296	if (aff->af_flagtype == AFT_NUM && *p == ',')
				3297	++p;
				3298	}
				3299	}
				3300
				3301	*tp = NUL;
				3302	}
				3303
				3304	/*
				3305	* Check that the new IDs for postponed affixes and compounding don't overrun
				3306	* each other. We have almost 255 available, but start at 0-127 to avoid
				3307	* using two bytes for utf-8. When the 0-127 range is used up go to 128-255.
				3308	* When that is used up an error message is given.
				3309	*/
				3310	static void
				3311	check_renumber(spellinfo_T *spin)
				3312	{
				3313	if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
				3314	{
				3315	spin->si_newprefID = 127;
				3316	spin->si_newcompID = 255;
				3317	}
				3318	}
				3319
				3320	/*
				3321	* Return TRUE if flag "flag" appears in affix list "afflist".
				3322	*/
				3323	static int
				3324	flag_in_afflist(int flagtype, char_u *afflist, unsigned flag)
				3325	{
				3326	char_u *p;
				3327	unsigned n;
				3328
				3329	switch (flagtype)
				3330	{
				3331	case AFT_CHAR:
				3332	return vim_strchr(afflist, flag) != NULL;
				3333
				3334	case AFT_CAPLONG:
				3335	case AFT_LONG:
				3336	for (p = afflist; *p != NUL; )
				3337	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3338	n = mb_ptr2char_adv(&p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3339	if ((flagtype == AFT_LONG \|\| (n >= 'A' && n <= 'Z'))
				3340	&& *p != NUL)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3341	n = mb_ptr2char_adv(&p) + (n << 16);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3342	if (n == flag)
				3343	return TRUE;
				3344	}
				3345	break;
				3346
				3347	case AFT_NUM:
				3348	for (p = afflist; *p != NUL; )
				3349	{
				3350	n = getdigits(&p);
Bram Moolenaar	3d2a47c	2019-11-07 20:48:42 +0100	[diff] [blame]	3351	if (n == 0)
				3352	n = ZERO_FLAG;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3353	if (n == flag)
				3354	return TRUE;
				3355	if (p != NUL) / skip over comma */
				3356	++p;
				3357	}
				3358	break;
				3359	}
				3360	return FALSE;
				3361	}
				3362
				3363	/*
				3364	* Give a warning when "spinval" and "affval" numbers are set and not the same.
				3365	*/
				3366	static void
				3367	aff_check_number(int spinval, int affval, char *name)
				3368	{
				3369	if (spinval != 0 && spinval != affval)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3370	smsg(_("%s value differs from what is used in another .aff file"), name);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3371	}
				3372
				3373	/*
				3374	* Give a warning when "spinval" and "affval" strings are set and not the same.
				3375	*/
				3376	static void
				3377	aff_check_string(char_u spinval, char_u affval, char *name)
				3378	{
				3379	if (spinval != NULL && STRCMP(spinval, affval) != 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3380	smsg(_("%s value differs from what is used in another .aff file"), name);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3381	}
				3382
				3383	/*
				3384	* Return TRUE if strings "s1" and "s2" are equal. Also consider both being
				3385	* NULL as equal.
				3386	*/
				3387	static int
				3388	str_equal(char_u s1, char_u s2)
				3389	{
				3390	if (s1 == NULL \|\| s2 == NULL)
				3391	return s1 == s2;
				3392	return STRCMP(s1, s2) == 0;
				3393	}
				3394
				3395	/*
				3396	* Add a from-to item to "gap". Used for REP and SAL items.
				3397	* They are stored case-folded.
				3398	*/
				3399	static void
				3400	add_fromto(
				3401	spellinfo_T *spin,
				3402	garray_T *gap,
				3403	char_u *from,
				3404	char_u *to)
				3405	{
				3406	fromto_T *ftp;
				3407	char_u word[MAXWLEN];
				3408
				3409	if (ga_grow(gap, 1) == OK)
				3410	{
				3411	ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
				3412	(void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
				3413	ftp->ft_from = getroom_save(spin, word);
				3414	(void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
				3415	ftp->ft_to = getroom_save(spin, word);
				3416	++gap->ga_len;
				3417	}
				3418	}
				3419
				3420	/*
				3421	* Convert a boolean argument in a SAL line to TRUE or FALSE;
				3422	*/
				3423	static int
				3424	sal_to_bool(char_u *s)
				3425	{
				3426	return STRCMP(s, "1") == 0 \|\| STRCMP(s, "true") == 0;
				3427	}
				3428
				3429	/*
				3430	* Free the structure filled by spell_read_aff().
				3431	*/
				3432	static void
				3433	spell_free_aff(afffile_T *aff)
				3434	{
				3435	hashtab_T *ht;
				3436	hashitem_T *hi;
				3437	int todo;
				3438	affheader_T *ah;
				3439	affentry_T *ae;
				3440
				3441	vim_free(aff->af_enc);
				3442
				3443	/* All this trouble to free the "ae_prog" items... */
				3444	for (ht = &aff->af_pref; ; ht = &aff->af_suff)
				3445	{
				3446	todo = (int)ht->ht_used;
				3447	for (hi = ht->ht_array; todo > 0; ++hi)
				3448	{
				3449	if (!HASHITEM_EMPTY(hi))
				3450	{
				3451	--todo;
				3452	ah = HI2AH(hi);
				3453	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3454	vim_regfree(ae->ae_prog);
				3455	}
				3456	}
				3457	if (ht == &aff->af_suff)
				3458	break;
				3459	}
				3460
				3461	hash_clear(&aff->af_pref);
				3462	hash_clear(&aff->af_suff);
				3463	hash_clear(&aff->af_comp);
				3464	}
				3465
				3466	/*
				3467	* Read dictionary file "fname".
				3468	* Returns OK or FAIL;
				3469	*/
				3470	static int
				3471	spell_read_dic(spellinfo_T spin, char_u fname, afffile_T *affile)
				3472	{
				3473	hashtab_T ht;
				3474	char_u line[MAXLINELEN];
				3475	char_u *p;
				3476	char_u *afflist;
				3477	char_u store_afflist[MAXWLEN];
				3478	int pfxlen;
				3479	int need_affix;
				3480	char_u *dw;
				3481	char_u *pc;
				3482	char_u *w;
				3483	int l;
				3484	hash_T hash;
				3485	hashitem_T *hi;
				3486	FILE *fd;
				3487	int lnum = 1;
				3488	int non_ascii = 0;
				3489	int retval = OK;
				3490	char_u message[MAXLINELEN + MAXWLEN];
				3491	int flags;
				3492	int duplicate = 0;
				3493
				3494	/*
				3495	* Open the file.
				3496	*/
				3497	fd = mch_fopen((char *)fname, "r");
				3498	if (fd == NULL)
				3499	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3500	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3501	return FAIL;
				3502	}
				3503
				3504	/* The hashtable is only used to detect duplicated words. */
				3505	hash_init(&ht);
				3506
				3507	vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaar	c166927	2018-06-19 14:23:53 +0200	[diff] [blame]	3508	_("Reading dictionary file %s..."), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3509	spell_message(spin, IObuff);
				3510
				3511	/* start with a message for the first line */
				3512	spin->si_msg_count = 999999;
				3513
				3514	/* Read and ignore the first line: word count. */
				3515	(void)vim_fgets(line, MAXLINELEN, fd);
				3516	if (!vim_isdigit(*skipwhite(line)))
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3517	semsg(_("E760: No word count in %s"), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3518
				3519	/*
				3520	* Read all the lines in the file one by one.
				3521	* The words are converted to 'encoding' here, before being added to
				3522	* the hashtable.
				3523	*/
				3524	while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
				3525	{
				3526	line_breakcheck();
				3527	++lnum;
				3528	if (line[0] == '#' \|\| line[0] == '/')
				3529	continue; /* comment line */
				3530
				3531	/* Remove CR, LF and white space from the end. White space halfway
				3532	* the word is kept to allow e.g., "et al.". */
				3533	l = (int)STRLEN(line);
				3534	while (l > 0 && line[l - 1] <= ' ')
				3535	--l;
				3536	if (l == 0)
				3537	continue; /* empty line */
				3538	line[l] = NUL;
				3539
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3540	/* Convert from "SET" to 'encoding' when needed. */
				3541	if (spin->si_conv.vc_type != CONV_NONE)
				3542	{
				3543	pc = string_convert(&spin->si_conv, line, NULL);
				3544	if (pc == NULL)
				3545	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3546	smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3547	fname, lnum, line);
				3548	continue;
				3549	}
				3550	w = pc;
				3551	}
				3552	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3553	{
				3554	pc = NULL;
				3555	w = line;
				3556	}
				3557
				3558	/* Truncate the word at the "/", set "afflist" to what follows.
				3559	* Replace "\/" by "/" and "\\" by "\". */
				3560	afflist = NULL;
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3561	for (p = w; *p != NUL; MB_PTR_ADV(p))
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3562	{
				3563	if (*p == '\\' && (p[1] == '\\' \|\| p[1] == '/'))
				3564	STRMOVE(p, p + 1);
				3565	else if (*p == '/')
				3566	{
				3567	*p = NUL;
				3568	afflist = p + 1;
				3569	break;
				3570	}
				3571	}
				3572
				3573	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				3574	if (spin->si_ascii && has_non_ascii(w))
				3575	{
				3576	++non_ascii;
				3577	vim_free(pc);
				3578	continue;
				3579	}
				3580
				3581	/* This takes time, print a message every 10000 words. */
				3582	if (spin->si_verbose && spin->si_msg_count > 10000)
				3583	{
				3584	spin->si_msg_count = 0;
				3585	vim_snprintf((char *)message, sizeof(message),
Bram Moolenaar	ea39176	2018-04-08 13:07:22 +0200	[diff] [blame]	3586	_("line %6d, word %6ld - %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3587	lnum, spin->si_foldwcount + spin->si_keepwcount, w);
				3588	msg_start();
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	3589	msg_outtrans_long_attr(message, 0);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3590	msg_clr_eos();
				3591	msg_didout = FALSE;
				3592	msg_col = 0;
				3593	out_flush();
				3594	}
				3595
				3596	/* Store the word in the hashtable to be able to find duplicates. */
				3597	dw = (char_u *)getroom_save(spin, w);
				3598	if (dw == NULL)
				3599	{
				3600	retval = FAIL;
				3601	vim_free(pc);
				3602	break;
				3603	}
				3604
				3605	hash = hash_hash(dw);
				3606	hi = hash_lookup(&ht, dw, hash);
				3607	if (!HASHITEM_EMPTY(hi))
				3608	{
				3609	if (p_verbose > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3610	smsg(_("Duplicate word in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3611	fname, lnum, dw);
				3612	else if (duplicate == 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3613	smsg(_("First duplicate word in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3614	fname, lnum, dw);
				3615	++duplicate;
				3616	}
				3617	else
				3618	hash_add_item(&ht, hi, dw, hash);
				3619
				3620	flags = 0;
				3621	store_afflist[0] = NUL;
				3622	pfxlen = 0;
				3623	need_affix = FALSE;
				3624	if (afflist != NULL)
				3625	{
				3626	/* Extract flags from the affix list. */
				3627	flags \|= get_affix_flags(affile, afflist);
				3628
				3629	if (affile->af_needaffix != 0 && flag_in_afflist(
				3630	affile->af_flagtype, afflist, affile->af_needaffix))
				3631	need_affix = TRUE;
				3632
				3633	if (affile->af_pfxpostpone)
				3634	/* Need to store the list of prefix IDs with the word. */
				3635	pfxlen = get_pfxlist(affile, afflist, store_afflist);
				3636
				3637	if (spin->si_compflags != NULL)
				3638	/* Need to store the list of compound flags with the word.
				3639	* Concatenate them to the list of prefix IDs. */
				3640	get_compflags(affile, afflist, store_afflist + pfxlen);
				3641	}
				3642
				3643	/* Add the word to the word tree(s). */
				3644	if (store_word(spin, dw, flags, spin->si_region,
				3645	store_afflist, need_affix) == FAIL)
				3646	retval = FAIL;
				3647
				3648	if (afflist != NULL)
				3649	{
				3650	/* Find all matching suffixes and add the resulting words.
				3651	* Additionally do matching prefixes that combine. */
				3652	if (store_aff_word(spin, dw, afflist, affile,
				3653	&affile->af_suff, &affile->af_pref,
				3654	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3655	retval = FAIL;
				3656
				3657	/* Find all matching prefixes and add the resulting words. */
				3658	if (store_aff_word(spin, dw, afflist, affile,
				3659	&affile->af_pref, NULL,
				3660	CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
				3661	retval = FAIL;
				3662	}
				3663
				3664	vim_free(pc);
				3665	}
				3666
				3667	if (duplicate > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3668	smsg(_("%d duplicate word(s) in %s"), duplicate, fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3669	if (spin->si_ascii && non_ascii > 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	3670	smsg(_("Ignored %d word(s) with non-ASCII characters in %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3671	non_ascii, fname);
				3672	hash_clear(&ht);
				3673
				3674	fclose(fd);
				3675	return retval;
				3676	}
				3677
				3678	/*
				3679	* Check for affix flags in "afflist" that are turned into word flags.
				3680	* Return WF_ flags.
				3681	*/
				3682	static int
				3683	get_affix_flags(afffile_T affile, char_u afflist)
				3684	{
				3685	int flags = 0;
				3686
				3687	if (affile->af_keepcase != 0 && flag_in_afflist(
				3688	affile->af_flagtype, afflist, affile->af_keepcase))
				3689	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				3690	if (affile->af_rare != 0 && flag_in_afflist(
				3691	affile->af_flagtype, afflist, affile->af_rare))
				3692	flags \|= WF_RARE;
				3693	if (affile->af_bad != 0 && flag_in_afflist(
				3694	affile->af_flagtype, afflist, affile->af_bad))
				3695	flags \|= WF_BANNED;
				3696	if (affile->af_needcomp != 0 && flag_in_afflist(
				3697	affile->af_flagtype, afflist, affile->af_needcomp))
				3698	flags \|= WF_NEEDCOMP;
				3699	if (affile->af_comproot != 0 && flag_in_afflist(
				3700	affile->af_flagtype, afflist, affile->af_comproot))
				3701	flags \|= WF_COMPROOT;
				3702	if (affile->af_nosuggest != 0 && flag_in_afflist(
				3703	affile->af_flagtype, afflist, affile->af_nosuggest))
				3704	flags \|= WF_NOSUGGEST;
				3705	return flags;
				3706	}
				3707
				3708	/*
				3709	* Get the list of prefix IDs from the affix list "afflist".
				3710	* Used for PFXPOSTPONE.
				3711	* Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
				3712	* and return the number of affixes.
				3713	*/
				3714	static int
				3715	get_pfxlist(
				3716	afffile_T *affile,
				3717	char_u *afflist,
				3718	char_u *store_afflist)
				3719	{
				3720	char_u *p;
				3721	char_u *prevp;
				3722	int cnt = 0;
				3723	int id;
				3724	char_u key[AH_KEY_LEN];
				3725	hashitem_T *hi;
				3726
				3727	for (p = afflist; *p != NUL; )
				3728	{
				3729	prevp = p;
				3730	if (get_affitem(affile->af_flagtype, &p) != 0)
				3731	{
				3732	/* A flag is a postponed prefix flag if it appears in "af_pref"
Bram Moolenaar	c4568ab	2018-11-16 16:21:05 +0100	[diff] [blame]	3733	* and its ID is not zero. */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3734	vim_strncpy(key, prevp, p - prevp);
				3735	hi = hash_find(&affile->af_pref, key);
				3736	if (!HASHITEM_EMPTY(hi))
				3737	{
				3738	id = HI2AH(hi)->ah_newID;
				3739	if (id != 0)
				3740	store_afflist[cnt++] = id;
				3741	}
				3742	}
				3743	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3744	++p;
				3745	}
				3746
				3747	store_afflist[cnt] = NUL;
				3748	return cnt;
				3749	}
				3750
				3751	/*
				3752	* Get the list of compound IDs from the affix list "afflist" that are used
				3753	* for compound words.
				3754	* Puts the flags in "store_afflist[]".
				3755	*/
				3756	static void
				3757	get_compflags(
				3758	afffile_T *affile,
				3759	char_u *afflist,
				3760	char_u *store_afflist)
				3761	{
				3762	char_u *p;
				3763	char_u *prevp;
				3764	int cnt = 0;
				3765	char_u key[AH_KEY_LEN];
				3766	hashitem_T *hi;
				3767
				3768	for (p = afflist; *p != NUL; )
				3769	{
				3770	prevp = p;
				3771	if (get_affitem(affile->af_flagtype, &p) != 0)
				3772	{
				3773	/* A flag is a compound flag if it appears in "af_comp". */
				3774	vim_strncpy(key, prevp, p - prevp);
				3775	hi = hash_find(&affile->af_comp, key);
				3776	if (!HASHITEM_EMPTY(hi))
				3777	store_afflist[cnt++] = HI2CI(hi)->ci_newID;
				3778	}
				3779	if (affile->af_flagtype == AFT_NUM && *p == ',')
				3780	++p;
				3781	}
				3782
				3783	store_afflist[cnt] = NUL;
				3784	}
				3785
				3786	/*
				3787	* Apply affixes to a word and store the resulting words.
				3788	* "ht" is the hashtable with affentry_T that need to be applied, either
				3789	* prefixes or suffixes.
				3790	* "xht", when not NULL, is the prefix hashtable, to be used additionally on
				3791	* the resulting words for combining affixes.
				3792	*
				3793	* Returns FAIL when out of memory.
				3794	*/
				3795	static int
				3796	store_aff_word(
				3797	spellinfo_T spin, / spell info */
				3798	char_u word, / basic word start */
				3799	char_u afflist, / list of names of supported affixes */
				3800	afffile_T *affile,
				3801	hashtab_T *ht,
				3802	hashtab_T *xht,
				3803	int condit, /* CONDIT_SUF et al. */
				3804	int flags, /* flags for the word */
				3805	char_u pfxlist, / list of prefix IDs */
				3806	int pfxlen) /* nr of flags in "pfxlist" for prefixes, rest
				3807	* is compound flags */
				3808	{
				3809	int todo;
				3810	hashitem_T *hi;
				3811	affheader_T *ah;
				3812	affentry_T *ae;
				3813	char_u newword[MAXWLEN];
				3814	int retval = OK;
				3815	int i, j;
				3816	char_u *p;
				3817	int use_flags;
				3818	char_u *use_pfxlist;
				3819	int use_pfxlen;
				3820	int need_affix;
				3821	char_u store_afflist[MAXWLEN];
				3822	char_u pfx_pfxlist[MAXWLEN];
				3823	size_t wordlen = STRLEN(word);
				3824	int use_condit;
				3825
				3826	todo = (int)ht->ht_used;
				3827	for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
				3828	{
				3829	if (!HASHITEM_EMPTY(hi))
				3830	{
				3831	--todo;
				3832	ah = HI2AH(hi);
				3833
				3834	/* Check that the affix combines, if required, and that the word
				3835	* supports this affix. */
				3836	if (((condit & CONDIT_COMB) == 0 \|\| ah->ah_combine)
				3837	&& flag_in_afflist(affile->af_flagtype, afflist,
				3838	ah->ah_flag))
				3839	{
				3840	/* Loop over all affix entries with this name. */
				3841	for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
				3842	{
				3843	/* Check the condition. It's not logical to match case
				3844	* here, but it is required for compatibility with
				3845	* Myspell.
				3846	* Another requirement from Myspell is that the chop
				3847	* string is shorter than the word itself.
				3848	* For prefixes, when "PFXPOSTPONE" was used, only do
				3849	* prefixes with a chop string and/or flags.
				3850	* When a previously added affix had CIRCUMFIX this one
				3851	* must have it too, if it had not then this one must not
				3852	* have one either. */
				3853	if ((xht != NULL \|\| !affile->af_pfxpostpone
				3854	\|\| ae->ae_chop != NULL
				3855	\|\| ae->ae_flags != NULL)
				3856	&& (ae->ae_chop == NULL
				3857	\|\| STRLEN(ae->ae_chop) < wordlen)
				3858	&& (ae->ae_prog == NULL
				3859	\|\| vim_regexec_prog(&ae->ae_prog, FALSE,
				3860	word, (colnr_T)0))
				3861	&& (((condit & CONDIT_CFIX) == 0)
				3862	== ((condit & CONDIT_AFF) == 0
				3863	\|\| ae->ae_flags == NULL
				3864	\|\| !flag_in_afflist(affile->af_flagtype,
				3865	ae->ae_flags, affile->af_circumfix))))
				3866	{
				3867	/* Match. Remove the chop and add the affix. */
				3868	if (xht == NULL)
				3869	{
				3870	/* prefix: chop/add at the start of the word */
				3871	if (ae->ae_add == NULL)
				3872	*newword = NUL;
				3873	else
				3874	vim_strncpy(newword, ae->ae_add, MAXWLEN - 1);
				3875	p = word;
				3876	if (ae->ae_chop != NULL)
				3877	{
				3878	/* Skip chop string. */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3879	if (has_mbyte)
				3880	{
				3881	i = mb_charlen(ae->ae_chop);
				3882	for ( ; i > 0; --i)
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3883	MB_PTR_ADV(p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3884	}
				3885	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3886	p += STRLEN(ae->ae_chop);
				3887	}
				3888	STRCAT(newword, p);
				3889	}
				3890	else
				3891	{
				3892	/* suffix: chop/add at the end of the word */
				3893	vim_strncpy(newword, word, MAXWLEN - 1);
				3894	if (ae->ae_chop != NULL)
				3895	{
				3896	/* Remove chop string. */
				3897	p = newword + STRLEN(newword);
				3898	i = (int)MB_CHARLEN(ae->ae_chop);
				3899	for ( ; i > 0; --i)
Bram Moolenaar	91acfff	2017-03-12 19:22:36 +0100	[diff] [blame]	3900	MB_PTR_BACK(newword, p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	3901	*p = NUL;
				3902	}
				3903	if (ae->ae_add != NULL)
				3904	STRCAT(newword, ae->ae_add);
				3905	}
				3906
				3907	use_flags = flags;
				3908	use_pfxlist = pfxlist;
				3909	use_pfxlen = pfxlen;
				3910	need_affix = FALSE;
				3911	use_condit = condit \| CONDIT_COMB \| CONDIT_AFF;
				3912	if (ae->ae_flags != NULL)
				3913	{
				3914	/* Extract flags from the affix list. */
				3915	use_flags \|= get_affix_flags(affile, ae->ae_flags);
				3916
				3917	if (affile->af_needaffix != 0 && flag_in_afflist(
				3918	affile->af_flagtype, ae->ae_flags,
				3919	affile->af_needaffix))
				3920	need_affix = TRUE;
				3921
				3922	/* When there is a CIRCUMFIX flag the other affix
				3923	* must also have it and we don't add the word
				3924	* with one affix. */
				3925	if (affile->af_circumfix != 0 && flag_in_afflist(
				3926	affile->af_flagtype, ae->ae_flags,
				3927	affile->af_circumfix))
				3928	{
				3929	use_condit \|= CONDIT_CFIX;
				3930	if ((condit & CONDIT_CFIX) == 0)
				3931	need_affix = TRUE;
				3932	}
				3933
				3934	if (affile->af_pfxpostpone
				3935	\|\| spin->si_compflags != NULL)
				3936	{
				3937	if (affile->af_pfxpostpone)
				3938	/* Get prefix IDS from the affix list. */
				3939	use_pfxlen = get_pfxlist(affile,
				3940	ae->ae_flags, store_afflist);
				3941	else
				3942	use_pfxlen = 0;
				3943	use_pfxlist = store_afflist;
				3944
				3945	/* Combine the prefix IDs. Avoid adding the
				3946	* same ID twice. */
				3947	for (i = 0; i < pfxlen; ++i)
				3948	{
				3949	for (j = 0; j < use_pfxlen; ++j)
				3950	if (pfxlist[i] == use_pfxlist[j])
				3951	break;
				3952	if (j == use_pfxlen)
				3953	use_pfxlist[use_pfxlen++] = pfxlist[i];
				3954	}
				3955
				3956	if (spin->si_compflags != NULL)
				3957	/* Get compound IDS from the affix list. */
				3958	get_compflags(affile, ae->ae_flags,
				3959	use_pfxlist + use_pfxlen);
				3960
				3961	/* Combine the list of compound flags.
				3962	* Concatenate them to the prefix IDs list.
				3963	* Avoid adding the same ID twice. */
				3964	for (i = pfxlen; pfxlist[i] != NUL; ++i)
				3965	{
				3966	for (j = use_pfxlen;
				3967	use_pfxlist[j] != NUL; ++j)
				3968	if (pfxlist[i] == use_pfxlist[j])
				3969	break;
				3970	if (use_pfxlist[j] == NUL)
				3971	{
				3972	use_pfxlist[j++] = pfxlist[i];
				3973	use_pfxlist[j] = NUL;
				3974	}
				3975	}
				3976	}
				3977	}
				3978
				3979	/* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
				3980	* use the compound flags. */
				3981	if (use_pfxlist != NULL && ae->ae_compforbid)
				3982	{
				3983	vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
				3984	use_pfxlist = pfx_pfxlist;
				3985	}
				3986
				3987	/* When there are postponed prefixes... */
				3988	if (spin->si_prefroot != NULL
				3989	&& spin->si_prefroot->wn_sibling != NULL)
				3990	{
				3991	/* ... add a flag to indicate an affix was used. */
				3992	use_flags \|= WF_HAS_AFF;
				3993
				3994	/* ... don't use a prefix list if combining
				3995	* affixes is not allowed. But do use the
				3996	* compound flags after them. */
				3997	if (!ah->ah_combine && use_pfxlist != NULL)
				3998	use_pfxlist += use_pfxlen;
				3999	}
				4000
				4001	/* When compounding is supported and there is no
				4002	* "COMPOUNDPERMITFLAG" then forbid compounding on the
				4003	* side where the affix is applied. */
				4004	if (spin->si_compflags != NULL && !ae->ae_comppermit)
				4005	{
				4006	if (xht != NULL)
				4007	use_flags \|= WF_NOCOMPAFT;
				4008	else
				4009	use_flags \|= WF_NOCOMPBEF;
				4010	}
				4011
				4012	/* Store the modified word. */
				4013	if (store_word(spin, newword, use_flags,
				4014	spin->si_region, use_pfxlist,
				4015	need_affix) == FAIL)
				4016	retval = FAIL;
				4017
				4018	/* When added a prefix or a first suffix and the affix
				4019	* has flags may add a(nother) suffix. RECURSIVE! */
				4020	if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
				4021	if (store_aff_word(spin, newword, ae->ae_flags,
				4022	affile, &affile->af_suff, xht,
				4023	use_condit & (xht == NULL
				4024	? ~0 : ~CONDIT_SUF),
				4025	use_flags, use_pfxlist, pfxlen) == FAIL)
				4026	retval = FAIL;
				4027
				4028	/* When added a suffix and combining is allowed also
				4029	* try adding a prefix additionally. Both for the
				4030	* word flags and for the affix flags. RECURSIVE! */
				4031	if (xht != NULL && ah->ah_combine)
				4032	{
				4033	if (store_aff_word(spin, newword,
				4034	afflist, affile,
				4035	xht, NULL, use_condit,
				4036	use_flags, use_pfxlist,
				4037	pfxlen) == FAIL
				4038	\|\| (ae->ae_flags != NULL
				4039	&& store_aff_word(spin, newword,
				4040	ae->ae_flags, affile,
				4041	xht, NULL, use_condit,
				4042	use_flags, use_pfxlist,
				4043	pfxlen) == FAIL))
				4044	retval = FAIL;
				4045	}
				4046	}
				4047	}
				4048	}
				4049	}
				4050	}
				4051
				4052	return retval;
				4053	}
				4054
				4055	/*
				4056	* Read a file with a list of words.
				4057	*/
				4058	static int
				4059	spell_read_wordfile(spellinfo_T spin, char_u fname)
				4060	{
				4061	FILE *fd;
				4062	long lnum = 0;
				4063	char_u rline[MAXLINELEN];
				4064	char_u *line;
				4065	char_u *pc = NULL;
				4066	char_u *p;
				4067	int l;
				4068	int retval = OK;
				4069	int did_word = FALSE;
				4070	int non_ascii = 0;
				4071	int flags;
				4072	int regionmask;
				4073
				4074	/*
				4075	* Open the file.
				4076	*/
				4077	fd = mch_fopen((char *)fname, "r");
				4078	if (fd == NULL)
				4079	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4080	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4081	return FAIL;
				4082	}
				4083
Bram Moolenaar	c166927	2018-06-19 14:23:53 +0200	[diff] [blame]	4084	vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s..."), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4085	spell_message(spin, IObuff);
				4086
				4087	/*
				4088	* Read all the lines in the file one by one.
				4089	*/
				4090	while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
				4091	{
				4092	line_breakcheck();
				4093	++lnum;
				4094
				4095	/* Skip comment lines. */
				4096	if (*rline == '#')
				4097	continue;
				4098
				4099	/* Remove CR, LF and white space from the end. */
				4100	l = (int)STRLEN(rline);
				4101	while (l > 0 && rline[l - 1] <= ' ')
				4102	--l;
				4103	if (l == 0)
				4104	continue; /* empty or blank line */
				4105	rline[l] = NUL;
				4106
				4107	/* Convert from "/encoding={encoding}" to 'encoding' when needed. */
				4108	vim_free(pc);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4109	if (spin->si_conv.vc_type != CONV_NONE)
				4110	{
				4111	pc = string_convert(&spin->si_conv, rline, NULL);
				4112	if (pc == NULL)
				4113	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4114	smsg(_("Conversion failure for word in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4115	fname, lnum, rline);
				4116	continue;
				4117	}
				4118	line = pc;
				4119	}
				4120	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4121	{
				4122	pc = NULL;
				4123	line = rline;
				4124	}
				4125
				4126	if (*line == '/')
				4127	{
				4128	++line;
				4129	if (STRNCMP(line, "encoding=", 9) == 0)
				4130	{
				4131	if (spin->si_conv.vc_type != CONV_NONE)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4132	smsg(_("Duplicate /encoding= line ignored in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4133	fname, lnum, line - 1);
				4134	else if (did_word)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4135	smsg(_("/encoding= line after word ignored in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4136	fname, lnum, line - 1);
				4137	else
				4138	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4139	char_u *enc;
				4140
				4141	/* Setup for conversion to 'encoding'. */
				4142	line += 9;
				4143	enc = enc_canonize(line);
				4144	if (enc != NULL && !spin->si_ascii
				4145	&& convert_setup(&spin->si_conv, enc,
				4146	p_enc) == FAIL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4147	smsg(_("Conversion in %s not supported: from %s to %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4148	fname, line, p_enc);
				4149	vim_free(enc);
				4150	spin->si_conv.vc_fail = TRUE;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4151	}
				4152	continue;
				4153	}
				4154
				4155	if (STRNCMP(line, "regions=", 8) == 0)
				4156	{
				4157	if (spin->si_region_count > 1)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4158	smsg(_("Duplicate /regions= line ignored in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4159	fname, lnum, line);
				4160	else
				4161	{
				4162	line += 8;
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	4163	if (STRLEN(line) > MAXREGIONS * 2)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4164	smsg(_("Too many regions in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4165	fname, lnum, line);
				4166	else
				4167	{
				4168	spin->si_region_count = (int)STRLEN(line) / 2;
				4169	STRCPY(spin->si_region_name, line);
				4170
				4171	/* Adjust the mask for a word valid in all regions. */
				4172	spin->si_region = (1 << spin->si_region_count) - 1;
				4173	}
				4174	}
				4175	continue;
				4176	}
				4177
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4178	smsg(_("/ line ignored in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4179	fname, lnum, line - 1);
				4180	continue;
				4181	}
				4182
				4183	flags = 0;
				4184	regionmask = spin->si_region;
				4185
				4186	/* Check for flags and region after a slash. */
				4187	p = vim_strchr(line, '/');
				4188	if (p != NULL)
				4189	{
				4190	*p++ = NUL;
				4191	while (*p != NUL)
				4192	{
				4193	if (p == '=') / keep-case word */
				4194	flags \|= WF_KEEPCAP \| WF_FIXCAP;
				4195	else if (p == '!') / Bad, bad, wicked word. */
				4196	flags \|= WF_BANNED;
				4197	else if (p == '?') / Rare word. */
				4198	flags \|= WF_RARE;
				4199	else if (VIM_ISDIGIT(p)) / region number(s) */
				4200	{
				4201	if ((flags & WF_REGION) == 0) /* first one */
				4202	regionmask = 0;
				4203	flags \|= WF_REGION;
				4204
				4205	l = *p - '0';
Bram Moolenaar	ee03b94	2017-10-27 00:57:05 +0200	[diff] [blame]	4206	if (l == 0 \|\| l > spin->si_region_count)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4207	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4208	smsg(_("Invalid region nr in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4209	fname, lnum, p);
				4210	break;
				4211	}
				4212	regionmask \|= 1 << (l - 1);
				4213	}
				4214	else
				4215	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4216	smsg(_("Unrecognized flags in %s line %d: %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4217	fname, lnum, p);
				4218	break;
				4219	}
				4220	++p;
				4221	}
				4222	}
				4223
				4224	/* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
				4225	if (spin->si_ascii && has_non_ascii(line))
				4226	{
				4227	++non_ascii;
				4228	continue;
				4229	}
				4230
				4231	/* Normal word: store it. */
				4232	if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
				4233	{
				4234	retval = FAIL;
				4235	break;
				4236	}
				4237	did_word = TRUE;
				4238	}
				4239
				4240	vim_free(pc);
				4241	fclose(fd);
				4242
				4243	if (spin->si_ascii && non_ascii > 0)
				4244	{
				4245	vim_snprintf((char *)IObuff, IOSIZE,
				4246	_("Ignored %d words with non-ASCII characters"), non_ascii);
				4247	spell_message(spin, IObuff);
				4248	}
				4249
				4250	return retval;
				4251	}
				4252
				4253	/*
				4254	* Get part of an sblock_T, "len" bytes long.
				4255	* This avoids calling free() for every little struct we use (and keeping
				4256	* track of them).
				4257	* The memory is cleared to all zeros.
				4258	* Returns NULL when out of memory.
				4259	*/
				4260	static void *
				4261	getroom(
				4262	spellinfo_T *spin,
				4263	size_t len, /* length needed */
				4264	int align) /* align for pointer */
				4265	{
				4266	char_u *p;
				4267	sblock_T *bl = spin->si_blocks;
				4268
				4269	if (align && bl != NULL)
				4270	/* Round size up for alignment. On some systems structures need to be
				4271	* aligned to the size of a pointer (e.g., SPARC). */
				4272	bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
				4273	& ~(sizeof(char *) - 1);
				4274
				4275	if (bl == NULL \|\| bl->sb_used + len > SBLOCKSIZE)
				4276	{
				4277	if (len >= SBLOCKSIZE)
				4278	bl = NULL;
				4279	else
				4280	/* Allocate a block of memory. It is not freed until much later. */
Bram Moolenaar	c799fe2	2019-05-28 23:08:19 +0200	[diff] [blame]	4281	bl = alloc_clear(sizeof(sblock_T) + SBLOCKSIZE);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4282	if (bl == NULL)
				4283	{
				4284	if (!spin->si_did_emsg)
				4285	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4286	emsg(_("E845: Insufficient memory, word list will be incomplete"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4287	spin->si_did_emsg = TRUE;
				4288	}
				4289	return NULL;
				4290	}
				4291	bl->sb_next = spin->si_blocks;
				4292	spin->si_blocks = bl;
				4293	bl->sb_used = 0;
				4294	++spin->si_blocks_cnt;
				4295	}
				4296
				4297	p = bl->sb_data + bl->sb_used;
				4298	bl->sb_used += (int)len;
				4299
				4300	return p;
				4301	}
				4302
				4303	/*
				4304	* Make a copy of a string into memory allocated with getroom().
				4305	* Returns NULL when out of memory.
				4306	*/
				4307	static char_u *
				4308	getroom_save(spellinfo_T spin, char_u s)
				4309	{
				4310	char_u *sc;
				4311
				4312	sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
				4313	if (sc != NULL)
				4314	STRCPY(sc, s);
				4315	return sc;
				4316	}
				4317
				4318
				4319	/*
				4320	* Free the list of allocated sblock_T.
				4321	*/
				4322	static void
				4323	free_blocks(sblock_T *bl)
				4324	{
				4325	sblock_T *next;
				4326
				4327	while (bl != NULL)
				4328	{
				4329	next = bl->sb_next;
				4330	vim_free(bl);
				4331	bl = next;
				4332	}
				4333	}
				4334
				4335	/*
				4336	* Allocate the root of a word tree.
				4337	* Returns NULL when out of memory.
				4338	*/
				4339	static wordnode_T *
				4340	wordtree_alloc(spellinfo_T *spin)
				4341	{
				4342	return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4343	}
				4344
				4345	/*
				4346	* Store a word in the tree(s).
				4347	* Always store it in the case-folded tree. For a keep-case word this is
				4348	* useful when the word can also be used with all caps (no WF_FIXCAP flag) and
				4349	* used to find suggestions.
				4350	* For a keep-case word also store it in the keep-case tree.
				4351	* When "pfxlist" is not NULL store the word for each postponed prefix ID and
				4352	* compound flag.
				4353	*/
				4354	static int
				4355	store_word(
				4356	spellinfo_T *spin,
				4357	char_u *word,
				4358	int flags, /* extra flags, WF_BANNED */
				4359	int region, /* supported region(s) */
				4360	char_u pfxlist, / list of prefix IDs or NULL */
				4361	int need_affix) /* only store word with affix ID */
				4362	{
				4363	int len = (int)STRLEN(word);
				4364	int ct = captype(word, word + len);
				4365	char_u foldword[MAXWLEN];
				4366	int res = OK;
				4367	char_u *p;
				4368
				4369	(void)spell_casefold(word, len, foldword, MAXWLEN);
				4370	for (p = pfxlist; res == OK; ++p)
				4371	{
				4372	if (!need_affix \|\| (p != NULL && *p != NUL))
				4373	res = tree_add_word(spin, foldword, spin->si_foldroot, ct \| flags,
				4374	region, p == NULL ? 0 : *p);
				4375	if (p == NULL \|\| *p == NUL)
				4376	break;
				4377	}
				4378	++spin->si_foldwcount;
				4379
				4380	if (res == OK && (ct == WF_KEEPCAP \|\| (flags & WF_KEEPCAP)))
				4381	{
				4382	for (p = pfxlist; res == OK; ++p)
				4383	{
				4384	if (!need_affix \|\| (p != NULL && *p != NUL))
				4385	res = tree_add_word(spin, word, spin->si_keeproot, flags,
				4386	region, p == NULL ? 0 : *p);
				4387	if (p == NULL \|\| *p == NUL)
				4388	break;
				4389	}
				4390	++spin->si_keepwcount;
				4391	}
				4392	return res;
				4393	}
				4394
				4395	/*
				4396	* Add word "word" to a word tree at "root".
				4397	* When "flags" < 0 we are adding to the prefix tree where "flags" is used for
				4398	* "rare" and "region" is the condition nr.
				4399	* Returns FAIL when out of memory.
				4400	*/
				4401	static int
				4402	tree_add_word(
				4403	spellinfo_T *spin,
				4404	char_u *word,
				4405	wordnode_T *root,
				4406	int flags,
				4407	int region,
				4408	int affixID)
				4409	{
				4410	wordnode_T *node = root;
				4411	wordnode_T *np;
				4412	wordnode_T copyp, *copyprev;
				4413	wordnode_T **prev = NULL;
				4414	int i;
				4415
				4416	/* Add each byte of the word to the tree, including the NUL at the end. */
				4417	for (i = 0; ; ++i)
				4418	{
				4419	/* When there is more than one reference to this node we need to make
				4420	* a copy, so that we can modify it. Copy the whole list of siblings
				4421	* (we don't optimize for a partly shared list of siblings). */
				4422	if (node != NULL && node->wn_refs > 1)
				4423	{
				4424	--node->wn_refs;
				4425	copyprev = prev;
				4426	for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
				4427	{
				4428	/* Allocate a new node and copy the info. */
				4429	np = get_wordnode(spin);
				4430	if (np == NULL)
				4431	return FAIL;
				4432	np->wn_child = copyp->wn_child;
				4433	if (np->wn_child != NULL)
				4434	++np->wn_child->wn_refs; /* child gets extra ref */
				4435	np->wn_byte = copyp->wn_byte;
				4436	if (np->wn_byte == NUL)
				4437	{
				4438	np->wn_flags = copyp->wn_flags;
				4439	np->wn_region = copyp->wn_region;
				4440	np->wn_affixID = copyp->wn_affixID;
				4441	}
				4442
				4443	/* Link the new node in the list, there will be one ref. */
				4444	np->wn_refs = 1;
				4445	if (copyprev != NULL)
				4446	*copyprev = np;
				4447	copyprev = &np->wn_sibling;
				4448
				4449	/* Let "node" point to the head of the copied list. */
				4450	if (copyp == node)
				4451	node = np;
				4452	}
				4453	}
				4454
				4455	/* Look for the sibling that has the same character. They are sorted
				4456	* on byte value, thus stop searching when a sibling is found with a
				4457	* higher byte value. For zero bytes (end of word) the sorting is
				4458	* done on flags and then on affixID. */
				4459	while (node != NULL
				4460	&& (node->wn_byte < word[i]
				4461	\|\| (node->wn_byte == NUL
				4462	&& (flags < 0
				4463	? node->wn_affixID < (unsigned)affixID
				4464	: (node->wn_flags < (unsigned)(flags & WN_MASK)
				4465	\|\| (node->wn_flags == (flags & WN_MASK)
				4466	&& (spin->si_sugtree
				4467	? (node->wn_region & 0xffff) < region
				4468	: node->wn_affixID
				4469	< (unsigned)affixID)))))))
				4470	{
				4471	prev = &node->wn_sibling;
				4472	node = *prev;
				4473	}
				4474	if (node == NULL
				4475	\|\| node->wn_byte != word[i]
				4476	\|\| (word[i] == NUL
				4477	&& (flags < 0
				4478	\|\| spin->si_sugtree
				4479	\|\| node->wn_flags != (flags & WN_MASK)
				4480	\|\| node->wn_affixID != affixID)))
				4481	{
				4482	/* Allocate a new node. */
				4483	np = get_wordnode(spin);
				4484	if (np == NULL)
				4485	return FAIL;
				4486	np->wn_byte = word[i];
				4487
				4488	/* If "node" is NULL this is a new child or the end of the sibling
				4489	* list: ref count is one. Otherwise use ref count of sibling and
				4490	* make ref count of sibling one (matters when inserting in front
				4491	* of the list of siblings). */
				4492	if (node == NULL)
				4493	np->wn_refs = 1;
				4494	else
				4495	{
				4496	np->wn_refs = node->wn_refs;
				4497	node->wn_refs = 1;
				4498	}
				4499	if (prev != NULL)
				4500	*prev = np;
				4501	np->wn_sibling = node;
				4502	node = np;
				4503	}
				4504
				4505	if (word[i] == NUL)
				4506	{
				4507	node->wn_flags = flags;
				4508	node->wn_region \|= region;
				4509	node->wn_affixID = affixID;
				4510	break;
				4511	}
				4512	prev = &node->wn_child;
				4513	node = *prev;
				4514	}
				4515	#ifdef SPELL_PRINTTREE
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4516	smsg("Added \"%s\"", word);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4517	spell_print_tree(root->wn_sibling);
				4518	#endif
				4519
				4520	/* count nr of words added since last message */
				4521	++spin->si_msg_count;
				4522
				4523	if (spin->si_compress_cnt > 1)
				4524	{
				4525	if (--spin->si_compress_cnt == 1)
				4526	/* Did enough words to lower the block count limit. */
				4527	spin->si_blocks_cnt += compress_inc;
				4528	}
				4529
				4530	/*
				4531	* When we have allocated lots of memory we need to compress the word tree
				4532	* to free up some room. But compression is slow, and we might actually
				4533	* need that room, thus only compress in the following situations:
				4534	* 1. When not compressed before (si_compress_cnt == 0): when using
				4535	* "compress_start" blocks.
				4536	* 2. When compressed before and used "compress_inc" blocks before
				4537	* adding "compress_added" words (si_compress_cnt > 1).
				4538	* 3. When compressed before, added "compress_added" words
				4539	* (si_compress_cnt == 1) and the number of free nodes drops below the
				4540	* maximum word length.
				4541	*/
				4542	#ifndef SPELL_COMPRESS_ALLWAYS
				4543	if (spin->si_compress_cnt == 1
				4544	? spin->si_free_count < MAXWLEN
				4545	: spin->si_blocks_cnt >= compress_start)
				4546	#endif
				4547	{
				4548	/* Decrement the block counter. The effect is that we compress again
				4549	* when the freed up room has been used and another "compress_inc"
				4550	* blocks have been allocated. Unless "compress_added" words have
				4551	* been added, then the limit is put back again. */
				4552	spin->si_blocks_cnt -= compress_inc;
				4553	spin->si_compress_cnt = compress_added;
				4554
				4555	if (spin->si_verbose)
				4556	{
				4557	msg_start();
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	4558	msg_puts(_(msg_compressing));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4559	msg_clr_eos();
				4560	msg_didout = FALSE;
				4561	msg_col = 0;
				4562	out_flush();
				4563	}
				4564
				4565	/* Compress both trees. Either they both have many nodes, which makes
				4566	* compression useful, or one of them is small, which means
				4567	* compression goes fast. But when filling the soundfold word tree
				4568	* there is no keep-case tree. */
				4569	wordtree_compress(spin, spin->si_foldroot);
				4570	if (affixID >= 0)
				4571	wordtree_compress(spin, spin->si_keeproot);
				4572	}
				4573
				4574	return OK;
				4575	}
				4576
				4577	/*
				4578	* Get a wordnode_T, either from the list of previously freed nodes or
				4579	* allocate a new one.
				4580	* Returns NULL when out of memory.
				4581	*/
				4582	static wordnode_T *
				4583	get_wordnode(spellinfo_T *spin)
				4584	{
				4585	wordnode_T *n;
				4586
				4587	if (spin->si_first_free == NULL)
				4588	n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
				4589	else
				4590	{
				4591	n = spin->si_first_free;
				4592	spin->si_first_free = n->wn_child;
				4593	vim_memset(n, 0, sizeof(wordnode_T));
				4594	--spin->si_free_count;
				4595	}
				4596	#ifdef SPELL_PRINTTREE
				4597	if (n != NULL)
				4598	n->wn_nr = ++spin->si_wordnode_nr;
				4599	#endif
				4600	return n;
				4601	}
				4602
				4603	/*
				4604	* Decrement the reference count on a node (which is the head of a list of
				4605	* siblings). If the reference count becomes zero free the node and its
				4606	* siblings.
				4607	* Returns the number of nodes actually freed.
				4608	*/
				4609	static int
				4610	deref_wordnode(spellinfo_T spin, wordnode_T node)
				4611	{
				4612	wordnode_T *np;
				4613	int cnt = 0;
				4614
				4615	if (--node->wn_refs == 0)
				4616	{
				4617	for (np = node; np != NULL; np = np->wn_sibling)
				4618	{
				4619	if (np->wn_child != NULL)
				4620	cnt += deref_wordnode(spin, np->wn_child);
				4621	free_wordnode(spin, np);
				4622	++cnt;
				4623	}
				4624	++cnt; /* length field */
				4625	}
				4626	return cnt;
				4627	}
				4628
				4629	/*
				4630	* Free a wordnode_T for re-use later.
				4631	* Only the "wn_child" field becomes invalid.
				4632	*/
				4633	static void
				4634	free_wordnode(spellinfo_T spin, wordnode_T n)
				4635	{
				4636	n->wn_child = spin->si_first_free;
				4637	spin->si_first_free = n;
				4638	++spin->si_free_count;
				4639	}
				4640
				4641	/*
				4642	* Compress a tree: find tails that are identical and can be shared.
				4643	*/
				4644	static void
				4645	wordtree_compress(spellinfo_T spin, wordnode_T root)
				4646	{
				4647	hashtab_T ht;
				4648	int n;
				4649	int tot = 0;
				4650	int perc;
				4651
				4652	/* Skip the root itself, it's not actually used. The first sibling is the
				4653	* start of the tree. */
				4654	if (root->wn_sibling != NULL)
				4655	{
				4656	hash_init(&ht);
				4657	n = node_compress(spin, root->wn_sibling, &ht, &tot);
				4658
				4659	#ifndef SPELL_PRINTTREE
				4660	if (spin->si_verbose \|\| p_verbose > 2)
				4661	#endif
				4662	{
				4663	if (tot > 1000000)
				4664	perc = (tot - n) / (tot / 100);
				4665	else if (tot == 0)
				4666	perc = 0;
				4667	else
				4668	perc = (tot - n) * 100 / tot;
				4669	vim_snprintf((char *)IObuff, IOSIZE,
				4670	_("Compressed %d of %d nodes; %d (%d%%) remaining"),
				4671	n, tot, tot - n, perc);
				4672	spell_message(spin, IObuff);
				4673	}
				4674	#ifdef SPELL_PRINTTREE
				4675	spell_print_tree(root->wn_sibling);
				4676	#endif
				4677	hash_clear(&ht);
				4678	}
				4679	}
				4680
				4681	/*
				4682	* Compress a node, its siblings and its children, depth first.
				4683	* Returns the number of compressed nodes.
				4684	*/
				4685	static int
				4686	node_compress(
				4687	spellinfo_T *spin,
				4688	wordnode_T *node,
				4689	hashtab_T *ht,
				4690	int tot) / total count of nodes before compressing,
				4691	incremented while going through the tree */
				4692	{
				4693	wordnode_T *np;
				4694	wordnode_T *tp;
				4695	wordnode_T *child;
				4696	hash_T hash;
				4697	hashitem_T *hi;
				4698	int len = 0;
				4699	unsigned nr, n;
				4700	int compressed = 0;
				4701
				4702	/*
				4703	* Go through the list of siblings. Compress each child and then try
				4704	* finding an identical child to replace it.
				4705	* Note that with "child" we mean not just the node that is pointed to,
				4706	* but the whole list of siblings of which the child node is the first.
				4707	*/
				4708	for (np = node; np != NULL && !got_int; np = np->wn_sibling)
				4709	{
				4710	++len;
				4711	if ((child = np->wn_child) != NULL)
				4712	{
				4713	/* Compress the child first. This fills hashkey. */
				4714	compressed += node_compress(spin, child, ht, tot);
				4715
				4716	/* Try to find an identical child. */
				4717	hash = hash_hash(child->wn_u1.hashkey);
				4718	hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
				4719	if (!HASHITEM_EMPTY(hi))
				4720	{
				4721	/* There are children we encountered before with a hash value
				4722	* identical to the current child. Now check if there is one
				4723	* that is really identical. */
				4724	for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
				4725	if (node_equal(child, tp))
				4726	{
				4727	/* Found one! Now use that child in place of the
				4728	* current one. This means the current child and all
				4729	* its siblings is unlinked from the tree. */
				4730	++tp->wn_refs;
				4731	compressed += deref_wordnode(spin, child);
				4732	np->wn_child = tp;
				4733	break;
				4734	}
				4735	if (tp == NULL)
				4736	{
				4737	/* No other child with this hash value equals the child of
				4738	* the node, add it to the linked list after the first
				4739	* item. */
				4740	tp = HI2WN(hi);
				4741	child->wn_u2.next = tp->wn_u2.next;
				4742	tp->wn_u2.next = child;
				4743	}
				4744	}
				4745	else
				4746	/* No other child has this hash value, add it to the
				4747	* hashtable. */
				4748	hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
				4749	}
				4750	}
				4751	tot += len + 1; / add one for the node that stores the length */
				4752
				4753	/*
				4754	* Make a hash key for the node and its siblings, so that we can quickly
				4755	* find a lookalike node. This must be done after compressing the sibling
				4756	* list, otherwise the hash key would become invalid by the compression.
				4757	*/
				4758	node->wn_u1.hashkey[0] = len;
				4759	nr = 0;
				4760	for (np = node; np != NULL; np = np->wn_sibling)
				4761	{
				4762	if (np->wn_byte == NUL)
				4763	/* end node: use wn_flags, wn_region and wn_affixID */
				4764	n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
				4765	else
				4766	/* byte node: use the byte value and the child pointer */
				4767	n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
				4768	nr = nr * 101 + n;
				4769	}
				4770
				4771	/* Avoid NUL bytes, it terminates the hash key. */
				4772	n = nr & 0xff;
				4773	node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
				4774	n = (nr >> 8) & 0xff;
				4775	node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
				4776	n = (nr >> 16) & 0xff;
				4777	node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
				4778	n = (nr >> 24) & 0xff;
				4779	node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
				4780	node->wn_u1.hashkey[5] = NUL;
				4781
				4782	/* Check for CTRL-C pressed now and then. */
				4783	fast_breakcheck();
				4784
				4785	return compressed;
				4786	}
				4787
				4788	/*
				4789	* Return TRUE when two nodes have identical siblings and children.
				4790	*/
				4791	static int
				4792	node_equal(wordnode_T n1, wordnode_T n2)
				4793	{
				4794	wordnode_T *p1;
				4795	wordnode_T *p2;
				4796
				4797	for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
				4798	p1 = p1->wn_sibling, p2 = p2->wn_sibling)
				4799	if (p1->wn_byte != p2->wn_byte
				4800	\|\| (p1->wn_byte == NUL
				4801	? (p1->wn_flags != p2->wn_flags
				4802	\|\| p1->wn_region != p2->wn_region
				4803	\|\| p1->wn_affixID != p2->wn_affixID)
				4804	: (p1->wn_child != p2->wn_child)))
				4805	break;
				4806
				4807	return p1 == NULL && p2 == NULL;
				4808	}
				4809
Bram Moolenaar	eae1b91	2019-05-09 15:12:55 +0200	[diff] [blame]	4810	static int rep_compare(const void s1, const void s2);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4811
				4812	/*
				4813	* Function given to qsort() to sort the REP items on "from" string.
				4814	*/
				4815	static int
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4816	rep_compare(const void s1, const void s2)
				4817	{
				4818	fromto_T p1 = (fromto_T )s1;
				4819	fromto_T p2 = (fromto_T )s2;
				4820
				4821	return STRCMP(p1->ft_from, p2->ft_from);
				4822	}
				4823
				4824	/*
				4825	* Write the Vim .spl file "fname".
				4826	* Return FAIL or OK;
				4827	*/
				4828	static int
				4829	write_vim_spell(spellinfo_T spin, char_u fname)
				4830	{
				4831	FILE *fd;
				4832	int regionmask;
				4833	int round;
				4834	wordnode_T *tree;
				4835	int nodecount;
				4836	int i;
				4837	int l;
				4838	garray_T *gap;
				4839	fromto_T *ftp;
				4840	char_u *p;
				4841	int rr;
				4842	int retval = OK;
				4843	size_t fwv = 1; /* collect return value of fwrite() to avoid
				4844	warnings from picky compiler */
				4845
				4846	fd = mch_fopen((char *)fname, "w");
				4847	if (fd == NULL)
				4848	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	4849	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4850	return FAIL;
				4851	}
				4852
				4853	/* <HEADER>: <fileID> <versionnr> */
				4854	/* <fileID> */
				4855	fwv &= fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd);
				4856	if (fwv != (size_t)1)
				4857	/* Catch first write error, don't try writing more. */
				4858	goto theend;
				4859
				4860	putc(VIMSPELLVERSION, fd); /* <versionnr> */
				4861
				4862	/*
				4863	* <SECTIONS>: <section> ... <sectionend>
				4864	*/
				4865
				4866	/* SN_INFO: <infotext> */
				4867	if (spin->si_info != NULL)
				4868	{
				4869	putc(SN_INFO, fd); /* <sectionID> */
				4870	putc(0, fd); /* <sectionflags> */
				4871
				4872	i = (int)STRLEN(spin->si_info);
				4873	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				4874	fwv &= fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
				4875	}
				4876
				4877	/* SN_REGION: <regionname> ...
				4878	* Write the region names only if there is more than one. */
				4879	if (spin->si_region_count > 1)
				4880	{
				4881	putc(SN_REGION, fd); /* <sectionID> */
				4882	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4883	l = spin->si_region_count * 2;
				4884	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				4885	fwv &= fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
				4886	/* <regionname> ... */
				4887	regionmask = (1 << spin->si_region_count) - 1;
				4888	}
				4889	else
				4890	regionmask = 0;
				4891
				4892	/* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
				4893	*
				4894	* The table with character flags and the table for case folding.
				4895	* This makes sure the same characters are recognized as word characters
				4896	* when generating an when using a spell file.
				4897	* Skip this for ASCII, the table may conflict with the one used for
				4898	* 'encoding'.
				4899	* Also skip this for an .add.spl file, the main spell file must contain
				4900	* the table (avoids that it conflicts). File is shorter too.
				4901	*/
				4902	if (!spin->si_ascii && !spin->si_add)
				4903	{
				4904	char_u folchars[128 * 8];
				4905	int flags;
				4906
				4907	putc(SN_CHARFLAGS, fd); /* <sectionID> */
				4908	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4909
				4910	/* Form the <folchars> string first, we need to know its length. */
				4911	l = 0;
				4912	for (i = 128; i < 256; ++i)
				4913	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4914	if (has_mbyte)
				4915	l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
				4916	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	4917	folchars[l++] = spelltab.st_fold[i];
				4918	}
				4919	put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */
				4920
				4921	fputc(128, fd); /* <charflagslen> */
				4922	for (i = 128; i < 256; ++i)
				4923	{
				4924	flags = 0;
				4925	if (spelltab.st_isw[i])
				4926	flags \|= CF_WORD;
				4927	if (spelltab.st_isu[i])
				4928	flags \|= CF_UPPER;
				4929	fputc(flags, fd); /* <charflags> */
				4930	}
				4931
				4932	put_bytes(fd, (long_u)l, 2); /* <folcharslen> */
				4933	fwv &= fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */
				4934	}
				4935
				4936	/* SN_MIDWORD: <midword> */
				4937	if (spin->si_midword != NULL)
				4938	{
				4939	putc(SN_MIDWORD, fd); /* <sectionID> */
				4940	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4941
				4942	i = (int)STRLEN(spin->si_midword);
				4943	put_bytes(fd, (long_u)i, 4); /* <sectionlen> */
				4944	fwv &= fwrite(spin->si_midword, (size_t)i, (size_t)1, fd);
				4945	/* <midword> */
				4946	}
				4947
				4948	/* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
				4949	if (spin->si_prefcond.ga_len > 0)
				4950	{
				4951	putc(SN_PREFCOND, fd); /* <sectionID> */
				4952	putc(SNF_REQUIRED, fd); /* <sectionflags> */
				4953
				4954	l = write_spell_prefcond(NULL, &spin->si_prefcond);
				4955	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				4956
				4957	write_spell_prefcond(fd, &spin->si_prefcond);
				4958	}
				4959
				4960	/* SN_REP: <repcount> <rep> ...
				4961	* SN_SAL: <salflags> <salcount> <sal> ...
				4962	* SN_REPSAL: <repcount> <rep> ... */
				4963
				4964	/* round 1: SN_REP section
				4965	* round 2: SN_SAL section (unless SN_SOFO is used)
				4966	* round 3: SN_REPSAL section */
				4967	for (round = 1; round <= 3; ++round)
				4968	{
				4969	if (round == 1)
				4970	gap = &spin->si_rep;
				4971	else if (round == 2)
				4972	{
				4973	/* Don't write SN_SAL when using a SN_SOFO section */
				4974	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				4975	continue;
				4976	gap = &spin->si_sal;
				4977	}
				4978	else
				4979	gap = &spin->si_repsal;
				4980
				4981	/* Don't write the section if there are no items. */
				4982	if (gap->ga_len == 0)
				4983	continue;
				4984
				4985	/* Sort the REP/REPSAL items. */
				4986	if (round != 2)
				4987	qsort(gap->ga_data, (size_t)gap->ga_len,
				4988	sizeof(fromto_T), rep_compare);
				4989
				4990	i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
				4991	putc(i, fd); /* <sectionID> */
				4992
				4993	/* This is for making suggestions, section is not required. */
				4994	putc(0, fd); /* <sectionflags> */
				4995
				4996	/* Compute the length of what follows. */
				4997	l = 2; /* count <repcount> or <salcount> */
				4998	for (i = 0; i < gap->ga_len; ++i)
				4999	{
				5000	ftp = &((fromto_T *)gap->ga_data)[i];
				5001	l += 1 + (int)STRLEN(ftp->ft_from); /* count <fromlen> and <from> */
				5002	l += 1 + (int)STRLEN(ftp->ft_to); /* count <tolen> and <to> */
				5003	}
				5004	if (round == 2)
				5005	++l; /* count <salflags> */
				5006	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5007
				5008	if (round == 2)
				5009	{
				5010	i = 0;
				5011	if (spin->si_followup)
				5012	i \|= SAL_F0LLOWUP;
				5013	if (spin->si_collapse)
				5014	i \|= SAL_COLLAPSE;
				5015	if (spin->si_rem_accents)
				5016	i \|= SAL_REM_ACCENTS;
				5017	putc(i, fd); /* <salflags> */
				5018	}
				5019
				5020	put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */
				5021	for (i = 0; i < gap->ga_len; ++i)
				5022	{
				5023	/* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
				5024	/* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
				5025	ftp = &((fromto_T *)gap->ga_data)[i];
				5026	for (rr = 1; rr <= 2; ++rr)
				5027	{
				5028	p = rr == 1 ? ftp->ft_from : ftp->ft_to;
				5029	l = (int)STRLEN(p);
				5030	putc(l, fd);
				5031	if (l > 0)
				5032	fwv &= fwrite(p, l, (size_t)1, fd);
				5033	}
				5034	}
				5035
				5036	}
				5037
				5038	/* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
				5039	* This is for making suggestions, section is not required. */
				5040	if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
				5041	{
				5042	putc(SN_SOFO, fd); /* <sectionID> */
				5043	putc(0, fd); /* <sectionflags> */
				5044
				5045	l = (int)STRLEN(spin->si_sofofr);
				5046	put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
				5047	/* <sectionlen> */
				5048
				5049	put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */
				5050	fwv &= fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */
				5051
				5052	l = (int)STRLEN(spin->si_sofoto);
				5053	put_bytes(fd, (long_u)l, 2); /* <sofotolen> */
				5054	fwv &= fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */
				5055	}
				5056
				5057	/* SN_WORDS: <word> ...
				5058	* This is for making suggestions, section is not required. */
				5059	if (spin->si_commonwords.ht_used > 0)
				5060	{
				5061	putc(SN_WORDS, fd); /* <sectionID> */
				5062	putc(0, fd); /* <sectionflags> */
				5063
				5064	/* round 1: count the bytes
				5065	* round 2: write the bytes */
				5066	for (round = 1; round <= 2; ++round)
				5067	{
				5068	int todo;
				5069	int len = 0;
				5070	hashitem_T *hi;
				5071
				5072	todo = (int)spin->si_commonwords.ht_used;
				5073	for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
				5074	if (!HASHITEM_EMPTY(hi))
				5075	{
				5076	l = (int)STRLEN(hi->hi_key) + 1;
				5077	len += l;
				5078	if (round == 2) /* <word> */
				5079	fwv &= fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
				5080	--todo;
				5081	}
				5082	if (round == 1)
				5083	put_bytes(fd, (long_u)len, 4); /* <sectionlen> */
				5084	}
				5085	}
				5086
				5087	/* SN_MAP: <mapstr>
				5088	* This is for making suggestions, section is not required. */
				5089	if (spin->si_map.ga_len > 0)
				5090	{
				5091	putc(SN_MAP, fd); /* <sectionID> */
				5092	putc(0, fd); /* <sectionflags> */
				5093	l = spin->si_map.ga_len;
				5094	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5095	fwv &= fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
				5096	/* <mapstr> */
				5097	}
				5098
				5099	/* SN_SUGFILE: <timestamp>
				5100	* This is used to notify that a .sug file may be available and at the
				5101	* same time allows for checking that a .sug file that is found matches
				5102	* with this .spl file. That's because the word numbers must be exactly
				5103	* right. */
				5104	if (!spin->si_nosugfile
				5105	&& (spin->si_sal.ga_len > 0
				5106	\|\| (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
				5107	{
				5108	putc(SN_SUGFILE, fd); /* <sectionID> */
				5109	putc(0, fd); /* <sectionflags> */
				5110	put_bytes(fd, (long_u)8, 4); /* <sectionlen> */
				5111
				5112	/* Set si_sugtime and write it to the file. */
				5113	spin->si_sugtime = time(NULL);
				5114	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5115	}
				5116
				5117	/* SN_NOSPLITSUGS: nothing
				5118	* This is used to notify that no suggestions with word splits are to be
				5119	* made. */
				5120	if (spin->si_nosplitsugs)
				5121	{
				5122	putc(SN_NOSPLITSUGS, fd); /* <sectionID> */
				5123	putc(0, fd); /* <sectionflags> */
				5124	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5125	}
				5126
				5127	/* SN_NOCOMPUNDSUGS: nothing
				5128	* This is used to notify that no suggestions with compounds are to be
				5129	* made. */
				5130	if (spin->si_nocompoundsugs)
				5131	{
				5132	putc(SN_NOCOMPOUNDSUGS, fd); /* <sectionID> */
				5133	putc(0, fd); /* <sectionflags> */
				5134	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5135	}
				5136
				5137	/* SN_COMPOUND: compound info.
				5138	* We don't mark it required, when not supported all compound words will
				5139	* be bad words. */
				5140	if (spin->si_compflags != NULL)
				5141	{
				5142	putc(SN_COMPOUND, fd); /* <sectionID> */
				5143	putc(0, fd); /* <sectionflags> */
				5144
				5145	l = (int)STRLEN(spin->si_compflags);
				5146	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5147	l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
				5148	put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */
				5149
				5150	putc(spin->si_compmax, fd); /* <compmax> */
				5151	putc(spin->si_compminlen, fd); /* <compminlen> */
				5152	putc(spin->si_compsylmax, fd); /* <compsylmax> */
				5153	putc(0, fd); /* for Vim 7.0b compatibility */
				5154	putc(spin->si_compoptions, fd); /* <compoptions> */
				5155	put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
				5156	/* <comppatcount> */
				5157	for (i = 0; i < spin->si_comppat.ga_len; ++i)
				5158	{
				5159	p = ((char_u **)(spin->si_comppat.ga_data))[i];
				5160	putc((int)STRLEN(p), fd); /* <comppatlen> */
				5161	fwv &= fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);
				5162	/* <comppattext> */
				5163	}
				5164	/* <compflags> */
				5165	fwv &= fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
				5166	(size_t)1, fd);
				5167	}
				5168
				5169	/* SN_NOBREAK: NOBREAK flag */
				5170	if (spin->si_nobreak)
				5171	{
				5172	putc(SN_NOBREAK, fd); /* <sectionID> */
				5173	putc(0, fd); /* <sectionflags> */
				5174
				5175	/* It's empty, the presence of the section flags the feature. */
				5176	put_bytes(fd, (long_u)0, 4); /* <sectionlen> */
				5177	}
				5178
				5179	/* SN_SYLLABLE: syllable info.
				5180	* We don't mark it required, when not supported syllables will not be
				5181	* counted. */
				5182	if (spin->si_syllable != NULL)
				5183	{
				5184	putc(SN_SYLLABLE, fd); /* <sectionID> */
				5185	putc(0, fd); /* <sectionflags> */
				5186
				5187	l = (int)STRLEN(spin->si_syllable);
				5188	put_bytes(fd, (long_u)l, 4); /* <sectionlen> */
				5189	fwv &= fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd);
				5190	/* <syllable> */
				5191	}
				5192
				5193	/* end of <SECTIONS> */
				5194	putc(SN_END, fd); /* <sectionend> */
				5195
				5196
				5197	/*
				5198	* <LWORDTREE> <KWORDTREE> <PREFIXTREE>
				5199	*/
				5200	spin->si_memtot = 0;
				5201	for (round = 1; round <= 3; ++round)
				5202	{
				5203	if (round == 1)
				5204	tree = spin->si_foldroot->wn_sibling;
				5205	else if (round == 2)
				5206	tree = spin->si_keeproot->wn_sibling;
				5207	else
				5208	tree = spin->si_prefroot->wn_sibling;
				5209
				5210	/* Clear the index and wnode fields in the tree. */
				5211	clear_node(tree);
				5212
				5213	/* Count the number of nodes. Needed to be able to allocate the
				5214	* memory when reading the nodes. Also fills in index for shared
				5215	* nodes. */
				5216	nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
				5217
				5218	/* number of nodes in 4 bytes */
				5219	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5220	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5221
				5222	/* Write the nodes. */
				5223	(void)put_node(fd, tree, 0, regionmask, round == 3);
				5224	}
				5225
				5226	/* Write another byte to check for errors (file system full). */
				5227	if (putc(0, fd) == EOF)
				5228	retval = FAIL;
				5229	theend:
				5230	if (fclose(fd) == EOF)
				5231	retval = FAIL;
				5232
				5233	if (fwv != (size_t)1)
				5234	retval = FAIL;
				5235	if (retval == FAIL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5236	emsg(_(e_write));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5237
				5238	return retval;
				5239	}
				5240
				5241	/*
				5242	* Clear the index and wnode fields of "node", it siblings and its
				5243	* children. This is needed because they are a union with other items to save
				5244	* space.
				5245	*/
				5246	static void
				5247	clear_node(wordnode_T *node)
				5248	{
				5249	wordnode_T *np;
				5250
				5251	if (node != NULL)
				5252	for (np = node; np != NULL; np = np->wn_sibling)
				5253	{
				5254	np->wn_u1.index = 0;
				5255	np->wn_u2.wnode = NULL;
				5256
				5257	if (np->wn_byte != NUL)
				5258	clear_node(np->wn_child);
				5259	}
				5260	}
				5261
				5262
				5263	/*
				5264	* Dump a word tree at node "node".
				5265	*
				5266	* This first writes the list of possible bytes (siblings). Then for each
				5267	* byte recursively write the children.
				5268	*
				5269	* NOTE: The code here must match the code in read_tree_node(), since
				5270	* assumptions are made about the indexes (so that we don't have to write them
				5271	* in the file).
				5272	*
				5273	* Returns the number of nodes used.
				5274	*/
				5275	static int
				5276	put_node(
				5277	FILE fd, / NULL when only counting */
				5278	wordnode_T *node,
				5279	int idx,
				5280	int regionmask,
				5281	int prefixtree) /* TRUE for PREFIXTREE */
				5282	{
				5283	int newindex = idx;
				5284	int siblingcount = 0;
				5285	wordnode_T *np;
				5286	int flags;
				5287
				5288	/* If "node" is zero the tree is empty. */
				5289	if (node == NULL)
				5290	return 0;
				5291
				5292	/* Store the index where this node is written. */
				5293	node->wn_u1.index = idx;
				5294
				5295	/* Count the number of siblings. */
				5296	for (np = node; np != NULL; np = np->wn_sibling)
				5297	++siblingcount;
				5298
				5299	/* Write the sibling count. */
				5300	if (fd != NULL)
				5301	putc(siblingcount, fd); /* <siblingcount> */
				5302
				5303	/* Write each sibling byte and optionally extra info. */
				5304	for (np = node; np != NULL; np = np->wn_sibling)
				5305	{
				5306	if (np->wn_byte == 0)
				5307	{
				5308	if (fd != NULL)
				5309	{
				5310	/* For a NUL byte (end of word) write the flags etc. */
				5311	if (prefixtree)
				5312	{
				5313	/* In PREFIXTREE write the required affixID and the
				5314	* associated condition nr (stored in wn_region). The
				5315	* byte value is misused to store the "rare" and "not
				5316	* combining" flags */
				5317	if (np->wn_flags == (short_u)PFX_FLAGS)
				5318	putc(BY_NOFLAGS, fd); /* <byte> */
				5319	else
				5320	{
				5321	putc(BY_FLAGS, fd); /* <byte> */
				5322	putc(np->wn_flags, fd); /* <pflags> */
				5323	}
				5324	putc(np->wn_affixID, fd); /* <affixID> */
				5325	put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
				5326	}
				5327	else
				5328	{
				5329	/* For word trees we write the flag/region items. */
				5330	flags = np->wn_flags;
				5331	if (regionmask != 0 && np->wn_region != regionmask)
				5332	flags \|= WF_REGION;
				5333	if (np->wn_affixID != 0)
				5334	flags \|= WF_AFX;
				5335	if (flags == 0)
				5336	{
				5337	/* word without flags or region */
				5338	putc(BY_NOFLAGS, fd); /* <byte> */
				5339	}
				5340	else
				5341	{
				5342	if (np->wn_flags >= 0x100)
				5343	{
				5344	putc(BY_FLAGS2, fd); /* <byte> */
				5345	putc(flags, fd); /* <flags> */
				5346	putc((unsigned)flags >> 8, fd); /* <flags2> */
				5347	}
				5348	else
				5349	{
				5350	putc(BY_FLAGS, fd); /* <byte> */
				5351	putc(flags, fd); /* <flags> */
				5352	}
				5353	if (flags & WF_REGION)
				5354	putc(np->wn_region, fd); /* <region> */
				5355	if (flags & WF_AFX)
				5356	putc(np->wn_affixID, fd); /* <affixID> */
				5357	}
				5358	}
				5359	}
				5360	}
				5361	else
				5362	{
				5363	if (np->wn_child->wn_u1.index != 0
				5364	&& np->wn_child->wn_u2.wnode != node)
				5365	{
				5366	/* The child is written elsewhere, write the reference. */
				5367	if (fd != NULL)
				5368	{
				5369	putc(BY_INDEX, fd); /* <byte> */
				5370	/* <nodeidx> */
				5371	put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
				5372	}
				5373	}
				5374	else if (np->wn_child->wn_u2.wnode == NULL)
				5375	/* We will write the child below and give it an index. */
				5376	np->wn_child->wn_u2.wnode = node;
				5377
				5378	if (fd != NULL)
				5379	if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
				5380	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5381	emsg(_(e_write));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5382	return 0;
				5383	}
				5384	}
				5385	}
				5386
				5387	/* Space used in the array when reading: one for each sibling and one for
				5388	* the count. */
				5389	newindex += siblingcount + 1;
				5390
				5391	/* Recursively dump the children of each sibling. */
				5392	for (np = node; np != NULL; np = np->wn_sibling)
				5393	if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
				5394	newindex = put_node(fd, np->wn_child, newindex, regionmask,
				5395	prefixtree);
				5396
				5397	return newindex;
				5398	}
				5399
				5400
				5401	/*
				5402	* ":mkspell [-ascii] outfile infile ..."
				5403	* ":mkspell [-ascii] addfile"
				5404	*/
				5405	void
				5406	ex_mkspell(exarg_T *eap)
				5407	{
				5408	int fcount;
				5409	char_u **fnames;
				5410	char_u *arg = eap->arg;
				5411	int ascii = FALSE;
				5412
				5413	if (STRNCMP(arg, "-ascii", 6) == 0)
				5414	{
				5415	ascii = TRUE;
				5416	arg = skipwhite(arg + 6);
				5417	}
				5418
				5419	/* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
				5420	if (get_arglist_exp(arg, &fcount, &fnames, FALSE) == OK)
				5421	{
				5422	mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
				5423	FreeWild(fcount, fnames);
				5424	}
				5425	}
				5426
				5427	/*
				5428	* Create the .sug file.
				5429	* Uses the soundfold info in "spin".
				5430	* Writes the file with the name "wfname", with ".spl" changed to ".sug".
				5431	*/
				5432	static void
				5433	spell_make_sugfile(spellinfo_T spin, char_u wfname)
				5434	{
				5435	char_u *fname = NULL;
				5436	int len;
				5437	slang_T *slang;
				5438	int free_slang = FALSE;
				5439
				5440	/*
				5441	* Read back the .spl file that was written. This fills the required
				5442	* info for soundfolding. This also uses less memory than the
				5443	* pointer-linked version of the trie. And it avoids having two versions
				5444	* of the code for the soundfolding stuff.
				5445	* It might have been done already by spell_reload_one().
				5446	*/
				5447	for (slang = first_lang; slang != NULL; slang = slang->sl_next)
Bram Moolenaar	99499b1	2019-05-23 21:35:48 +0200	[diff] [blame]	5448	if (fullpathcmp(wfname, slang->sl_fname, FALSE, TRUE) == FPC_SAME)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5449	break;
				5450	if (slang == NULL)
				5451	{
				5452	spell_message(spin, (char_u *)_("Reading back spell file..."));
				5453	slang = spell_load_file(wfname, NULL, NULL, FALSE);
				5454	if (slang == NULL)
				5455	return;
				5456	free_slang = TRUE;
				5457	}
				5458
				5459	/*
				5460	* Clear the info in "spin" that is used.
				5461	*/
				5462	spin->si_blocks = NULL;
				5463	spin->si_blocks_cnt = 0;
				5464	spin->si_compress_cnt = 0; /* will stay at 0 all the time*/
				5465	spin->si_free_count = 0;
				5466	spin->si_first_free = NULL;
				5467	spin->si_foldwcount = 0;
				5468
				5469	/*
				5470	* Go through the trie of good words, soundfold each word and add it to
				5471	* the soundfold trie.
				5472	*/
				5473	spell_message(spin, (char_u *)_("Performing soundfolding..."));
				5474	if (sug_filltree(spin, slang) == FAIL)
				5475	goto theend;
				5476
				5477	/*
				5478	* Create the table which links each soundfold word with a list of the
				5479	* good words it may come from. Creates buffer "spin->si_spellbuf".
				5480	* This also removes the wordnr from the NUL byte entries to make
				5481	* compression possible.
				5482	*/
				5483	if (sug_maketable(spin) == FAIL)
				5484	goto theend;
				5485
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5486	smsg(_("Number of words after soundfolding: %ld"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5487	(long)spin->si_spellbuf->b_ml.ml_line_count);
				5488
				5489	/*
				5490	* Compress the soundfold trie.
				5491	*/
				5492	spell_message(spin, (char_u *)_(msg_compressing));
				5493	wordtree_compress(spin, spin->si_foldroot);
				5494
				5495	/*
				5496	* Write the .sug file.
				5497	* Make the file name by changing ".spl" to ".sug".
				5498	*/
				5499	fname = alloc(MAXPATHL);
				5500	if (fname == NULL)
				5501	goto theend;
				5502	vim_strncpy(fname, wfname, MAXPATHL - 1);
				5503	len = (int)STRLEN(fname);
				5504	fname[len - 2] = 'u';
				5505	fname[len - 1] = 'g';
				5506	sug_write(spin, fname);
				5507
				5508	theend:
				5509	vim_free(fname);
				5510	if (free_slang)
				5511	slang_free(slang);
				5512	free_blocks(spin->si_blocks);
				5513	close_spellbuf(spin->si_spellbuf);
				5514	}
				5515
				5516	/*
				5517	* Build the soundfold trie for language "slang".
				5518	*/
				5519	static int
				5520	sug_filltree(spellinfo_T spin, slang_T slang)
				5521	{
				5522	char_u *byts;
				5523	idx_T *idxs;
				5524	int depth;
				5525	idx_T arridx[MAXWLEN];
				5526	int curi[MAXWLEN];
				5527	char_u tword[MAXWLEN];
				5528	char_u tsalword[MAXWLEN];
				5529	int c;
				5530	idx_T n;
				5531	unsigned words_done = 0;
				5532	int wordcount[MAXWLEN];
				5533
				5534	/* We use si_foldroot for the soundfolded trie. */
				5535	spin->si_foldroot = wordtree_alloc(spin);
				5536	if (spin->si_foldroot == NULL)
				5537	return FAIL;
				5538
				5539	/* let tree_add_word() know we're adding to the soundfolded tree */
				5540	spin->si_sugtree = TRUE;
				5541
				5542	/*
				5543	* Go through the whole case-folded tree, soundfold each word and put it
				5544	* in the trie.
				5545	*/
				5546	byts = slang->sl_fbyts;
				5547	idxs = slang->sl_fidxs;
				5548
				5549	arridx[0] = 0;
				5550	curi[0] = 1;
				5551	wordcount[0] = 0;
				5552
				5553	depth = 0;
				5554	while (depth >= 0 && !got_int)
				5555	{
				5556	if (curi[depth] > byts[arridx[depth]])
				5557	{
				5558	/* Done all bytes at this node, go up one level. */
				5559	idxs[arridx[depth]] = wordcount[depth];
				5560	if (depth > 0)
				5561	wordcount[depth - 1] += wordcount[depth];
				5562
				5563	--depth;
				5564	line_breakcheck();
				5565	}
				5566	else
				5567	{
				5568
				5569	/* Do one more byte at this node. */
				5570	n = arridx[depth] + curi[depth];
				5571	++curi[depth];
				5572
				5573	c = byts[n];
				5574	if (c == 0)
				5575	{
				5576	/* Sound-fold the word. */
				5577	tword[depth] = NUL;
				5578	spell_soundfold(slang, tword, TRUE, tsalword);
				5579
				5580	/* We use the "flags" field for the MSB of the wordnr,
				5581	* "region" for the LSB of the wordnr. */
				5582	if (tree_add_word(spin, tsalword, spin->si_foldroot,
				5583	words_done >> 16, words_done & 0xffff,
				5584	0) == FAIL)
				5585	return FAIL;
				5586
				5587	++words_done;
				5588	++wordcount[depth];
				5589
				5590	/* Reset the block count each time to avoid compression
				5591	* kicking in. */
				5592	spin->si_blocks_cnt = 0;
				5593
				5594	/* Skip over any other NUL bytes (same word with different
				5595	* flags). */
				5596	while (byts[n + 1] == 0)
				5597	{
				5598	++n;
				5599	++curi[depth];
				5600	}
				5601	}
				5602	else
				5603	{
				5604	/* Normal char, go one level deeper. */
				5605	tword[depth++] = c;
				5606	arridx[depth] = idxs[n];
				5607	curi[depth] = 1;
				5608	wordcount[depth] = 0;
				5609	}
				5610	}
				5611	}
				5612
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5613	smsg(_("Total number of words: %d"), words_done);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5614
				5615	return OK;
				5616	}
				5617
				5618	/*
				5619	* Make the table that links each word in the soundfold trie to the words it
				5620	* can be produced from.
				5621	* This is not unlike lines in a file, thus use a memfile to be able to access
				5622	* the table efficiently.
				5623	* Returns FAIL when out of memory.
				5624	*/
				5625	static int
				5626	sug_maketable(spellinfo_T *spin)
				5627	{
				5628	garray_T ga;
				5629	int res = OK;
				5630
				5631	/* Allocate a buffer, open a memline for it and create the swap file
				5632	* (uses a temp file, not a .swp file). */
				5633	spin->si_spellbuf = open_spellbuf();
				5634	if (spin->si_spellbuf == NULL)
				5635	return FAIL;
				5636
				5637	/* Use a buffer to store the line info, avoids allocating many small
				5638	* pieces of memory. */
				5639	ga_init2(&ga, 1, 100);
				5640
				5641	/* recursively go through the tree */
				5642	if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
				5643	res = FAIL;
				5644
				5645	ga_clear(&ga);
				5646	return res;
				5647	}
				5648
				5649	/*
				5650	* Fill the table for one node and its children.
				5651	* Returns the wordnr at the start of the node.
				5652	* Returns -1 when out of memory.
				5653	*/
				5654	static int
				5655	sug_filltable(
				5656	spellinfo_T *spin,
				5657	wordnode_T *node,
				5658	int startwordnr,
				5659	garray_T gap) / place to store line of numbers */
				5660	{
				5661	wordnode_T p, np;
				5662	int wordnr = startwordnr;
				5663	int nr;
				5664	int prev_nr;
				5665
				5666	for (p = node; p != NULL; p = p->wn_sibling)
				5667	{
				5668	if (p->wn_byte == NUL)
				5669	{
				5670	gap->ga_len = 0;
				5671	prev_nr = 0;
				5672	for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
				5673	{
				5674	if (ga_grow(gap, 10) == FAIL)
				5675	return -1;
				5676
				5677	nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
				5678	/* Compute the offset from the previous nr and store the
				5679	* offset in a way that it takes a minimum number of bytes.
				5680	* It's a bit like utf-8, but without the need to mark
				5681	* following bytes. */
				5682	nr -= prev_nr;
				5683	prev_nr += nr;
				5684	gap->ga_len += offset2bytes(nr,
				5685	(char_u *)gap->ga_data + gap->ga_len);
				5686	}
				5687
				5688	/* add the NUL byte */
				5689	((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
				5690
				5691	if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
				5692	gap->ga_data, gap->ga_len, TRUE) == FAIL)
				5693	return -1;
				5694	++wordnr;
				5695
				5696	/* Remove extra NUL entries, we no longer need them. We don't
				5697	* bother freeing the nodes, the won't be reused anyway. */
				5698	while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
				5699	p->wn_sibling = p->wn_sibling->wn_sibling;
				5700
				5701	/* Clear the flags on the remaining NUL node, so that compression
				5702	* works a lot better. */
				5703	p->wn_flags = 0;
				5704	p->wn_region = 0;
				5705	}
				5706	else
				5707	{
				5708	wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
				5709	if (wordnr == -1)
				5710	return -1;
				5711	}
				5712	}
				5713	return wordnr;
				5714	}
				5715
				5716	/*
				5717	* Convert an offset into a minimal number of bytes.
				5718	* Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
				5719	* bytes.
				5720	*/
				5721	static int
				5722	offset2bytes(int nr, char_u *buf)
				5723	{
				5724	int rem;
				5725	int b1, b2, b3, b4;
				5726
				5727	/* Split the number in parts of base 255. We need to avoid NUL bytes. */
				5728	b1 = nr % 255 + 1;
				5729	rem = nr / 255;
				5730	b2 = rem % 255 + 1;
				5731	rem = rem / 255;
				5732	b3 = rem % 255 + 1;
				5733	b4 = rem / 255 + 1;
				5734
				5735	if (b4 > 1 \|\| b3 > 0x1f) /* 4 bytes */
				5736	{
				5737	buf[0] = 0xe0 + b4;
				5738	buf[1] = b3;
				5739	buf[2] = b2;
				5740	buf[3] = b1;
				5741	return 4;
				5742	}
				5743	if (b3 > 1 \|\| b2 > 0x3f ) /* 3 bytes */
				5744	{
				5745	buf[0] = 0xc0 + b3;
				5746	buf[1] = b2;
				5747	buf[2] = b1;
				5748	return 3;
				5749	}
				5750	if (b2 > 1 \|\| b1 > 0x7f ) /* 2 bytes */
				5751	{
				5752	buf[0] = 0x80 + b2;
				5753	buf[1] = b1;
				5754	return 2;
				5755	}
				5756	/* 1 byte */
				5757	buf[0] = b1;
				5758	return 1;
				5759	}
				5760
				5761	/*
				5762	* Write the .sug file in "fname".
				5763	*/
				5764	static void
				5765	sug_write(spellinfo_T spin, char_u fname)
				5766	{
				5767	FILE *fd;
				5768	wordnode_T *tree;
				5769	int nodecount;
				5770	int wcount;
				5771	char_u *line;
				5772	linenr_T lnum;
				5773	int len;
				5774
				5775	/* Create the file. Note that an existing file is silently overwritten! */
				5776	fd = mch_fopen((char *)fname, "w");
				5777	if (fd == NULL)
				5778	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5779	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5780	return;
				5781	}
				5782
				5783	vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaar	c166927	2018-06-19 14:23:53 +0200	[diff] [blame]	5784	_("Writing suggestion file %s..."), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5785	spell_message(spin, IObuff);
				5786
				5787	/*
				5788	* <SUGHEADER>: <fileID> <versionnr> <timestamp>
				5789	*/
				5790	if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
				5791	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5792	emsg(_(e_write));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5793	goto theend;
				5794	}
				5795	putc(VIMSUGVERSION, fd); /* <versionnr> */
				5796
				5797	/* Write si_sugtime to the file. */
				5798	put_time(fd, spin->si_sugtime); /* <timestamp> */
				5799
				5800	/*
				5801	* <SUGWORDTREE>
				5802	*/
				5803	spin->si_memtot = 0;
				5804	tree = spin->si_foldroot->wn_sibling;
				5805
				5806	/* Clear the index and wnode fields in the tree. */
				5807	clear_node(tree);
				5808
				5809	/* Count the number of nodes. Needed to be able to allocate the
				5810	* memory when reading the nodes. Also fills in index for shared
				5811	* nodes. */
				5812	nodecount = put_node(NULL, tree, 0, 0, FALSE);
				5813
				5814	/* number of nodes in 4 bytes */
				5815	put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */
				5816	spin->si_memtot += nodecount + nodecount * sizeof(int);
				5817
				5818	/* Write the nodes. */
				5819	(void)put_node(fd, tree, 0, 0, FALSE);
				5820
				5821	/*
				5822	* <SUGTABLE>: <sugwcount> <sugline> ...
				5823	*/
				5824	wcount = spin->si_spellbuf->b_ml.ml_line_count;
				5825	put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */
				5826
				5827	for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
				5828	{
				5829	/* <sugline>: <sugnr> ... NUL */
				5830	line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
				5831	len = (int)STRLEN(line) + 1;
				5832	if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
				5833	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5834	emsg(_(e_write));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5835	goto theend;
				5836	}
				5837	spin->si_memtot += len;
				5838	}
				5839
				5840	/* Write another byte to check for errors. */
				5841	if (putc(0, fd) == EOF)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5842	emsg(_(e_write));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5843
				5844	vim_snprintf((char *)IObuff, IOSIZE,
				5845	_("Estimated runtime memory use: %d bytes"), spin->si_memtot);
				5846	spell_message(spin, IObuff);
				5847
				5848	theend:
				5849	/* close the file */
				5850	fclose(fd);
				5851	}
				5852
				5853
				5854	/*
				5855	* Create a Vim spell file from one or more word lists.
				5856	* "fnames[0]" is the output file name.
				5857	* "fnames[fcount - 1]" is the last input file name.
				5858	* Exception: when "fnames[0]" ends in ".add" it's used as the input file name
				5859	* and ".spl" is appended to make the output file name.
				5860	*/
				5861	void
				5862	mkspell(
				5863	int fcount,
				5864	char_u **fnames,
				5865	int ascii, /* -ascii argument given */
				5866	int over_write, /* overwrite existing output file */
				5867	int added_word) /* invoked through "zg" */
				5868	{
				5869	char_u *fname = NULL;
				5870	char_u *wfname;
				5871	char_u **innames;
				5872	int incount;
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	5873	afffile_T *(afile[MAXREGIONS]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5874	int i;
				5875	int len;
				5876	stat_T st;
				5877	int error = FALSE;
				5878	spellinfo_T spin;
				5879
				5880	vim_memset(&spin, 0, sizeof(spin));
				5881	spin.si_verbose = !added_word;
				5882	spin.si_ascii = ascii;
				5883	spin.si_followup = TRUE;
				5884	spin.si_rem_accents = TRUE;
				5885	ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
				5886	ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
				5887	ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
				5888	ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
				5889	ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
				5890	ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
				5891	hash_init(&spin.si_commonwords);
				5892	spin.si_newcompID = 127; /* start compound ID at first maximum */
				5893
				5894	/* default: fnames[0] is output file, following are input files */
				5895	innames = &fnames[1];
				5896	incount = fcount - 1;
				5897
				5898	wfname = alloc(MAXPATHL);
				5899	if (wfname == NULL)
				5900	return;
				5901
				5902	if (fcount >= 1)
				5903	{
				5904	len = (int)STRLEN(fnames[0]);
				5905	if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
				5906	{
				5907	/* For ":mkspell path/en.latin1.add" output file is
				5908	* "path/en.latin1.add.spl". */
				5909	innames = &fnames[0];
				5910	incount = 1;
				5911	vim_snprintf((char *)wfname, MAXPATHL, "%s.spl", fnames[0]);
				5912	}
				5913	else if (fcount == 1)
				5914	{
				5915	/* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
				5916	innames = &fnames[0];
				5917	incount = 1;
				5918	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				5919	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				5920	}
				5921	else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
				5922	{
				5923	/* Name ends in ".spl", use as the file name. */
				5924	vim_strncpy(wfname, fnames[0], MAXPATHL - 1);
				5925	}
				5926	else
				5927	/* Name should be language, make the file name from it. */
				5928	vim_snprintf((char *)wfname, MAXPATHL, SPL_FNAME_TMPL,
				5929	fnames[0], spin.si_ascii ? (char_u *)"ascii" : spell_enc());
				5930
				5931	/* Check for .ascii.spl. */
				5932	if (strstr((char *)gettail(wfname), SPL_FNAME_ASCII) != NULL)
				5933	spin.si_ascii = TRUE;
				5934
				5935	/* Check for .add.spl. */
				5936	if (strstr((char *)gettail(wfname), SPL_FNAME_ADD) != NULL)
				5937	spin.si_add = TRUE;
				5938	}
				5939
				5940	if (incount <= 0)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5941	emsg(_(e_invarg)); /* need at least output and input names */
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5942	else if (vim_strchr(gettail(wfname), '_') != NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5943	emsg(_("E751: Output file name must not have region name"));
Bram Moolenaar	2993ac5	2018-02-10 14:12:43 +0100	[diff] [blame]	5944	else if (incount > MAXREGIONS)
Bram Moolenaar	b5443cc	2019-01-15 20:19:40 +0100	[diff] [blame]	5945	semsg(_("E754: Only up to %d regions supported"), MAXREGIONS);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5946	else
				5947	{
				5948	/* Check for overwriting before doing things that may take a lot of
				5949	* time. */
				5950	if (!over_write && mch_stat((char *)wfname, &st) >= 0)
				5951	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5952	emsg(_(e_exists));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5953	goto theend;
				5954	}
				5955	if (mch_isdir(wfname))
				5956	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5957	semsg(_(e_isadir2), wfname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5958	goto theend;
				5959	}
				5960
				5961	fname = alloc(MAXPATHL);
				5962	if (fname == NULL)
				5963	goto theend;
				5964
				5965	/*
				5966	* Init the aff and dic pointers.
				5967	* Get the region names if there are more than 2 arguments.
				5968	*/
				5969	for (i = 0; i < incount; ++i)
				5970	{
				5971	afile[i] = NULL;
				5972
				5973	if (incount > 1)
				5974	{
				5975	len = (int)STRLEN(innames[i]);
				5976	if (STRLEN(gettail(innames[i])) < 5
				5977	\|\| innames[i][len - 3] != '_')
				5978	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	5979	semsg(_("E755: Invalid region in %s"), innames[i]);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	5980	goto theend;
				5981	}
				5982	spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
				5983	spin.si_region_name[i * 2 + 1] =
				5984	TOLOWER_ASC(innames[i][len - 1]);
				5985	}
				5986	}
				5987	spin.si_region_count = incount;
				5988
				5989	spin.si_foldroot = wordtree_alloc(&spin);
				5990	spin.si_keeproot = wordtree_alloc(&spin);
				5991	spin.si_prefroot = wordtree_alloc(&spin);
				5992	if (spin.si_foldroot == NULL
				5993	\|\| spin.si_keeproot == NULL
				5994	\|\| spin.si_prefroot == NULL)
				5995	{
				5996	free_blocks(spin.si_blocks);
				5997	goto theend;
				5998	}
				5999
				6000	/* When not producing a .add.spl file clear the character table when
				6001	* we encounter one in the .aff file. This means we dump the current
				6002	* one in the .spl file if the .aff file doesn't define one. That's
				6003	* better than guessing the contents, the table will match a
				6004	* previously loaded spell file. */
				6005	if (!spin.si_add)
				6006	spin.si_clear_chartab = TRUE;
				6007
				6008	/*
				6009	* Read all the .aff and .dic files.
				6010	* Text is converted to 'encoding'.
				6011	* Words are stored in the case-folded and keep-case trees.
				6012	*/
				6013	for (i = 0; i < incount && !error; ++i)
				6014	{
				6015	spin.si_conv.vc_type = CONV_NONE;
				6016	spin.si_region = 1 << i;
				6017
				6018	vim_snprintf((char *)fname, MAXPATHL, "%s.aff", innames[i]);
				6019	if (mch_stat((char *)fname, &st) >= 0)
				6020	{
				6021	/* Read the .aff file. Will init "spin->si_conv" based on the
				6022	* "SET" line. */
				6023	afile[i] = spell_read_aff(&spin, fname);
				6024	if (afile[i] == NULL)
				6025	error = TRUE;
				6026	else
				6027	{
				6028	/* Read the .dic file and store the words in the trees. */
				6029	vim_snprintf((char *)fname, MAXPATHL, "%s.dic",
				6030	innames[i]);
				6031	if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
				6032	error = TRUE;
				6033	}
				6034	}
				6035	else
				6036	{
				6037	/* No .aff file, try reading the file as a word list. Store
				6038	* the words in the trees. */
				6039	if (spell_read_wordfile(&spin, innames[i]) == FAIL)
				6040	error = TRUE;
				6041	}
				6042
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6043	/* Free any conversion stuff. */
				6044	convert_setup(&spin.si_conv, NULL, NULL);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6045	}
				6046
				6047	if (spin.si_compflags != NULL && spin.si_nobreak)
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	6048	msg(_("Warning: both compounding and NOBREAK specified"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6049
				6050	if (!error && !got_int)
				6051	{
				6052	/*
				6053	* Combine tails in the tree.
				6054	*/
				6055	spell_message(&spin, (char_u *)_(msg_compressing));
				6056	wordtree_compress(&spin, spin.si_foldroot);
				6057	wordtree_compress(&spin, spin.si_keeproot);
				6058	wordtree_compress(&spin, spin.si_prefroot);
				6059	}
				6060
				6061	if (!error && !got_int)
				6062	{
				6063	/*
				6064	* Write the info in the spell file.
				6065	*/
				6066	vim_snprintf((char *)IObuff, IOSIZE,
Bram Moolenaar	c166927	2018-06-19 14:23:53 +0200	[diff] [blame]	6067	_("Writing spell file %s..."), wfname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6068	spell_message(&spin, IObuff);
				6069
				6070	error = write_vim_spell(&spin, wfname) == FAIL;
				6071
				6072	spell_message(&spin, (char_u *)_("Done!"));
				6073	vim_snprintf((char *)IObuff, IOSIZE,
				6074	_("Estimated runtime memory use: %d bytes"), spin.si_memtot);
				6075	spell_message(&spin, IObuff);
				6076
				6077	/*
				6078	* If the file is loaded need to reload it.
				6079	*/
				6080	if (!error)
				6081	spell_reload_one(wfname, added_word);
				6082	}
				6083
				6084	/* Free the allocated memory. */
				6085	ga_clear(&spin.si_rep);
				6086	ga_clear(&spin.si_repsal);
				6087	ga_clear(&spin.si_sal);
				6088	ga_clear(&spin.si_map);
				6089	ga_clear(&spin.si_comppat);
				6090	ga_clear(&spin.si_prefcond);
				6091	hash_clear_all(&spin.si_commonwords, 0);
				6092
				6093	/* Free the .aff file structures. */
				6094	for (i = 0; i < incount; ++i)
				6095	if (afile[i] != NULL)
				6096	spell_free_aff(afile[i]);
				6097
				6098	/* Free all the bits and pieces at once. */
				6099	free_blocks(spin.si_blocks);
				6100
				6101	/*
				6102	* If there is soundfolding info and no NOSUGFILE item create the
				6103	* .sug file with the soundfolded word trie.
				6104	*/
				6105	if (spin.si_sugtime != 0 && !error && !got_int)
				6106	spell_make_sugfile(&spin, wfname);
				6107
				6108	}
				6109
				6110	theend:
				6111	vim_free(fname);
				6112	vim_free(wfname);
				6113	}
				6114
				6115	/*
				6116	* Display a message for spell file processing when 'verbose' is set or using
				6117	* ":mkspell". "str" can be IObuff.
				6118	*/
				6119	static void
				6120	spell_message(spellinfo_T spin, char_u str)
				6121	{
				6122	if (spin->si_verbose \|\| p_verbose > 2)
				6123	{
				6124	if (!spin->si_verbose)
				6125	verbose_enter();
Bram Moolenaar	32526b3	2019-01-19 17:43:09 +0100	[diff] [blame]	6126	msg((char *)str);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6127	out_flush();
				6128	if (!spin->si_verbose)
				6129	verbose_leave();
				6130	}
				6131	}
				6132
				6133	/*
				6134	* ":[count]spellgood {word}"
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6135	* ":[count]spellwrong {word}"
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6136	* ":[count]spellundo {word}"
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6137	* ":[count]spellrare {word}"
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6138	*/
				6139	void
				6140	ex_spell(exarg_T *eap)
				6141	{
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6142	spell_add_word(eap->arg, (int)STRLEN(eap->arg),
				6143	eap->cmdidx == CMD_spellwrong ? SPELL_ADD_BAD :
				6144	eap->cmdidx == CMD_spellrare ? SPELL_ADD_RARE : SPELL_ADD_GOOD,
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6145	eap->forceit ? 0 : (int)eap->line2,
				6146	eap->cmdidx == CMD_spellundo);
				6147	}
				6148
				6149	/*
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6150	* Add "word[len]" to 'spellfile' as a good, rare or bad word.
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6151	*/
				6152	void
				6153	spell_add_word(
				6154	char_u *word,
				6155	int len,
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6156	int what, // SPELL_ADD_ values
				6157	int idx, // "zG" and "zW": zero, otherwise index in
				6158	// 'spellfile'
				6159	int undo) // TRUE for "zug", "zuG", "zuw" and "zuW"
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6160	{
				6161	FILE *fd = NULL;
				6162	buf_T *buf = NULL;
				6163	int new_spf = FALSE;
				6164	char_u *fname;
				6165	char_u *fnamebuf = NULL;
				6166	char_u line[MAXWLEN * 2];
				6167	long fpos, fpos_next = 0;
				6168	int i;
				6169	char_u *spf;
				6170
				6171	if (idx == 0) /* use internal wordlist */
				6172	{
				6173	if (int_wordlist == NULL)
				6174	{
				6175	int_wordlist = vim_tempname('s', FALSE);
				6176	if (int_wordlist == NULL)
				6177	return;
				6178	}
				6179	fname = int_wordlist;
				6180	}
				6181	else
				6182	{
				6183	/* If 'spellfile' isn't set figure out a good default value. */
				6184	if (*curwin->w_s->b_p_spf == NUL)
				6185	{
				6186	init_spellfile();
				6187	new_spf = TRUE;
				6188	}
				6189
				6190	if (*curwin->w_s->b_p_spf == NUL)
				6191	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6192	semsg(_(e_notset), "spellfile");
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6193	return;
				6194	}
				6195	fnamebuf = alloc(MAXPATHL);
				6196	if (fnamebuf == NULL)
				6197	return;
				6198
				6199	for (spf = curwin->w_s->b_p_spf, i = 1; *spf != NUL; ++i)
				6200	{
				6201	copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
				6202	if (i == idx)
				6203	break;
				6204	if (*spf == NUL)
				6205	{
Bram Moolenaar	b5443cc	2019-01-15 20:19:40 +0100	[diff] [blame]	6206	semsg(_("E765: 'spellfile' does not have %d entries"), idx);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6207	vim_free(fnamebuf);
				6208	return;
				6209	}
				6210	}
				6211
				6212	/* Check that the user isn't editing the .add file somewhere. */
				6213	buf = buflist_findname_exp(fnamebuf);
				6214	if (buf != NULL && buf->b_ml.ml_mfp == NULL)
				6215	buf = NULL;
				6216	if (buf != NULL && bufIsChanged(buf))
				6217	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6218	emsg(_(e_bufloaded));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6219	vim_free(fnamebuf);
				6220	return;
				6221	}
				6222
				6223	fname = fnamebuf;
				6224	}
				6225
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6226	if (what == SPELL_ADD_BAD \|\| undo)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6227	{
				6228	/* When the word appears as good word we need to remove that one,
				6229	* since its flags sort before the one with WF_BANNED. */
				6230	fd = mch_fopen((char *)fname, "r");
				6231	if (fd != NULL)
				6232	{
				6233	while (!vim_fgets(line, MAXWLEN * 2, fd))
				6234	{
				6235	fpos = fpos_next;
				6236	fpos_next = ftell(fd);
				6237	if (STRNCMP(word, line, len) == 0
				6238	&& (line[len] == '/' \|\| line[len] < ' '))
				6239	{
				6240	/* Found duplicate word. Remove it by writing a '#' at
				6241	* the start of the line. Mixing reading and writing
				6242	* doesn't work for all systems, close the file first. */
				6243	fclose(fd);
				6244	fd = mch_fopen((char *)fname, "r+");
				6245	if (fd == NULL)
				6246	break;
				6247	if (fseek(fd, fpos, SEEK_SET) == 0)
				6248	{
				6249	fputc('#', fd);
				6250	if (undo)
				6251	{
				6252	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6253	smsg(_("Word '%.*s' removed from %s"),
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6254	len, word, NameBuff);
				6255	}
				6256	}
				6257	fseek(fd, fpos_next, SEEK_SET);
				6258	}
				6259	}
				6260	if (fd != NULL)
				6261	fclose(fd);
				6262	}
				6263	}
				6264
				6265	if (!undo)
				6266	{
				6267	fd = mch_fopen((char *)fname, "a");
				6268	if (fd == NULL && new_spf)
				6269	{
				6270	char_u *p;
				6271
				6272	/* We just initialized the 'spellfile' option and can't open the
				6273	* file. We may need to create the "spell" directory first. We
				6274	* already checked the runtime directory is writable in
				6275	* init_spellfile(). */
				6276	if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
				6277	{
				6278	int c = *p;
				6279
				6280	/* The directory doesn't exist. Try creating it and opening
				6281	* the file again. */
				6282	*p = NUL;
				6283	vim_mkdir(fname, 0755);
				6284	*p = c;
				6285	fd = mch_fopen((char *)fname, "a");
				6286	}
				6287	}
				6288
				6289	if (fd == NULL)
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6290	semsg(_(e_notopen), fname);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6291	else
				6292	{
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6293	if (what == SPELL_ADD_BAD)
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6294	fprintf(fd, "%.*s/!\n", len, word);
Bram Moolenaar	08cc374	2019-08-11 22:51:14 +0200	[diff] [blame]	6295	else if (what == SPELL_ADD_RARE)
				6296	fprintf(fd, "%.*s/?\n", len, word);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6297	else
				6298	fprintf(fd, "%.*s\n", len, word);
				6299	fclose(fd);
				6300
				6301	home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6302	smsg(_("Word '%.*s' added to %s"), len, word, NameBuff);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6303	}
				6304	}
				6305
				6306	if (fd != NULL)
				6307	{
				6308	/* Update the .add.spl file. */
				6309	mkspell(1, &fname, FALSE, TRUE, TRUE);
				6310
				6311	/* If the .add file is edited somewhere, reload it. */
				6312	if (buf != NULL)
				6313	buf_reload(buf, buf->b_orig_mode);
				6314
				6315	redraw_all_later(SOME_VALID);
				6316	}
				6317	vim_free(fnamebuf);
				6318	}
				6319
				6320	/*
				6321	* Initialize 'spellfile' for the current buffer.
				6322	*/
				6323	static void
				6324	init_spellfile(void)
				6325	{
				6326	char_u *buf;
				6327	int l;
				6328	char_u *fname;
				6329	char_u *rtp;
				6330	char_u *lend;
				6331	int aspath = FALSE;
				6332	char_u *lstart = curbuf->b_s.b_p_spl;
				6333
				6334	if (*curwin->w_s->b_p_spl != NUL && curwin->w_s->b_langp.ga_len > 0)
				6335	{
				6336	buf = alloc(MAXPATHL);
				6337	if (buf == NULL)
				6338	return;
				6339
				6340	/* Find the end of the language name. Exclude the region. If there
				6341	* is a path separator remember the start of the tail. */
				6342	for (lend = curwin->w_s->b_p_spl; *lend != NUL
				6343	&& vim_strchr((char_u )",._", lend) == NULL; ++lend)
				6344	if (vim_ispathsep(*lend))
				6345	{
				6346	aspath = TRUE;
				6347	lstart = lend + 1;
				6348	}
				6349
				6350	/* Loop over all entries in 'runtimepath'. Use the first one where we
				6351	* are allowed to write. */
				6352	rtp = p_rtp;
				6353	while (*rtp != NUL)
				6354	{
				6355	if (aspath)
				6356	/* Use directory of an entry with path, e.g., for
				6357	* "/dir/lg.utf-8.spl" use "/dir". */
				6358	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6359	lstart - curbuf->b_s.b_p_spl - 1);
				6360	else
				6361	/* Copy the path from 'runtimepath' to buf[]. */
				6362	copy_option_part(&rtp, buf, MAXPATHL, ",");
				6363	if (filewritable(buf) == 2)
				6364	{
				6365	/* Use the first language name from 'spelllang' and the
				6366	* encoding used in the first loaded .spl file. */
				6367	if (aspath)
				6368	vim_strncpy(buf, curbuf->b_s.b_p_spl,
				6369	lend - curbuf->b_s.b_p_spl);
				6370	else
				6371	{
				6372	/* Create the "spell" directory if it doesn't exist yet. */
				6373	l = (int)STRLEN(buf);
				6374	vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
				6375	if (filewritable(buf) != 2)
				6376	vim_mkdir(buf, 0755);
				6377
				6378	l = (int)STRLEN(buf);
				6379	vim_snprintf((char *)buf + l, MAXPATHL - l,
				6380	"/%.*s", (int)(lend - lstart), lstart);
				6381	}
				6382	l = (int)STRLEN(buf);
				6383	fname = LANGP_ENTRY(curwin->w_s->b_langp, 0)
				6384	->lp_slang->sl_fname;
				6385	vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
				6386	fname != NULL
				6387	&& strstr((char *)gettail(fname), ".ascii.") != NULL
				6388	? (char_u *)"ascii" : spell_enc());
				6389	set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
				6390	break;
				6391	}
				6392	aspath = FALSE;
				6393	}
				6394
				6395	vim_free(buf);
				6396	}
				6397	}
				6398
				6399
				6400
				6401	/*
				6402	* Set the spell character tables from strings in the affix file.
				6403	*/
				6404	static int
				6405	set_spell_chartab(char_u fol, char_u low, char_u *upp)
				6406	{
				6407	/* We build the new tables here first, so that we can compare with the
				6408	* previous one. */
				6409	spelltab_T new_st;
				6410	char_u pf = fol, pl = low, *pu = upp;
				6411	int f, l, u;
				6412
				6413	clear_spell_chartab(&new_st);
				6414
				6415	while (*pf != NUL)
				6416	{
				6417	if (pl == NUL \|\| pu == NUL)
				6418	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6419	emsg(_(e_affform));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6420	return FAIL;
				6421	}
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6422	f = mb_ptr2char_adv(&pf);
				6423	l = mb_ptr2char_adv(&pl);
				6424	u = mb_ptr2char_adv(&pu);
Bram Moolenaar	264b74f	2019-01-24 17:18:42 +0100	[diff] [blame]	6425
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6426	/* Every character that appears is a word character. */
				6427	if (f < 256)
				6428	new_st.st_isw[f] = TRUE;
				6429	if (l < 256)
				6430	new_st.st_isw[l] = TRUE;
				6431	if (u < 256)
				6432	new_st.st_isw[u] = TRUE;
				6433
				6434	/* if "LOW" and "FOL" are not the same the "LOW" char needs
				6435	* case-folding */
				6436	if (l < 256 && l != f)
				6437	{
				6438	if (f >= 256)
				6439	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6440	emsg(_(e_affrange));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6441	return FAIL;
				6442	}
				6443	new_st.st_fold[l] = f;
				6444	}
				6445
				6446	/* if "UPP" and "FOL" are not the same the "UPP" char needs
				6447	* case-folding, it's upper case and the "UPP" is the upper case of
				6448	* "FOL" . */
				6449	if (u < 256 && u != f)
				6450	{
				6451	if (f >= 256)
				6452	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6453	emsg(_(e_affrange));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6454	return FAIL;
				6455	}
				6456	new_st.st_fold[u] = f;
				6457	new_st.st_isu[u] = TRUE;
				6458	new_st.st_upper[f] = u;
				6459	}
				6460	}
				6461
				6462	if (pl != NUL \|\| pu != NUL)
				6463	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6464	emsg(_(e_affform));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6465	return FAIL;
				6466	}
				6467
				6468	return set_spell_finish(&new_st);
				6469	}
				6470
				6471	/*
				6472	* Set the spell character tables from strings in the .spl file.
				6473	*/
				6474	static void
				6475	set_spell_charflags(
				6476	char_u *flags,
				6477	int cnt, /* length of "flags" */
				6478	char_u *fol)
				6479	{
				6480	/* We build the new tables here first, so that we can compare with the
				6481	* previous one. */
				6482	spelltab_T new_st;
				6483	int i;
				6484	char_u *p = fol;
				6485	int c;
				6486
				6487	clear_spell_chartab(&new_st);
				6488
				6489	for (i = 0; i < 128; ++i)
				6490	{
				6491	if (i < cnt)
				6492	{
				6493	new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
				6494	new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
				6495	}
				6496
				6497	if (*p != NUL)
				6498	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6499	c = mb_ptr2char_adv(&p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6500	new_st.st_fold[i + 128] = c;
				6501	if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
				6502	new_st.st_upper[c] = i + 128;
				6503	}
				6504	}
				6505
				6506	(void)set_spell_finish(&new_st);
				6507	}
				6508
				6509	static int
				6510	set_spell_finish(spelltab_T *new_st)
				6511	{
				6512	int i;
				6513
				6514	if (did_set_spelltab)
				6515	{
				6516	/* check that it's the same table */
				6517	for (i = 0; i < 256; ++i)
				6518	{
				6519	if (spelltab.st_isw[i] != new_st->st_isw[i]
				6520	\|\| spelltab.st_isu[i] != new_st->st_isu[i]
				6521	\|\| spelltab.st_fold[i] != new_st->st_fold[i]
				6522	\|\| spelltab.st_upper[i] != new_st->st_upper[i])
				6523	{
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6524	emsg(_("E763: Word characters differ between spell files"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6525	return FAIL;
				6526	}
				6527	}
				6528	}
				6529	else
				6530	{
				6531	/* copy the new spelltab into the one being used */
				6532	spelltab = *new_st;
				6533	did_set_spelltab = TRUE;
				6534	}
				6535
				6536	return OK;
				6537	}
				6538
				6539	/*
				6540	* Write the table with prefix conditions to the .spl file.
				6541	* When "fd" is NULL only count the length of what is written.
				6542	*/
				6543	static int
				6544	write_spell_prefcond(FILE fd, garray_T gap)
				6545	{
				6546	int i;
				6547	char_u *p;
				6548	int len;
				6549	int totlen;
				6550	size_t x = 1; /* collect return value of fwrite() */
				6551
				6552	if (fd != NULL)
				6553	put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */
				6554
				6555	totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
				6556
				6557	for (i = 0; i < gap->ga_len; ++i)
				6558	{
				6559	/* <prefcond> : <condlen> <condstr> */
				6560	p = ((char_u **)gap->ga_data)[i];
				6561	if (p != NULL)
				6562	{
				6563	len = (int)STRLEN(p);
				6564	if (fd != NULL)
				6565	{
				6566	fputc(len, fd);
				6567	x &= fwrite(p, (size_t)len, (size_t)1, fd);
				6568	}
				6569	totlen += len;
				6570	}
				6571	else if (fd != NULL)
				6572	fputc(0, fd);
				6573	}
				6574
				6575	return totlen;
				6576	}
				6577
				6578
				6579	/*
				6580	* Use map string "map" for languages "lp".
				6581	*/
				6582	static void
				6583	set_map_str(slang_T lp, char_u map)
				6584	{
				6585	char_u *p;
				6586	int headc = 0;
				6587	int c;
				6588	int i;
				6589
				6590	if (*map == NUL)
				6591	{
				6592	lp->sl_has_map = FALSE;
				6593	return;
				6594	}
				6595	lp->sl_has_map = TRUE;
				6596
				6597	/* Init the array and hash tables empty. */
				6598	for (i = 0; i < 256; ++i)
				6599	lp->sl_map_array[i] = 0;
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6600	hash_init(&lp->sl_map_hash);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6601
				6602	/*
				6603	* The similar characters are stored separated with slashes:
				6604	* "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and
				6605	* before the same slash. For characters above 255 sl_map_hash is used.
				6606	*/
				6607	for (p = map; *p != NUL; )
				6608	{
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6609	c = mb_cptr2char_adv(&p);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6610	if (c == '/')
				6611	headc = 0;
				6612	else
				6613	{
				6614	if (headc == 0)
				6615	headc = c;
				6616
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6617	/* Characters above 255 don't fit in sl_map_array[], put them in
				6618	* the hash table. Each entry is the char, a NUL the headchar and
				6619	* a NUL. */
				6620	if (c >= 256)
				6621	{
				6622	int cl = mb_char2len(c);
				6623	int headcl = mb_char2len(headc);
				6624	char_u *b;
				6625	hash_T hash;
				6626	hashitem_T *hi;
				6627
Bram Moolenaar	964b374	2019-05-24 18:54:09 +0200	[diff] [blame]	6628	b = alloc(cl + headcl + 2);
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6629	if (b == NULL)
				6630	return;
				6631	mb_char2bytes(c, b);
				6632	b[cl] = NUL;
				6633	mb_char2bytes(headc, b + cl + 1);
				6634	b[cl + 1 + headcl] = NUL;
				6635	hash = hash_hash(b);
				6636	hi = hash_lookup(&lp->sl_map_hash, b, hash);
				6637	if (HASHITEM_EMPTY(hi))
				6638	hash_add_item(&lp->sl_map_hash, hi, b, hash);
				6639	else
				6640	{
				6641	/* This should have been checked when generating the .spl
				6642	* file. */
Bram Moolenaar	f9e3e09	2019-01-13 23:38:42 +0100	[diff] [blame]	6643	emsg(_("E783: duplicate char in MAP entry"));
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6644	vim_free(b);
				6645	}
				6646	}
				6647	else
Bram Moolenaar	9ccfebd	2016-07-19 16:39:08 +0200	[diff] [blame]	6648	lp->sl_map_array[c] = headc;
				6649	}
				6650	}
				6651	}
				6652
				6653
				6654	#endif /* FEAT_SPELL */