blob: 1ca1f9eb27c4f06e3c092d9adea5cb9bf0a71c0f [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * spell.c: code for spell checking
 *
 * See spellfile.c for the Vim spell file format.
 *
 * The spell checking mechanism uses a tree (aka trie). Each node in the tree
 * has a list of bytes that can appear (siblings). For each byte there is a
 * pointer to the node with the byte that follows in the word (child).
 *
 * A NUL byte is used where the word may end. The bytes are sorted, so that
 * binary searching can be used and the NUL bytes are at the start. The
 * number of possible bytes is stored before the list of bytes.
 *
 * The tree uses two arrays: "byts" stores the characters, "idxs" stores
 * either the next index or flags. The tree starts at index 0. For example,
 * to look up "vi" this sequence is followed:
 *	i = 0
 *	len = byts[i]
 *	n = where "v" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	n = where "i" appears in byts[i + 1] to byts[i + len]
 *	i = idxs[n]
 *	len = byts[i]
 *	find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
 *
 * There are two word trees: one with case-folded words and one with words in
 * original case. The second one is only used for keep-case words and is
 * usually small.
 *
 * There is one additional tree for when not all prefixes are applied when
 * generating the .spl file. This tree stores all the possible prefixes, as
 * if they were words. At each word (prefix) end the prefix nr is stored, the
 * following word must support this prefix nr. And the condition nr is
 * stored, used to look up the condition that the word must match with.
 *
 * Thanks to Olaf Seibert for providing an example implementation of this tree
 * and the compression mechanism.
 * LZ trie ideas:
 *	http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
 * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
 *
 * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
 *
 * Why doesn't Vim use aspell/ispell/myspell/etc.?
 * See ":help develop-spell".
 */
57
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word. This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is (3 * word_score + sound_score) / 4, using integer division.
 * Both arguments are parenthesized in the expansion, so that an expression
 * such as "a + b" passed as an argument is weighted as a whole.
 */
#define RESCORE(word_score, sound_score) ((3 * (word_score) + (sound_score)) / 4)
67
/*
 * Do the opposite: based on a maximum end score and a known sound score,
 * compute the maximum word score that can be used.
 *
 * The result is (4 * word_score - sound_score) / 3, using integer division;
 * here "word_score" is the maximum end score. Both arguments are
 * parenthesized in the expansion, so that expression arguments are safe.
 */
#define MAXSCORE(word_score, sound_score) ((4 * (word_score) - (sound_score)) / 3)
73
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +020074#define IN_SPELL_C
Bram Moolenaare19defe2005-03-21 08:23:33 +000075#include "vim.h"
76
Bram Moolenaarf71a3db2006-03-12 21:50:18 +000077#if defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaare19defe2005-03-21 08:23:33 +000078
Bram Moolenaar4770d092006-01-12 23:22:24 +000079#ifndef UNIX /* it's in os_unix.h for Unix */
80# include <time.h> /* for time_t */
81#endif
82
/* only used for su_badflags */
#define WF_MIXCAP   0x20	/* mix of upper and lower case: macaRONI */

/* OR of the WF_ONECAP, WF_ALLCAP, WF_KEEPCAP and WF_FIXCAP word flags. */
#define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP)

#define REGION_ALL 0xff		/* word valid in all regions */

#define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
#define VIMSUGMAGICL 6		/* number of bytes in VIMSUGMAGIC */
#define VIMSUGVERSION 1

/* Result values. Lower number is accepted over higher one. */
#define SP_BANNED	(-1)	/* parenthesized: safe inside any expression */
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000100
Bram Moolenaar4770d092006-01-12 23:22:24 +0000101typedef struct wordcount_S
102{
103 short_u wc_count; /* nr of times word was seen */
104 char_u wc_word[1]; /* word, actually longer */
105} wordcount_T;
106
Bram Moolenaar84026842016-07-17 20:37:43 +0200107#define WC_KEY_OFF offsetof(wordcount_T, wc_word)
Bram Moolenaar4770d092006-01-12 23:22:24 +0000108#define HI2WC(hi) ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
109#define MAXWORDCOUNT 0xffff
110
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000111/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000112 * Information used when looking for suggestions.
113 */
114typedef struct suginfo_S
115{
116 garray_T su_ga; /* suggestions, contains "suggest_T" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000117 int su_maxcount; /* max. number of suggestions displayed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000118 int su_maxscore; /* maximum score for adding to su_ga */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000119 int su_sfmaxscore; /* idem, for when doing soundfold words */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000120 garray_T su_sga; /* like su_ga, sound-folded scoring */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000121 char_u *su_badptr; /* start of bad word in line */
122 int su_badlen; /* length of detected bad word in line */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000123 int su_badflags; /* caps flags for bad word */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000124 char_u su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
125 char_u su_fbadword[MAXWLEN]; /* su_badword case-folded */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000126 char_u su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000127 hashtab_T su_banned; /* table with banned words */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000128 slang_T *su_sallang; /* default language for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000129} suginfo_T;
130
131/* One word suggestion. Used in "si_ga". */
132typedef struct suggest_S
133{
134 char_u *st_word; /* suggested word, allocated string */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000135 int st_wordlen; /* STRLEN(st_word) */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000136 int st_orglen; /* length of replaced text */
137 int st_score; /* lower is better */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000138 int st_altscore; /* used when st_score compares equal */
139 int st_salscore; /* st_score is for soundalike */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000140 int st_had_bonus; /* bonus already included in score */
Bram Moolenaar8b96d642005-09-05 22:05:30 +0000141 slang_T *st_slang; /* language used for sound folding */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000142} suggest_T;
143
/* Access item "i" of growarray "ga", which holds suggest_T items. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])

/* TRUE if a word appears in the list of banned words.
 * "su" is parenthesized so that any pointer expression can be passed. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

/* Number of suggestions kept when cleaning up. We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later.
 * This is 150 when su_maxcount is below 130, otherwise su_maxcount + 20. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions. Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000157
/* score for various changes */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes. With fewer than two changes it's slightly faster but we miss a
 * few good suggestions. In rare cases we need to try three or four changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* The expansion is parenthesized so that SCORE_BIG can be used as an operand
 * of any operator, e.g. "x / SCORE_BIG". */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999	    /* accept any score */
#define SCORE_LIMITMAX	350	    /* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000203
204/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000205 * Structure to store info for word matching.
206 */
207typedef struct matchinf_S
208{
209 langp_T *mi_lp; /* info for language and region */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000210
211 /* pointers to original text to be checked */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000212 char_u *mi_word; /* start of word being checked */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000213 char_u *mi_end; /* end of matching word so far */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000214 char_u *mi_fend; /* next char to be added to mi_fword */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000215 char_u *mi_cend; /* char after what was used for
216 mi_capflags */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000217
218 /* case-folded text */
219 char_u mi_fword[MAXWLEN + 1]; /* mi_word case-folded */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000220 int mi_fwordlen; /* nr of valid bytes in mi_fword */
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000221
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000222 /* for when checking word after a prefix */
223 int mi_prefarridx; /* index in sl_pidxs with list of
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000224 affixID/condition */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000225 int mi_prefcnt; /* number of entries at mi_prefarridx */
226 int mi_prefixlen; /* byte length of prefix */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000227#ifdef FEAT_MBYTE
228 int mi_cprefixlen; /* byte length of prefix in original
229 case */
230#else
231# define mi_cprefixlen mi_prefixlen /* it's the same value */
232#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000233
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000234 /* for when checking a compound word */
235 int mi_compoff; /* start of following word offset */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000236 char_u mi_compflags[MAXWLEN]; /* flags for compound words used */
237 int mi_complen; /* nr of compound words used */
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000238 int mi_compextra; /* nr of COMPOUNDROOT words */
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000239
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000240 /* others */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000241 int mi_result; /* result so far: SP_BAD, SP_OK, etc. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000242 int mi_capflags; /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200243 win_T *mi_win; /* buffer being checked */
Bram Moolenaar78622822005-08-23 21:00:13 +0000244
245 /* for NOBREAK */
246 int mi_result2; /* "mi_resul" without following word */
247 char_u *mi_end2; /* "mi_end" without following word */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000248} matchinf_T;
249
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000250
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100251static int spell_iswordp(char_u *p, win_T *wp);
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000252#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100253static int spell_mb_isword_class(int cl, win_T *wp);
Bram Moolenaar9c96f592005-06-30 21:52:39 +0000254#endif
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000255
/*
 * For finding suggestions: At each node in the tree these states are tried:
 *
 * NOTE(review): STATE_UNROT3L and STATE_UNROT3R undo a rotate, but no
 * matching "rotate" states are present in this enum - confirm that this is
 * intended.
 */
typedef enum
{
    STATE_START = 0,	/* At start of node check for NUL bytes (goodword
			 * ends); if badword ends there is a match, otherwise
			 * try splitting word. */
    STATE_NOPREFIX,	/* try without prefix */
    STATE_SPLITUNDO,	/* Undo splitting. */
    STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
    STATE_PLAIN,	/* Use each byte of the node. */
    STATE_DEL,		/* Delete a byte from the bad word. */
    STATE_INS_PREP,	/* Prepare for inserting bytes. */
    STATE_INS,		/* Insert a byte in the bad word. */
    STATE_SWAP,		/* Swap two bytes. */
    STATE_UNSWAP,	/* Undo swap two characters. */
    STATE_SWAP3,	/* Swap two characters over three. */
    STATE_UNSWAP3,	/* Undo Swap two characters over three. */
    STATE_UNROT3L,	/* Undo rotate three characters left */
    STATE_UNROT3R,	/* Undo rotate three characters right */
    STATE_REP_INI,	/* Prepare for using REP items. */
    STATE_REP,		/* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,	/* Undo a REP item replacement. */
    STATE_FINAL		/* End of this node. */
} state_T;
282
283/*
Bram Moolenaar0c405862005-06-22 22:26:26 +0000284 * Struct to keep the state at each level in suggest_try_change().
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000285 */
286typedef struct trystate_S
287{
Bram Moolenaarea424162005-06-16 21:51:00 +0000288 state_T ts_state; /* state at this level, STATE_ */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000289 int ts_score; /* score */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000290 idx_T ts_arridx; /* index in tree array, start of node */
Bram Moolenaarea424162005-06-16 21:51:00 +0000291 short ts_curi; /* index in list of child nodes */
292 char_u ts_fidx; /* index in fword[], case-folded bad word */
293 char_u ts_fidxtry; /* ts_fidx at which bytes may be changed */
294 char_u ts_twordlen; /* valid length of tword[] */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +0000295 char_u ts_prefixdepth; /* stack depth for end of prefix or
Bram Moolenaard12a1322005-08-21 22:08:24 +0000296 * PFD_PREFIXTREE or PFD_NOPREFIX */
297 char_u ts_flags; /* TSF_ flags */
Bram Moolenaarea424162005-06-16 21:51:00 +0000298#ifdef FEAT_MBYTE
299 char_u ts_tcharlen; /* number of bytes in tword character */
300 char_u ts_tcharidx; /* current byte index in tword character */
301 char_u ts_isdiff; /* DIFF_ values */
302 char_u ts_fcharstart; /* index in fword where badword char started */
303#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +0000304 char_u ts_prewordlen; /* length of word in "preword[]" */
305 char_u ts_splitoff; /* index in "tword" after last split */
Bram Moolenaar78622822005-08-23 21:00:13 +0000306 char_u ts_splitfidx; /* "ts_fidx" at word split */
Bram Moolenaar5195e452005-08-19 20:32:47 +0000307 char_u ts_complen; /* nr of compound words used */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000308 char_u ts_compsplit; /* index for "compflags" where word was spit */
Bram Moolenaar0c405862005-06-22 22:26:26 +0000309 char_u ts_save_badflags; /* su_badflags saved here */
Bram Moolenaar4770d092006-01-12 23:22:24 +0000310 char_u ts_delidx; /* index in fword for char that was deleted,
311 valid when "ts_flags" has TSF_DIDDEL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000312} trystate_T;
313
/* values for ts_isdiff */
#define DIFF_NONE	0	/* no different byte (yet) */
#define DIFF_YES	1	/* different byte found */
#define DIFF_INSERT	2	/* inserting character */

/* values for ts_flags */
#define TSF_PREFIXOK	1	/* already checked that prefix is OK */
#define TSF_DIDSPLIT	2	/* tried split at this point */
#define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX	0xff	/* not using prefixes */
#define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
#define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD	    0	/* find word case-folded */
#define FIND_KEEPWORD	    1	/* find keep-case word */
#define FIND_PREFIX	    2	/* find word after prefix */
#define FIND_COMPOUND	    3	/* find case-folded compound word */
#define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000335
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100336static void find_word(matchinf_T *mip, int mode);
337static int match_checkcompoundpattern(char_u *ptr, int wlen, garray_T *gap);
338static int can_compound(slang_T *slang, char_u *word, char_u *flags);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100339static int match_compoundrule(slang_T *slang, char_u *compflags);
340static int valid_word_prefix(int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req);
341static void find_prefix(matchinf_T *mip, int mode);
342static int fold_more(matchinf_T *mip);
343static int spell_valid_case(int wordflags, int treeflags);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100344static void spell_load_cb(char_u *fname, void *cookie);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100345static int count_syllables(slang_T *slang, char_u *word);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100346static void clear_midword(win_T *buf);
347static void use_midword(slang_T *lp, win_T *buf);
348static int find_region(char_u *rp, char_u *region);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100349static int check_need_cap(linenr_T lnum, colnr_T col);
350static void spell_find_suggest(char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive);
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000351#ifdef FEAT_EVAL
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100352static void spell_suggest_expr(suginfo_T *su, char_u *expr);
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000353#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100354static void spell_suggest_file(suginfo_T *su, char_u *fname);
355static void spell_suggest_intern(suginfo_T *su, int interactive);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100356static void spell_find_cleanup(suginfo_T *su);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100357static void suggest_try_special(suginfo_T *su);
358static void suggest_try_change(suginfo_T *su);
359static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, int soundfold);
360static void go_deeper(trystate_T *stack, int depth, int score_add);
Bram Moolenaar53805d12005-08-01 07:08:33 +0000361#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100362static int nofold_len(char_u *fword, int flen, char_u *word);
Bram Moolenaar53805d12005-08-01 07:08:33 +0000363#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100364static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword);
365static void score_comp_sal(suginfo_T *su);
366static void score_combine(suginfo_T *su);
367static int stp_sal_score(suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound);
368static void suggest_try_soundalike_prep(void);
369static void suggest_try_soundalike(suginfo_T *su);
370static void suggest_try_soundalike_finish(void);
371static void add_sound_suggest(suginfo_T *su, char_u *goodword, int score, langp_T *lp);
372static int soundfold_find(slang_T *slang, char_u *word);
373static void make_case_word(char_u *fword, char_u *cword, int flags);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100374static int similar_chars(slang_T *slang, int c1, int c2);
375static void add_suggestion(suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf);
376static void check_suggestions(suginfo_T *su, garray_T *gap);
377static void add_banned(suginfo_T *su, char_u *word);
378static void rescore_suggestions(suginfo_T *su);
379static void rescore_one(suginfo_T *su, suggest_T *stp);
380static int cleanup_suggestions(garray_T *gap, int maxscore, int keep);
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100381static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res);
382static void spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res);
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000383#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100384static void spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res);
Bram Moolenaara1ba8112005-06-28 23:23:32 +0000385#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100386static int soundalike_score(char_u *goodsound, char_u *badsound);
387static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword);
388static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit);
Bram Moolenaar4770d092006-01-12 23:22:24 +0000389#ifdef FEAT_MBYTE
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100390static int spell_edit_score_limit_w(slang_T *slang, char_u *badword, char_u *goodword, int limit);
Bram Moolenaar4770d092006-01-12 23:22:24 +0000391#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +0100392static void dump_word(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum);
393static linenr_T dump_prefixes(slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000394
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000395
Bram Moolenaara40ceaf2006-01-13 22:35:40 +0000396/* Remember what "z?" replaced. */
397static char_u *repl_from = NULL;
398static char_u *repl_to = NULL;
399
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000400/*
401 * Main spell-checking function.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000402 * "ptr" points to a character that could be the start of a word.
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000403 * "*attrp" is set to the highlight index for a badly spelled word. For a
404 * non-word or when it's OK it remains unchanged.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000405 * This must only be called when 'spelllang' is not empty.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000406 *
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000407 * "capcol" is used to check for a Capitalised word after the end of a
408 * sentence. If it's zero then perform the check. Return the column where to
409 * check next, or -1 when no sentence end was found. If it's NULL then don't
410 * worry.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000411 *
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000412 * Returns the length of the word in bytes, also when it's OK, so that the
413 * caller can skip over the word.
414 */
415 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +0100416spell_check(
417 win_T *wp, /* current window */
418 char_u *ptr,
419 hlf_T *attrp,
420 int *capcol, /* column to check for Capital */
421 int docount) /* count good words */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000422{
423 matchinf_T mi; /* Most things are put in "mi" so that it can
424 be passed to functions quickly. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000425 int nrlen = 0; /* found a number first */
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000426 int c;
Bram Moolenaar5195e452005-08-19 20:32:47 +0000427 int wrongcaplen = 0;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000428 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +0000429 int count_word = docount;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000430
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000431 /* A word never starts at a space or a control character. Return quickly
432 * then, skipping over the character. */
433 if (*ptr <= ' ')
434 return 1;
Bram Moolenaara226a6d2006-02-26 23:59:20 +0000435
436 /* Return here when loading language files failed. */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200437 if (wp->w_s->b_langp.ga_len == 0)
Bram Moolenaara226a6d2006-02-26 23:59:20 +0000438 return 1;
439
Bram Moolenaar5195e452005-08-19 20:32:47 +0000440 vim_memset(&mi, 0, sizeof(matchinf_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000441
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000442 /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and
Bram Moolenaar43abc522005-12-10 20:15:02 +0000443 * 0X99FF. But always do check spelling to find "3GPP" and "11
444 * julifeest". */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000445 if (*ptr >= '0' && *ptr <= '9')
Bram Moolenaar51485f02005-06-04 21:55:20 +0000446 {
Bram Moolenaar887c1fe2016-01-02 17:56:35 +0100447 if (*ptr == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
448 mi.mi_end = skipbin(ptr + 2);
449 else if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
Bram Moolenaar3982c542005-06-08 21:56:31 +0000450 mi.mi_end = skiphex(ptr + 2);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000451 else
452 mi.mi_end = skipdigits(ptr);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000453 nrlen = (int)(mi.mi_end - ptr);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000454 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000455
Bram Moolenaar0c405862005-06-22 22:26:26 +0000456 /* Find the normal end of the word (until the next non-word character). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000457 mi.mi_word = ptr;
Bram Moolenaar43abc522005-12-10 20:15:02 +0000458 mi.mi_fend = ptr;
Bram Moolenaar860cae12010-06-05 23:22:07 +0200459 if (spell_iswordp(mi.mi_fend, wp))
Bram Moolenaar51485f02005-06-04 21:55:20 +0000460 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000461 do
Bram Moolenaar51485f02005-06-04 21:55:20 +0000462 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100463 MB_PTR_ADV(mi.mi_fend);
Bram Moolenaar860cae12010-06-05 23:22:07 +0200464 } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp));
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000465
Bram Moolenaar860cae12010-06-05 23:22:07 +0200466 if (capcol != NULL && *capcol == 0 && wp->w_s->b_cap_prog != NULL)
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000467 {
468 /* Check word starting with capital letter. */
Bram Moolenaar53805d12005-08-01 07:08:33 +0000469 c = PTR2CHAR(ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000470 if (!SPELL_ISUPPER(c))
Bram Moolenaar5195e452005-08-19 20:32:47 +0000471 wrongcaplen = (int)(mi.mi_fend - ptr);
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000472 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000473 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000474 if (capcol != NULL)
475 *capcol = -1;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000476
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000477 /* We always use the characters up to the next non-word character,
478 * also for bad words. */
479 mi.mi_end = mi.mi_fend;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000480
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000481 /* Check caps type later. */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200482 mi.mi_capflags = 0;
483 mi.mi_cend = NULL;
484 mi.mi_win = wp;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000485
Bram Moolenaar5195e452005-08-19 20:32:47 +0000486 /* case-fold the word with one non-word character, so that we can check
487 * for the word end. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000488 if (*mi.mi_fend != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100489 MB_PTR_ADV(mi.mi_fend);
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000490
491 (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
492 MAXWLEN + 1);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000493 mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000494
495 /* The word is bad unless we recognize it. */
496 mi.mi_result = SP_BAD;
Bram Moolenaar78622822005-08-23 21:00:13 +0000497 mi.mi_result2 = SP_BAD;
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000498
499 /*
500 * Loop over the languages specified in 'spelllang'.
Bram Moolenaar4770d092006-01-12 23:22:24 +0000501 * We check them all, because a word may be matched longer in another
502 * language.
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000503 */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200504 for (lpi = 0; lpi < wp->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000505 {
Bram Moolenaar860cae12010-06-05 23:22:07 +0200506 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, lpi);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000507
508 /* If reloading fails the language is still in the list but everything
509 * has been cleared. */
510 if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
511 continue;
512
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000513 /* Check for a matching word in case-folded words. */
514 find_word(&mi, FIND_FOLDWORD);
515
516 /* Check for a matching word in keep-case words. */
517 find_word(&mi, FIND_KEEPWORD);
518
519 /* Check for matching prefixes. */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000520 find_prefix(&mi, FIND_FOLDWORD);
Bram Moolenaar78622822005-08-23 21:00:13 +0000521
522 /* For a NOBREAK language, may want to use a word without a following
523 * word as a backup. */
524 if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
525 && mi.mi_result2 != SP_BAD)
526 {
527 mi.mi_result = mi.mi_result2;
528 mi.mi_end = mi.mi_end2;
529 }
Bram Moolenaar4770d092006-01-12 23:22:24 +0000530
531 /* Count the word in the first language where it's found to be OK. */
532 if (count_word && mi.mi_result == SP_OK)
533 {
534 count_common_word(mi.mi_lp->lp_slang, ptr,
535 (int)(mi.mi_end - ptr), 1);
536 count_word = FALSE;
537 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000538 }
539
540 if (mi.mi_result != SP_OK)
541 {
Bram Moolenaar0c405862005-06-22 22:26:26 +0000542 /* If we found a number skip over it. Allows for "42nd". Do flag
543 * rare and local words, e.g., "3GPP". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000544 if (nrlen > 0)
Bram Moolenaar0c405862005-06-22 22:26:26 +0000545 {
546 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
547 return nrlen;
548 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000549
550 /* When we are at a non-word character there is no error, just
551 * skip over the character (try looking for a word after it). */
Bram Moolenaarcc63c642013-11-12 04:44:01 +0100552 else if (!spell_iswordp_nmw(ptr, wp))
Bram Moolenaar63d5a1e2005-04-19 21:30:25 +0000553 {
Bram Moolenaar860cae12010-06-05 23:22:07 +0200554 if (capcol != NULL && wp->w_s->b_cap_prog != NULL)
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000555 {
556 regmatch_T regmatch;
Bram Moolenaardffa5b82014-11-19 16:38:07 +0100557 int r;
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000558
559 /* Check for end of sentence. */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200560 regmatch.regprog = wp->w_s->b_cap_prog;
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000561 regmatch.rm_ic = FALSE;
Bram Moolenaardffa5b82014-11-19 16:38:07 +0100562 r = vim_regexec(&regmatch, ptr, 0);
563 wp->w_s->b_cap_prog = regmatch.regprog;
564 if (r)
Bram Moolenaarf9184a12005-07-02 23:10:47 +0000565 *capcol = (int)(regmatch.endp[0] - ptr);
566 }
567
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000568#ifdef FEAT_MBYTE
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000569 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000570 return (*mb_ptr2len)(ptr);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000571#endif
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000572 return 1;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000573 }
Bram Moolenaar5195e452005-08-19 20:32:47 +0000574 else if (mi.mi_end == ptr)
575 /* Always include at least one character. Required for when there
576 * is a mixup in "midword". */
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100577 MB_PTR_ADV(mi.mi_end);
Bram Moolenaar78622822005-08-23 21:00:13 +0000578 else if (mi.mi_result == SP_BAD
Bram Moolenaar860cae12010-06-05 23:22:07 +0200579 && LANGP_ENTRY(wp->w_s->b_langp, 0)->lp_slang->sl_nobreak)
Bram Moolenaar78622822005-08-23 21:00:13 +0000580 {
581 char_u *p, *fp;
582 int save_result = mi.mi_result;
583
584 /* First language in 'spelllang' is NOBREAK. Find first position
585 * at which any word would be valid. */
Bram Moolenaar860cae12010-06-05 23:22:07 +0200586 mi.mi_lp = LANGP_ENTRY(wp->w_s->b_langp, 0);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000587 if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
Bram Moolenaar78622822005-08-23 21:00:13 +0000588 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000589 p = mi.mi_word;
590 fp = mi.mi_fword;
591 for (;;)
Bram Moolenaar78622822005-08-23 21:00:13 +0000592 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100593 MB_PTR_ADV(p);
594 MB_PTR_ADV(fp);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000595 if (p >= mi.mi_end)
596 break;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000597 mi.mi_compoff = (int)(fp - mi.mi_fword);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000598 find_word(&mi, FIND_COMPOUND);
599 if (mi.mi_result != SP_BAD)
600 {
601 mi.mi_end = p;
602 break;
603 }
Bram Moolenaar78622822005-08-23 21:00:13 +0000604 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000605 mi.mi_result = save_result;
Bram Moolenaar78622822005-08-23 21:00:13 +0000606 }
Bram Moolenaar78622822005-08-23 21:00:13 +0000607 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000608
609 if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000610 *attrp = HLF_SPB;
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000611 else if (mi.mi_result == SP_RARE)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000612 *attrp = HLF_SPR;
Bram Moolenaard857f0e2005-06-21 22:37:39 +0000613 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000614 *attrp = HLF_SPL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000615 }
616
Bram Moolenaar5195e452005-08-19 20:32:47 +0000617 if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
618 {
619 /* Report SpellCap only when the word isn't badly spelled. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +0000620 *attrp = HLF_SPC;
Bram Moolenaar5195e452005-08-19 20:32:47 +0000621 return wrongcaplen;
622 }
623
Bram Moolenaar51485f02005-06-04 21:55:20 +0000624 return (int)(mi.mi_end - ptr);
625}
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000626
Bram Moolenaar51485f02005-06-04 21:55:20 +0000627/*
628 * Check if the word at "mip->mi_word" is in the tree.
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000629 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
630 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
631 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
632 * tree.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000633 *
634 * For a match mip->mi_result is updated.
635 */
636 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +0100637find_word(matchinf_T *mip, int mode)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000638{
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000639 idx_T arridx = 0;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000640 int endlen[MAXWLEN]; /* length at possible word endings */
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000641 idx_T endidx[MAXWLEN]; /* possible word endings */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000642 int endidxcnt = 0;
643 int len;
644 int wlen = 0;
645 int flen;
646 int c;
647 char_u *ptr;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000648 idx_T lo, hi, m;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000649#ifdef FEAT_MBYTE
650 char_u *s;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000651#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +0000652 char_u *p;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000653 int res = SP_BAD;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000654 slang_T *slang = mip->mi_lp->lp_slang;
655 unsigned flags;
656 char_u *byts;
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000657 idx_T *idxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000658 int word_ends;
Bram Moolenaard12a1322005-08-21 22:08:24 +0000659 int prefix_found;
Bram Moolenaar78622822005-08-23 21:00:13 +0000660 int nobreak_result;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000661
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000662 if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000663 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000664 /* Check for word with matching case in keep-case tree. */
665 ptr = mip->mi_word;
666 flen = 9999; /* no case folding, always enough bytes */
667 byts = slang->sl_kbyts;
668 idxs = slang->sl_kidxs;
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000669
670 if (mode == FIND_KEEPCOMPOUND)
671 /* Skip over the previously found word(s). */
672 wlen += mip->mi_compoff;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000673 }
674 else
675 {
676 /* Check for case-folded in case-folded tree. */
677 ptr = mip->mi_fword;
678 flen = mip->mi_fwordlen; /* available case-folded bytes */
679 byts = slang->sl_fbyts;
680 idxs = slang->sl_fidxs;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000681
682 if (mode == FIND_PREFIX)
683 {
684 /* Skip over the prefix. */
685 wlen = mip->mi_prefixlen;
686 flen -= mip->mi_prefixlen;
687 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000688 else if (mode == FIND_COMPOUND)
689 {
690 /* Skip over the previously found word(s). */
691 wlen = mip->mi_compoff;
692 flen -= mip->mi_compoff;
693 }
694
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000695 }
696
Bram Moolenaar51485f02005-06-04 21:55:20 +0000697 if (byts == NULL)
698 return; /* array is empty */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +0000699
Bram Moolenaar51485f02005-06-04 21:55:20 +0000700 /*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000701 * Repeat advancing in the tree until:
702 * - there is a byte that doesn't match,
703 * - we reach the end of the tree,
704 * - or we reach the end of the line.
Bram Moolenaar51485f02005-06-04 21:55:20 +0000705 */
706 for (;;)
707 {
Bram Moolenaar0c405862005-06-22 22:26:26 +0000708 if (flen <= 0 && *mip->mi_fend != NUL)
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000709 flen = fold_more(mip);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000710
711 len = byts[arridx++];
712
713 /* If the first possible byte is a zero the word could end here.
714 * Remember this index, we first check for the longest word. */
715 if (byts[arridx] == 0)
716 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +0000717 if (endidxcnt == MAXWLEN)
718 {
719 /* Must be a corrupted spell file. */
720 EMSG(_(e_format));
721 return;
722 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000723 endlen[endidxcnt] = wlen;
724 endidx[endidxcnt++] = arridx++;
725 --len;
726
727 /* Skip over the zeros, there can be several flag/region
728 * combinations. */
729 while (len > 0 && byts[arridx] == 0)
730 {
731 ++arridx;
732 --len;
733 }
734 if (len == 0)
735 break; /* no children, word must end here */
736 }
737
738 /* Stop looking at end of the line. */
739 if (ptr[wlen] == NUL)
740 break;
741
742 /* Perform a binary search in the list of accepted bytes. */
743 c = ptr[wlen];
Bram Moolenaar0c405862005-06-22 22:26:26 +0000744 if (c == TAB) /* <Tab> is handled like <Space> */
745 c = ' ';
Bram Moolenaar51485f02005-06-04 21:55:20 +0000746 lo = arridx;
747 hi = arridx + len - 1;
748 while (lo < hi)
749 {
750 m = (lo + hi) / 2;
751 if (byts[m] > c)
752 hi = m - 1;
753 else if (byts[m] < c)
754 lo = m + 1;
755 else
756 {
757 lo = hi = m;
758 break;
759 }
760 }
761
762 /* Stop if there is no matching byte. */
763 if (hi < lo || byts[lo] != c)
764 break;
765
766 /* Continue at the child (if there is one). */
767 arridx = idxs[lo];
768 ++wlen;
769 --flen;
Bram Moolenaar0c405862005-06-22 22:26:26 +0000770
771 /* One space in the good word may stand for several spaces in the
772 * checked word. */
773 if (c == ' ')
774 {
775 for (;;)
776 {
777 if (flen <= 0 && *mip->mi_fend != NUL)
778 flen = fold_more(mip);
779 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
780 break;
781 ++wlen;
782 --flen;
783 }
784 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000785 }
786
787 /*
788 * Verify that one of the possible endings is valid. Try the longest
789 * first.
790 */
791 while (endidxcnt > 0)
792 {
793 --endidxcnt;
794 arridx = endidx[endidxcnt];
795 wlen = endlen[endidxcnt];
796
797#ifdef FEAT_MBYTE
798 if ((*mb_head_off)(ptr, ptr + wlen) > 0)
799 continue; /* not at first byte of character */
800#endif
Bram Moolenaar860cae12010-06-05 23:22:07 +0200801 if (spell_iswordp(ptr + wlen, mip->mi_win))
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000802 {
Bram Moolenaar78622822005-08-23 21:00:13 +0000803 if (slang->sl_compprog == NULL && !slang->sl_nobreak)
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000804 continue; /* next char is a word character */
805 word_ends = FALSE;
806 }
807 else
808 word_ends = TRUE;
Bram Moolenaard12a1322005-08-21 22:08:24 +0000809 /* The prefix flag is before compound flags. Once a valid prefix flag
810 * has been found we try compound flags. */
811 prefix_found = FALSE;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000812
813#ifdef FEAT_MBYTE
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000814 if (mode != FIND_KEEPWORD && has_mbyte)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000815 {
816 /* Compute byte length in original word, length may change
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000817 * when folding case. This can be slow, take a shortcut when the
818 * case-folded word is equal to the keep-case word. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000819 p = mip->mi_word;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000820 if (STRNCMP(ptr, p, wlen) != 0)
821 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100822 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
823 MB_PTR_ADV(p);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000824 wlen = (int)(p - mip->mi_word);
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000825 }
Bram Moolenaar51485f02005-06-04 21:55:20 +0000826 }
827#endif
828
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000829 /* Check flags and region. For FIND_PREFIX check the condition and
830 * prefix ID.
831 * Repeat this if there are more flags/region alternatives until there
832 * is a match. */
833 res = SP_BAD;
834 for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
835 --len, ++arridx)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000836 {
837 flags = idxs[arridx];
Bram Moolenaar9f30f502005-06-14 22:01:04 +0000838
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000839 /* For the fold-case tree check that the case of the checked word
840 * matches with what the word in the tree requires.
841 * For keep-case tree the case is always right. For prefixes we
842 * don't bother to check. */
843 if (mode == FIND_FOLDWORD)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000844 {
Bram Moolenaar51485f02005-06-04 21:55:20 +0000845 if (mip->mi_cend != mip->mi_word + wlen)
846 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000847 /* mi_capflags was set for a different word length, need
848 * to do it again. */
Bram Moolenaar51485f02005-06-04 21:55:20 +0000849 mip->mi_cend = mip->mi_word + wlen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +0000850 mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
Bram Moolenaar51485f02005-06-04 21:55:20 +0000851 }
852
Bram Moolenaar0c405862005-06-22 22:26:26 +0000853 if (mip->mi_capflags == WF_KEEPCAP
854 || !spell_valid_case(mip->mi_capflags, flags))
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000855 continue;
Bram Moolenaar51485f02005-06-04 21:55:20 +0000856 }
857
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000858 /* When mode is FIND_PREFIX the word must support the prefix:
859 * check the prefix ID and the condition. Do that for the list at
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000860 * mip->mi_prefarridx that find_prefix() filled. */
Bram Moolenaard12a1322005-08-21 22:08:24 +0000861 else if (mode == FIND_PREFIX && !prefix_found)
Bram Moolenaar51485f02005-06-04 21:55:20 +0000862 {
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000863 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
Bram Moolenaardfb9ac02005-07-05 21:36:03 +0000864 flags,
Bram Moolenaar53805d12005-08-01 07:08:33 +0000865 mip->mi_word + mip->mi_cprefixlen, slang,
866 FALSE);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000867 if (c == 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000868 continue;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +0000869
870 /* Use the WF_RARE flag for a rare prefix. */
871 if (c & WF_RAREPFX)
872 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +0000873 prefix_found = TRUE;
Bram Moolenaar1d73c882005-06-19 22:48:47 +0000874 }
875
Bram Moolenaar78622822005-08-23 21:00:13 +0000876 if (slang->sl_nobreak)
877 {
878 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
879 && (flags & WF_BANNED) == 0)
880 {
881 /* NOBREAK: found a valid following word. That's all we
882 * need to know, so return. */
883 mip->mi_result = SP_OK;
884 break;
885 }
886 }
887
888 else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
889 || !word_ends))
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000890 {
Bram Moolenaar2113a1d2006-09-11 19:38:08 +0000891 /* If there is no compound flag or the word is shorter than
Bram Moolenaar5195e452005-08-19 20:32:47 +0000892 * COMPOUNDMIN reject it quickly.
893 * Makes you wonder why someone puts a compound flag on a word
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000894 * that's too short... Myspell compatibility requires this
895 * anyway. */
Bram Moolenaare52325c2005-08-22 22:54:29 +0000896 if (((unsigned)flags >> 24) == 0
897 || wlen - mip->mi_compoff < slang->sl_compminlen)
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000898 continue;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000899#ifdef FEAT_MBYTE
900 /* For multi-byte chars check character length against
901 * COMPOUNDMIN. */
902 if (has_mbyte
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000903 && slang->sl_compminlen > 0
Bram Moolenaarac6e65f2005-08-29 22:25:38 +0000904 && mb_charlen_len(mip->mi_word + mip->mi_compoff,
905 wlen - mip->mi_compoff) < slang->sl_compminlen)
906 continue;
907#endif
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000908
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000909 /* Limit the number of compound words to COMPOUNDWORDMAX if no
Bram Moolenaare52325c2005-08-22 22:54:29 +0000910 * maximum for syllables is specified. */
Bram Moolenaar899dddf2006-03-26 21:06:50 +0000911 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
912 > slang->sl_compmax
Bram Moolenaare52325c2005-08-22 22:54:29 +0000913 && slang->sl_compsylmax == MAXWLEN)
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000914 continue;
Bram Moolenaar5195e452005-08-19 20:32:47 +0000915
Bram Moolenaar910f66f2006-04-05 20:41:53 +0000916 /* Don't allow compounding on a side where an affix was added,
917 * unless COMPOUNDPERMITFLAG was used. */
918 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
919 continue;
920 if (!word_ends && (flags & WF_NOCOMPAFT))
921 continue;
922
Bram Moolenaard12a1322005-08-21 22:08:24 +0000923 /* Quickly check if compounding is possible with this flag. */
Bram Moolenaar6de68532005-08-24 22:08:48 +0000924 if (!byte_in_str(mip->mi_complen == 0
Bram Moolenaard12a1322005-08-21 22:08:24 +0000925 ? slang->sl_compstartflags
926 : slang->sl_compallflags,
Bram Moolenaar6de68532005-08-24 22:08:48 +0000927 ((unsigned)flags >> 24)))
Bram Moolenaar5195e452005-08-19 20:32:47 +0000928 continue;
929
Bram Moolenaar9f94b052008-11-30 20:12:46 +0000930 /* If there is a match with a CHECKCOMPOUNDPATTERN rule
931 * discard the compound word. */
932 if (match_checkcompoundpattern(ptr, wlen, &slang->sl_comppat))
933 continue;
934
Bram Moolenaare52325c2005-08-22 22:54:29 +0000935 if (mode == FIND_COMPOUND)
936 {
937 int capflags;
938
939 /* Need to check the caps type of the appended compound
940 * word. */
941#ifdef FEAT_MBYTE
942 if (has_mbyte && STRNCMP(ptr, mip->mi_word,
943 mip->mi_compoff) != 0)
944 {
945 /* case folding may have changed the length */
946 p = mip->mi_word;
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100947 for (s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s))
948 MB_PTR_ADV(p);
Bram Moolenaare52325c2005-08-22 22:54:29 +0000949 }
950 else
951#endif
952 p = mip->mi_word + mip->mi_compoff;
953 capflags = captype(p, mip->mi_word + wlen);
954 if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
955 && (flags & WF_FIXCAP) != 0))
956 continue;
957
958 if (capflags != WF_ALLCAP)
959 {
960 /* When the character before the word is a word
961 * character we do not accept a Onecap word. We do
962 * accept a no-caps word, even when the dictionary
963 * word specifies ONECAP. */
Bram Moolenaar91acfff2017-03-12 19:22:36 +0100964 MB_PTR_BACK(mip->mi_word, p);
Bram Moolenaarcc63c642013-11-12 04:44:01 +0100965 if (spell_iswordp_nmw(p, mip->mi_win)
Bram Moolenaare52325c2005-08-22 22:54:29 +0000966 ? capflags == WF_ONECAP
967 : (flags & WF_ONECAP) != 0
968 && capflags != WF_ONECAP)
969 continue;
970 }
971 }
972
Bram Moolenaar5195e452005-08-19 20:32:47 +0000973 /* If the word ends the sequence of compound flags of the
Bram Moolenaar362e1a32006-03-06 23:29:24 +0000974 * words must match with one of the COMPOUNDRULE items and
Bram Moolenaar5195e452005-08-19 20:32:47 +0000975 * the number of syllables must not be too large. */
976 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
977 mip->mi_compflags[mip->mi_complen + 1] = NUL;
978 if (word_ends)
979 {
980 char_u fword[MAXWLEN];
981
982 if (slang->sl_compsylmax < MAXWLEN)
983 {
984 /* "fword" is only needed for checking syllables. */
985 if (ptr == mip->mi_word)
986 (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
987 else
988 vim_strncpy(fword, ptr, endlen[endidxcnt]);
989 }
990 if (!can_compound(slang, fword, mip->mi_compflags))
991 continue;
992 }
Bram Moolenaar9f94b052008-11-30 20:12:46 +0000993 else if (slang->sl_comprules != NULL
994 && !match_compoundrule(slang, mip->mi_compflags))
995 /* The compound flags collected so far do not match any
996 * COMPOUNDRULE, discard the compounded word. */
997 continue;
Bram Moolenaarae5bce12005-08-15 21:41:48 +0000998 }
999
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001000 /* Check NEEDCOMPOUND: can't use word without compounding. */
1001 else if (flags & WF_NEEDCOMP)
1002 continue;
1003
Bram Moolenaar78622822005-08-23 21:00:13 +00001004 nobreak_result = SP_OK;
1005
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001006 if (!word_ends)
1007 {
Bram Moolenaar78622822005-08-23 21:00:13 +00001008 int save_result = mip->mi_result;
1009 char_u *save_end = mip->mi_end;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001010 langp_T *save_lp = mip->mi_lp;
1011 int lpi;
Bram Moolenaar78622822005-08-23 21:00:13 +00001012
1013 /* Check that a valid word follows. If there is one and we
1014 * are compounding, it will set "mi_result", thus we are
1015 * always finished here. For NOBREAK we only check that a
1016 * valid word follows.
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001017 * Recursive! */
Bram Moolenaar78622822005-08-23 21:00:13 +00001018 if (slang->sl_nobreak)
1019 mip->mi_result = SP_BAD;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001020
1021 /* Find following word in case-folded tree. */
1022 mip->mi_compoff = endlen[endidxcnt];
1023#ifdef FEAT_MBYTE
1024 if (has_mbyte && mode == FIND_KEEPWORD)
1025 {
1026 /* Compute byte length in case-folded word from "wlen":
1027 * byte length in keep-case word. Length may change when
1028 * folding case. This can be slow, take a shortcut when
1029 * the case-folded word is equal to the keep-case word. */
1030 p = mip->mi_fword;
1031 if (STRNCMP(ptr, p, wlen) != 0)
1032 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001033 for (s = ptr; s < ptr + wlen; MB_PTR_ADV(s))
1034 MB_PTR_ADV(p);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001035 mip->mi_compoff = (int)(p - mip->mi_fword);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001036 }
1037 }
1038#endif
Bram Moolenaarba534352016-04-21 09:20:26 +02001039#if 0 /* Disabled, see below */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001040 c = mip->mi_compoff;
Bram Moolenaarba534352016-04-21 09:20:26 +02001041#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00001042 ++mip->mi_complen;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001043 if (flags & WF_COMPROOT)
1044 ++mip->mi_compextra;
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001045
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001046 /* For NOBREAK we need to try all NOBREAK languages, at least
1047 * to find the ".add" file(s). */
Bram Moolenaar860cae12010-06-05 23:22:07 +02001048 for (lpi = 0; lpi < mip->mi_win->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar78622822005-08-23 21:00:13 +00001049 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001050 if (slang->sl_nobreak)
1051 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02001052 mip->mi_lp = LANGP_ENTRY(mip->mi_win->w_s->b_langp, lpi);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001053 if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1054 || !mip->mi_lp->lp_slang->sl_nobreak)
1055 continue;
1056 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00001057
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001058 find_word(mip, FIND_COMPOUND);
1059
1060 /* When NOBREAK any word that matches is OK. Otherwise we
1061 * need to find the longest match, thus try with keep-case
1062 * and prefix too. */
Bram Moolenaar78622822005-08-23 21:00:13 +00001063 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1064 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001065 /* Find following word in keep-case tree. */
1066 mip->mi_compoff = wlen;
1067 find_word(mip, FIND_KEEPCOMPOUND);
1068
Bram Moolenaar910f66f2006-04-05 20:41:53 +00001069#if 0 /* Disabled, a prefix must not appear halfway a compound word,
1070 unless the COMPOUNDPERMITFLAG is used and then it can't be a
1071 postponed prefix. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001072 if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1073 {
1074 /* Check for following word with prefix. */
1075 mip->mi_compoff = c;
1076 find_prefix(mip, FIND_COMPOUND);
1077 }
Bram Moolenaar910f66f2006-04-05 20:41:53 +00001078#endif
Bram Moolenaar78622822005-08-23 21:00:13 +00001079 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001080
1081 if (!slang->sl_nobreak)
1082 break;
Bram Moolenaar78622822005-08-23 21:00:13 +00001083 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001084 --mip->mi_complen;
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001085 if (flags & WF_COMPROOT)
1086 --mip->mi_compextra;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001087 mip->mi_lp = save_lp;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001088
Bram Moolenaar78622822005-08-23 21:00:13 +00001089 if (slang->sl_nobreak)
1090 {
1091 nobreak_result = mip->mi_result;
1092 mip->mi_result = save_result;
1093 mip->mi_end = save_end;
1094 }
1095 else
1096 {
1097 if (mip->mi_result == SP_OK)
1098 break;
1099 continue;
1100 }
Bram Moolenaarae5bce12005-08-15 21:41:48 +00001101 }
1102
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001103 if (flags & WF_BANNED)
1104 res = SP_BANNED;
1105 else if (flags & WF_REGION)
1106 {
1107 /* Check region. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001108 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001109 res = SP_OK;
1110 else
1111 res = SP_LOCAL;
1112 }
1113 else if (flags & WF_RARE)
1114 res = SP_RARE;
1115 else
1116 res = SP_OK;
1117
Bram Moolenaar78622822005-08-23 21:00:13 +00001118 /* Always use the longest match and the best result. For NOBREAK
1119 * we separately keep the longest match without a following good
1120 * word as a fall-back. */
1121 if (nobreak_result == SP_BAD)
1122 {
1123 if (mip->mi_result2 > res)
1124 {
1125 mip->mi_result2 = res;
1126 mip->mi_end2 = mip->mi_word + wlen;
1127 }
1128 else if (mip->mi_result2 == res
1129 && mip->mi_end2 < mip->mi_word + wlen)
1130 mip->mi_end2 = mip->mi_word + wlen;
1131 }
1132 else if (mip->mi_result > res)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001133 {
1134 mip->mi_result = res;
1135 mip->mi_end = mip->mi_word + wlen;
1136 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001137 else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001138 mip->mi_end = mip->mi_word + wlen;
1139
Bram Moolenaar78622822005-08-23 21:00:13 +00001140 if (mip->mi_result == SP_OK)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001141 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001142 }
1143
Bram Moolenaar78622822005-08-23 21:00:13 +00001144 if (mip->mi_result == SP_OK)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001145 break;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001146 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001147}
1148
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001149/*
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001150 * Return TRUE if there is a match between the word ptr[wlen] and
1151 * CHECKCOMPOUNDPATTERN rules, assuming that we will concatenate with another
1152 * word.
1153 * A match means that the first part of CHECKCOMPOUNDPATTERN matches at the
1154 * end of ptr[wlen] and the second part matches after it.
1155 */
1156 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001157match_checkcompoundpattern(
1158 char_u *ptr,
1159 int wlen,
1160 garray_T *gap) /* &sl_comppat */
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001161{
1162 int i;
1163 char_u *p;
1164 int len;
1165
1166 for (i = 0; i + 1 < gap->ga_len; i += 2)
1167 {
1168 p = ((char_u **)gap->ga_data)[i + 1];
1169 if (STRNCMP(ptr + wlen, p, STRLEN(p)) == 0)
1170 {
1171 /* Second part matches at start of following compound word, now
1172 * check if first part matches at end of previous word. */
1173 p = ((char_u **)gap->ga_data)[i];
Bram Moolenaar19c9c762008-12-09 21:34:39 +00001174 len = (int)STRLEN(p);
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001175 if (len <= wlen && STRNCMP(ptr + wlen - len, p, len) == 0)
1176 return TRUE;
1177 }
1178 }
1179 return FALSE;
1180}
1181
1182/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001183 * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1184 * does not have too many syllables.
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001185 */
1186 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001187can_compound(slang_T *slang, char_u *word, char_u *flags)
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001188{
Bram Moolenaar6de68532005-08-24 22:08:48 +00001189#ifdef FEAT_MBYTE
1190 char_u uflags[MAXWLEN * 2];
1191 int i;
1192#endif
1193 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001194
1195 if (slang->sl_compprog == NULL)
1196 return FALSE;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001197#ifdef FEAT_MBYTE
1198 if (enc_utf8)
1199 {
1200 /* Need to convert the single byte flags to utf8 characters. */
1201 p = uflags;
1202 for (i = 0; flags[i] != NUL; ++i)
Bram Moolenaarace95982017-03-29 17:30:27 +02001203 p += utf_char2bytes(flags[i], p);
Bram Moolenaar6de68532005-08-24 22:08:48 +00001204 *p = NUL;
1205 p = uflags;
1206 }
1207 else
1208#endif
1209 p = flags;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01001210 if (!vim_regexec_prog(&slang->sl_compprog, FALSE, p, 0))
Bram Moolenaar5195e452005-08-19 20:32:47 +00001211 return FALSE;
1212
Bram Moolenaare52325c2005-08-22 22:54:29 +00001213 /* Count the number of syllables. This may be slow, do it last. If there
1214 * are too many syllables AND the number of compound words is above
Bram Moolenaar899dddf2006-03-26 21:06:50 +00001215 * COMPOUNDWORDMAX then compounding is not allowed. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00001216 if (slang->sl_compsylmax < MAXWLEN
1217 && count_syllables(slang, word) > slang->sl_compsylmax)
Bram Moolenaar6de68532005-08-24 22:08:48 +00001218 return (int)STRLEN(flags) < slang->sl_compmax;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001219 return TRUE;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00001220}
1221
1222/*
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001223 * Return TRUE when the sequence of flags in "compflags" plus "flag" can
1224 * possibly form a valid compounded word. This also checks the COMPOUNDRULE
1225 * lines if they don't contain wildcards.
1226 */
1227 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001228can_be_compound(
1229 trystate_T *sp,
1230 slang_T *slang,
1231 char_u *compflags,
1232 int flag)
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001233{
1234 /* If the flag doesn't appear in sl_compstartflags or sl_compallflags
1235 * then it can't possibly compound. */
1236 if (!byte_in_str(sp->ts_complen == sp->ts_compsplit
1237 ? slang->sl_compstartflags : slang->sl_compallflags, flag))
1238 return FALSE;
1239
1240 /* If there are no wildcards, we can check if the flags collected so far
1241 * possibly can form a match with COMPOUNDRULE patterns. This only
1242 * makes sense when we have two or more words. */
1243 if (slang->sl_comprules != NULL && sp->ts_complen > sp->ts_compsplit)
1244 {
1245 int v;
1246
1247 compflags[sp->ts_complen] = flag;
1248 compflags[sp->ts_complen + 1] = NUL;
1249 v = match_compoundrule(slang, compflags + sp->ts_compsplit);
1250 compflags[sp->ts_complen] = NUL;
1251 return v;
1252 }
1253
1254 return TRUE;
1255}
1256
1257
1258/*
1259 * Return TRUE if the compound flags in compflags[] match the start of any
1260 * compound rule. This is used to stop trying a compound if the flags
1261 * collected so far can't possibly match any compound rule.
1262 * Caller must check that slang->sl_comprules is not NULL.
1263 */
1264 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001265match_compoundrule(slang_T *slang, char_u *compflags)
Bram Moolenaar9f94b052008-11-30 20:12:46 +00001266{
1267 char_u *p;
1268 int i;
1269 int c;
1270
1271 /* loop over all the COMPOUNDRULE entries */
1272 for (p = slang->sl_comprules; *p != NUL; ++p)
1273 {
1274 /* loop over the flags in the compound word we have made, match
1275 * them against the current rule entry */
1276 for (i = 0; ; ++i)
1277 {
1278 c = compflags[i];
1279 if (c == NUL)
1280 /* found a rule that matches for the flags we have so far */
1281 return TRUE;
1282 if (*p == '/' || *p == NUL)
1283 break; /* end of rule, it's too short */
1284 if (*p == '[')
1285 {
1286 int match = FALSE;
1287
1288 /* compare against all the flags in [] */
1289 ++p;
1290 while (*p != ']' && *p != NUL)
1291 if (*p++ == c)
1292 match = TRUE;
1293 if (!match)
1294 break; /* none matches */
1295 }
1296 else if (*p != c)
1297 break; /* flag of word doesn't match flag in pattern */
1298 ++p;
1299 }
1300
1301 /* Skip to the next "/", where the next pattern starts. */
1302 p = vim_strchr(p, '/');
1303 if (p == NULL)
1304 break;
1305 }
1306
1307 /* Checked all the rules and none of them match the flags, so there
1308 * can't possibly be a compound starting with these flags. */
1309 return FALSE;
1310}
1311
1312/*
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001313 * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1314 * ID in "flags" for the word "word".
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001315 * The WF_RAREPFX flag is included in the return value for a rare prefix.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001316 */
1317 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001318valid_word_prefix(
1319 int totprefcnt, /* nr of prefix IDs */
1320 int arridx, /* idx in sl_pidxs[] */
1321 int flags,
1322 char_u *word,
1323 slang_T *slang,
1324 int cond_req) /* only use prefixes with a condition */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001325{
1326 int prefcnt;
1327 int pidx;
Bram Moolenaardffa5b82014-11-19 16:38:07 +01001328 regprog_T **rp;
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001329 int prefid;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001330
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001331 prefid = (unsigned)flags >> 24;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001332 for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1333 {
1334 pidx = slang->sl_pidxs[arridx + prefcnt];
1335
1336 /* Check the prefix ID. */
1337 if (prefid != (pidx & 0xff))
1338 continue;
1339
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00001340 /* Check if the prefix doesn't combine and the word already has a
1341 * suffix. */
1342 if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1343 continue;
1344
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001345 /* Check the condition, if there is one. The condition index is
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001346 * stored in the two bytes above the prefix ID byte. */
Bram Moolenaardffa5b82014-11-19 16:38:07 +01001347 rp = &slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1348 if (*rp != NULL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001349 {
Bram Moolenaardffa5b82014-11-19 16:38:07 +01001350 if (!vim_regexec_prog(rp, FALSE, word, 0))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001351 continue;
1352 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00001353 else if (cond_req)
1354 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001355
Bram Moolenaar53805d12005-08-01 07:08:33 +00001356 /* It's a match! Return the WF_ flags. */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001357 return pidx;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001358 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00001359 return 0;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001360}
1361
1362/*
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001363 * Check if the word at "mip->mi_word" has a matching prefix.
1364 * If it does, then check the following word.
1365 *
Bram Moolenaard12a1322005-08-21 22:08:24 +00001366 * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1367 * prefix in a compound word.
1368 *
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001369 * For a match mip->mi_result is updated.
1370 */
1371 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001372find_prefix(matchinf_T *mip, int mode)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001373{
1374 idx_T arridx = 0;
1375 int len;
1376 int wlen = 0;
1377 int flen;
1378 int c;
1379 char_u *ptr;
1380 idx_T lo, hi, m;
1381 slang_T *slang = mip->mi_lp->lp_slang;
1382 char_u *byts;
1383 idx_T *idxs;
1384
Bram Moolenaar42eeac32005-06-29 22:40:58 +00001385 byts = slang->sl_pbyts;
1386 if (byts == NULL)
1387 return; /* array is empty */
1388
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001389 /* We use the case-folded word here, since prefixes are always
1390 * case-folded. */
1391 ptr = mip->mi_fword;
1392 flen = mip->mi_fwordlen; /* available case-folded bytes */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001393 if (mode == FIND_COMPOUND)
1394 {
1395 /* Skip over the previously found word(s). */
1396 ptr += mip->mi_compoff;
1397 flen -= mip->mi_compoff;
1398 }
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001399 idxs = slang->sl_pidxs;
1400
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001401 /*
1402 * Repeat advancing in the tree until:
1403 * - there is a byte that doesn't match,
1404 * - we reach the end of the tree,
1405 * - or we reach the end of the line.
1406 */
1407 for (;;)
1408 {
1409 if (flen == 0 && *mip->mi_fend != NUL)
1410 flen = fold_more(mip);
1411
1412 len = byts[arridx++];
1413
1414 /* If the first possible byte is a zero the prefix could end here.
1415 * Check if the following word matches and supports the prefix. */
1416 if (byts[arridx] == 0)
1417 {
1418 /* There can be several prefixes with different conditions. We
1419 * try them all, since we don't know which one will give the
1420 * longest match. The word is the same each time, pass the list
1421 * of possible prefixes to find_word(). */
1422 mip->mi_prefarridx = arridx;
1423 mip->mi_prefcnt = len;
1424 while (len > 0 && byts[arridx] == 0)
1425 {
1426 ++arridx;
1427 --len;
1428 }
1429 mip->mi_prefcnt -= len;
1430
1431 /* Find the word that comes after the prefix. */
1432 mip->mi_prefixlen = wlen;
Bram Moolenaard12a1322005-08-21 22:08:24 +00001433 if (mode == FIND_COMPOUND)
1434 /* Skip over the previously found word(s). */
1435 mip->mi_prefixlen += mip->mi_compoff;
1436
Bram Moolenaar53805d12005-08-01 07:08:33 +00001437#ifdef FEAT_MBYTE
1438 if (has_mbyte)
1439 {
1440 /* Case-folded length may differ from original length. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00001441 mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1442 mip->mi_prefixlen, mip->mi_word);
Bram Moolenaar53805d12005-08-01 07:08:33 +00001443 }
1444 else
Bram Moolenaard12a1322005-08-21 22:08:24 +00001445 mip->mi_cprefixlen = mip->mi_prefixlen;
Bram Moolenaar53805d12005-08-01 07:08:33 +00001446#endif
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001447 find_word(mip, FIND_PREFIX);
1448
1449
1450 if (len == 0)
1451 break; /* no children, word must end here */
1452 }
1453
1454 /* Stop looking at end of the line. */
1455 if (ptr[wlen] == NUL)
1456 break;
1457
1458 /* Perform a binary search in the list of accepted bytes. */
1459 c = ptr[wlen];
1460 lo = arridx;
1461 hi = arridx + len - 1;
1462 while (lo < hi)
1463 {
1464 m = (lo + hi) / 2;
1465 if (byts[m] > c)
1466 hi = m - 1;
1467 else if (byts[m] < c)
1468 lo = m + 1;
1469 else
1470 {
1471 lo = hi = m;
1472 break;
1473 }
1474 }
1475
1476 /* Stop if there is no matching byte. */
1477 if (hi < lo || byts[lo] != c)
1478 break;
1479
1480 /* Continue at the child (if there is one). */
1481 arridx = idxs[lo];
1482 ++wlen;
1483 --flen;
1484 }
1485}
1486
1487/*
1488 * Need to fold at least one more character. Do until next non-word character
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001489 * for efficiency. Include the non-word character too.
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001490 * Return the length of the folded chars in bytes.
1491 */
1492 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001493fold_more(matchinf_T *mip)
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001494{
1495 int flen;
1496 char_u *p;
1497
1498 p = mip->mi_fend;
1499 do
1500 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001501 MB_PTR_ADV(mip->mi_fend);
Bram Moolenaar860cae12010-06-05 23:22:07 +02001502 } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_win));
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001503
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001504 /* Include the non-word character so that we can check for the word end. */
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001505 if (*mip->mi_fend != NUL)
Bram Moolenaar91acfff2017-03-12 19:22:36 +01001506 MB_PTR_ADV(mip->mi_fend);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001507
1508 (void)spell_casefold(p, (int)(mip->mi_fend - p),
1509 mip->mi_fword + mip->mi_fwordlen,
1510 MAXWLEN - mip->mi_fwordlen);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001511 flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001512 mip->mi_fwordlen += flen;
1513 return flen;
1514}
1515
1516/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001517 * Check case flags for a word. Return TRUE if the word has the requested
1518 * case.
1519 */
1520 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001521spell_valid_case(
1522 int wordflags, /* flags for the checked word. */
1523 int treeflags) /* flags for the word in the spell tree */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001524{
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00001525 return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001526 || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00001527 && ((treeflags & WF_ONECAP) == 0
1528 || (wordflags & WF_ONECAP) != 0)));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001529}
1530
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001531/*
1532 * Return TRUE if spell checking is not enabled.
1533 */
1534 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001535no_spell_checking(win_T *wp)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001536{
Bram Moolenaar860cae12010-06-05 23:22:07 +02001537 if (!wp->w_p_spell || *wp->w_s->b_p_spl == NUL
1538 || wp->w_s->b_langp.ga_len == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00001539 {
1540 EMSG(_("E756: Spell checking is not enabled"));
1541 return TRUE;
1542 }
1543 return FALSE;
1544}
Bram Moolenaar51485f02005-06-04 21:55:20 +00001545
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001546/*
1547 * Move to next spell error.
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001548 * "curline" is FALSE for "[s", "]s", "[S" and "]S".
1549 * "curline" is TRUE to find word under/after cursor in the same line.
Bram Moolenaar5195e452005-08-19 20:32:47 +00001550 * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
1551 * to after badly spelled word before the cursor.
Bram Moolenaar6de68532005-08-24 22:08:48 +00001552 * Return 0 if not found, length of the badly spelled word otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001553 */
1554 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001555spell_move_to(
1556 win_T *wp,
1557 int dir, /* FORWARD or BACKWARD */
1558 int allwords, /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
1559 int curline,
1560 hlf_T *attrp) /* return: attributes of bad word or NULL
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001561 (only when "dir" is FORWARD) */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001562{
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001563 linenr_T lnum;
1564 pos_T found_pos;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001565 int found_len = 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001566 char_u *line;
1567 char_u *p;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001568 char_u *endp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001569 hlf_T attr;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001570 int len;
Bram Moolenaar34b466e2013-11-28 17:41:46 +01001571#ifdef FEAT_SYN_HL
Bram Moolenaar860cae12010-06-05 23:22:07 +02001572 int has_syntax = syntax_present(wp);
Bram Moolenaar34b466e2013-11-28 17:41:46 +01001573#endif
Bram Moolenaar89d40322006-08-29 15:30:07 +00001574 int col;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001575 int can_spell;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001576 char_u *buf = NULL;
1577 int buflen = 0;
1578 int skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001579 int capcol = -1;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001580 int found_one = FALSE;
1581 int wrapped = FALSE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001582
Bram Moolenaar95529562005-08-25 21:21:38 +00001583 if (no_spell_checking(wp))
Bram Moolenaar6de68532005-08-24 22:08:48 +00001584 return 0;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001585
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001586 /*
1587 * Start looking for bad word at the start of the line, because we can't
Bram Moolenaar86ca6e32006-03-29 21:06:37 +00001588 * start halfway a word, we don't know where it starts or ends.
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001589 *
1590 * When searching backwards, we continue in the line to find the last
1591 * bad word (in the cursor line: before the cursor).
Bram Moolenaar0c405862005-06-22 22:26:26 +00001592 *
1593 * We concatenate the start of the next line, so that wrapped words work
1594 * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards
1595 * though...
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001596 */
Bram Moolenaar95529562005-08-25 21:21:38 +00001597 lnum = wp->w_cursor.lnum;
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01001598 CLEAR_POS(&found_pos);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001599
1600 while (!got_int)
1601 {
Bram Moolenaar95529562005-08-25 21:21:38 +00001602 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001603
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001604 len = (int)STRLEN(line);
Bram Moolenaar0c405862005-06-22 22:26:26 +00001605 if (buflen < len + MAXWLEN + 2)
1606 {
1607 vim_free(buf);
1608 buflen = len + MAXWLEN + 2;
1609 buf = alloc(buflen);
1610 if (buf == NULL)
1611 break;
1612 }
1613
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001614 /* In first line check first word for Capital. */
1615 if (lnum == 1)
1616 capcol = 0;
1617
1618 /* For checking first word with a capital skip white space. */
1619 if (capcol == 0)
Bram Moolenaare2e69e42017-09-02 20:30:35 +02001620 capcol = getwhitecols(line);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001621 else if (curline && wp == curwin)
1622 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001623 /* For spellbadword(): check if first word needs a capital. */
Bram Moolenaare2e69e42017-09-02 20:30:35 +02001624 col = getwhitecols(line);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001625 if (check_need_cap(lnum, col))
1626 capcol = col;
1627
1628 /* Need to get the line again, may have looked at the previous
1629 * one. */
1630 line = ml_get_buf(wp->w_buffer, lnum, FALSE);
1631 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001632
Bram Moolenaar0c405862005-06-22 22:26:26 +00001633 /* Copy the line into "buf" and append the start of the next line if
1634 * possible. */
1635 STRCPY(buf, line);
Bram Moolenaar95529562005-08-25 21:21:38 +00001636 if (lnum < wp->w_buffer->b_ml.ml_line_count)
Bram Moolenaar5dd95a12006-05-13 12:09:24 +00001637 spell_cat_line(buf + STRLEN(buf),
1638 ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
Bram Moolenaar0c405862005-06-22 22:26:26 +00001639
1640 p = buf + skip;
1641 endp = buf + len;
1642 while (p < endp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001643 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001644 /* When searching backward don't search after the cursor. Unless
1645 * we wrapped around the end of the buffer. */
Bram Moolenaar51485f02005-06-04 21:55:20 +00001646 if (dir == BACKWARD
Bram Moolenaar95529562005-08-25 21:21:38 +00001647 && lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001648 && !wrapped
Bram Moolenaar95529562005-08-25 21:21:38 +00001649 && (colnr_T)(p - buf) >= wp->w_cursor.col)
Bram Moolenaar51485f02005-06-04 21:55:20 +00001650 break;
1651
1652 /* start of word */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001653 attr = HLF_COUNT;
Bram Moolenaar4770d092006-01-12 23:22:24 +00001654 len = spell_check(wp, p, &attr, &capcol, FALSE);
Bram Moolenaar51485f02005-06-04 21:55:20 +00001655
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001656 if (attr != HLF_COUNT)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001657 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001658 /* We found a bad word. Check the attribute. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001659 if (allwords || attr == HLF_SPB)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001660 {
Bram Moolenaar51485f02005-06-04 21:55:20 +00001661 /* When searching forward only accept a bad word after
1662 * the cursor. */
1663 if (dir == BACKWARD
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001664 || lnum != wp->w_cursor.lnum
Bram Moolenaar95529562005-08-25 21:21:38 +00001665 || (lnum == wp->w_cursor.lnum
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001666 && (wrapped
1667 || (colnr_T)(curline ? p - buf + len
Bram Moolenaar0c405862005-06-22 22:26:26 +00001668 : p - buf)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001669 > wp->w_cursor.col)))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001670 {
Bram Moolenaar34b466e2013-11-28 17:41:46 +01001671#ifdef FEAT_SYN_HL
Bram Moolenaar51485f02005-06-04 21:55:20 +00001672 if (has_syntax)
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001673 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001674 col = (int)(p - buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00001675 (void)syn_get_id(wp, lnum, (colnr_T)col,
Bram Moolenaar56cefaf2008-01-12 15:47:10 +00001676 FALSE, &can_spell, FALSE);
Bram Moolenaard68071d2006-05-02 22:08:30 +00001677 if (!can_spell)
1678 attr = HLF_COUNT;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001679 }
1680 else
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00001681#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00001682 can_spell = TRUE;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001683
Bram Moolenaar51485f02005-06-04 21:55:20 +00001684 if (can_spell)
1685 {
Bram Moolenaard68071d2006-05-02 22:08:30 +00001686 found_one = TRUE;
Bram Moolenaar51485f02005-06-04 21:55:20 +00001687 found_pos.lnum = lnum;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001688 found_pos.col = (int)(p - buf);
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001689#ifdef FEAT_VIRTUALEDIT
Bram Moolenaar51485f02005-06-04 21:55:20 +00001690 found_pos.coladd = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001691#endif
Bram Moolenaar51485f02005-06-04 21:55:20 +00001692 if (dir == FORWARD)
1693 {
1694 /* No need to search further. */
Bram Moolenaar95529562005-08-25 21:21:38 +00001695 wp->w_cursor = found_pos;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001696 vim_free(buf);
Bram Moolenaar95529562005-08-25 21:21:38 +00001697 if (attrp != NULL)
1698 *attrp = attr;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001699 return len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001700 }
Bram Moolenaar5195e452005-08-19 20:32:47 +00001701 else if (curline)
1702 /* Insert mode completion: put cursor after
1703 * the bad word. */
1704 found_pos.col += len;
Bram Moolenaar6de68532005-08-24 22:08:48 +00001705 found_len = len;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001706 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001707 }
Bram Moolenaard68071d2006-05-02 22:08:30 +00001708 else
1709 found_one = TRUE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001710 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001711 }
1712
Bram Moolenaar51485f02005-06-04 21:55:20 +00001713 /* advance to character after the word */
1714 p += len;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001715 capcol -= len;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001716 }
1717
Bram Moolenaar5195e452005-08-19 20:32:47 +00001718 if (dir == BACKWARD && found_pos.lnum != 0)
1719 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001720 /* Use the last match in the line (before the cursor). */
Bram Moolenaar95529562005-08-25 21:21:38 +00001721 wp->w_cursor = found_pos;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001722 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00001723 return found_len;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001724 }
1725
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001726 if (curline)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001727 break; /* only check cursor line */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001728
Bram Moolenaard3f78dc2017-02-25 14:21:10 +01001729 /* If we are back at the starting line and searched it again there
1730 * is no match, give up. */
1731 if (lnum == wp->w_cursor.lnum && wrapped)
1732 break;
1733
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001734 /* Advance to next line. */
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001735 if (dir == BACKWARD)
1736 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001737 if (lnum > 1)
1738 --lnum;
1739 else if (!p_ws)
1740 break; /* at first line and 'nowrapscan' */
1741 else
1742 {
1743 /* Wrap around to the end of the buffer. May search the
1744 * starting line again and accept the last match. */
1745 lnum = wp->w_buffer->b_ml.ml_line_count;
1746 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00001747 if (!shortmess(SHM_SEARCH))
1748 give_warning((char_u *)_(top_bot_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001749 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001750 capcol = -1;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001751 }
1752 else
1753 {
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001754 if (lnum < wp->w_buffer->b_ml.ml_line_count)
1755 ++lnum;
1756 else if (!p_ws)
1757 break; /* at first line and 'nowrapscan' */
1758 else
1759 {
1760 /* Wrap around to the start of the buffer. May search the
1761 * starting line again and accept the first match. */
1762 lnum = 1;
1763 wrapped = TRUE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00001764 if (!shortmess(SHM_SEARCH))
1765 give_warning((char_u *)_(bot_top_msg), TRUE);
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00001766 }
1767
1768 /* If we are back at the starting line and there is no match then
1769 * give up. */
Bram Moolenaard3f78dc2017-02-25 14:21:10 +01001770 if (lnum == wp->w_cursor.lnum && !found_one)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001771 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001772
1773 /* Skip the characters at the start of the next line that were
1774 * included in a match crossing line boundaries. */
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00001775 if (attr == HLF_COUNT)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001776 skip = (int)(p - endp);
Bram Moolenaar0c405862005-06-22 22:26:26 +00001777 else
1778 skip = 0;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001779
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00001780 /* Capcol skips over the inserted space. */
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001781 --capcol;
1782
1783 /* But after empty line check first word in next line */
1784 if (*skipwhite(line) == NUL)
1785 capcol = 0;
Bram Moolenaar2cf8b302005-04-20 19:37:22 +00001786 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001787
1788 line_breakcheck();
1789 }
1790
Bram Moolenaar0c405862005-06-22 22:26:26 +00001791 vim_free(buf);
Bram Moolenaar6de68532005-08-24 22:08:48 +00001792 return 0;
Bram Moolenaar0c405862005-06-22 22:26:26 +00001793}
1794
1795/*
1796 * For spell checking: concatenate the start of the following line "line" into
1797 * "buf", blanking-out special characters. Copy less then "maxlen" bytes.
Bram Moolenaar6a5d2ac2008-04-01 15:14:36 +00001798 * Keep the blanks at the start of the next line, this is used in win_line()
1799 * to skip those bytes if the word was OK.
Bram Moolenaar0c405862005-06-22 22:26:26 +00001800 */
1801 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001802spell_cat_line(char_u *buf, char_u *line, int maxlen)
Bram Moolenaar0c405862005-06-22 22:26:26 +00001803{
1804 char_u *p;
1805 int n;
1806
1807 p = skipwhite(line);
1808 while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
1809 p = skipwhite(p + 1);
1810
1811 if (*p != NUL)
1812 {
Bram Moolenaar6a5d2ac2008-04-01 15:14:36 +00001813 /* Only worth concatenating if there is something else than spaces to
1814 * concatenate. */
1815 n = (int)(p - line) + 1;
1816 if (n < maxlen - 1)
1817 {
1818 vim_memset(buf, ' ', n);
1819 vim_strncpy(buf + n, p, maxlen - 1 - n);
1820 }
Bram Moolenaar0c405862005-06-22 22:26:26 +00001821 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001822}
1823
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001824/*
1825 * Structure used for the cookie argument of do_in_runtimepath().
1826 */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001827typedef struct spelload_S
1828{
1829 char_u sl_lang[MAXWLEN + 1]; /* language name */
1830 slang_T *sl_slang; /* resulting slang_T struct */
1831 int sl_nobreak; /* NOBREAK language found */
1832} spelload_T;
1833
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001834/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001835 * Load word list(s) for "lang" from Vim spell file(s).
Bram Moolenaarb765d632005-06-07 21:00:02 +00001836 * "lang" must be the language without the region: e.g., "en".
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001837 */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001838 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001839spell_load_lang(char_u *lang)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001840{
Bram Moolenaarb765d632005-06-07 21:00:02 +00001841 char_u fname_enc[85];
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001842 int r;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001843 spelload_T sl;
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001844 int round;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001845
Bram Moolenaarb765d632005-06-07 21:00:02 +00001846 /* Copy the language name to pass it to spell_load_cb() as a cookie.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001847 * It's truncated when an error is detected. */
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001848 STRCPY(sl.sl_lang, lang);
1849 sl.sl_slang = NULL;
1850 sl.sl_nobreak = FALSE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001851
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001852 /* We may retry when no spell file is found for the language, an
1853 * autocommand may load it then. */
1854 for (round = 1; round <= 2; ++round)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001855 {
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001856 /*
1857 * Find the first spell file for "lang" in 'runtimepath' and load it.
1858 */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001859 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaar56f78042010-12-08 17:09:32 +01001860#ifdef VMS
1861 "spell/%s_%s.spl",
1862#else
1863 "spell/%s.%s.spl",
1864#endif
1865 lang, spell_enc());
Bram Moolenaar7f8989d2016-03-12 22:11:39 +01001866 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001867
1868 if (r == FAIL && *sl.sl_lang != NUL)
1869 {
1870 /* Try loading the ASCII version. */
1871 vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
Bram Moolenaar56f78042010-12-08 17:09:32 +01001872#ifdef VMS
1873 "spell/%s_ascii.spl",
1874#else
1875 "spell/%s.ascii.spl",
1876#endif
1877 lang);
Bram Moolenaar7f8989d2016-03-12 22:11:39 +01001878 r = do_in_runtimepath(fname_enc, 0, spell_load_cb, &sl);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001879
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001880 if (r == FAIL && *sl.sl_lang != NUL && round == 1
1881 && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
1882 curbuf->b_fname, FALSE, curbuf))
1883 continue;
1884 break;
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001885 }
Bram Moolenaar362e1a32006-03-06 23:29:24 +00001886 break;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001887 }
1888
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00001889 if (r == FAIL)
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001890 {
Bram Moolenaar56f78042010-12-08 17:09:32 +01001891 smsg((char_u *)
1892#ifdef VMS
1893 _("Warning: Cannot find word list \"%s_%s.spl\" or \"%s_ascii.spl\""),
1894#else
1895 _("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
1896#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00001897 lang, spell_enc(), lang);
Bram Moolenaarb8a7b562006-02-01 21:47:16 +00001898 }
Bram Moolenaarda2303d2005-08-30 21:55:26 +00001899 else if (sl.sl_slang != NULL)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001900 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00001901 /* At least one file was loaded, now load ALL the additions. */
Bram Moolenaarb765d632005-06-07 21:00:02 +00001902 STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
Bram Moolenaar7f8989d2016-03-12 22:11:39 +01001903 do_in_runtimepath(fname_enc, DIP_ALL, spell_load_cb, &sl);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001904 }
1905}
1906
1907/*
1908 * Return the encoding used for spell checking: Use 'encoding', except that we
1909 * use "latin1" for "latin9". And limit to 60 characters (just in case).
1910 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001911 char_u *
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001912spell_enc(void)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001913{
1914
1915#ifdef FEAT_MBYTE
1916 if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
1917 return p_enc;
1918#endif
1919 return (char_u *)"latin1";
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001920}
1921
1922/*
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001923 * Get the name of the .spl file for the internal wordlist into
1924 * "fname[MAXPATHL]".
1925 */
1926 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001927int_wordlist_spl(char_u *fname)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001928{
Bram Moolenaar56f78042010-12-08 17:09:32 +01001929 vim_snprintf((char *)fname, MAXPATHL, SPL_FNAME_TMPL,
Bram Moolenaarf9184a12005-07-02 23:10:47 +00001930 int_wordlist, spell_enc());
1931}
1932
1933/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00001934 * Allocate a new slang_T for language "lang". "lang" can be NULL.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001935 * Caller must fill "sl_next".
1936 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001937 slang_T *
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001938slang_alloc(char_u *lang)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001939{
1940 slang_T *lp;
1941
Bram Moolenaar51485f02005-06-04 21:55:20 +00001942 lp = (slang_T *)alloc_clear(sizeof(slang_T));
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001943 if (lp != NULL)
1944 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00001945 if (lang != NULL)
1946 lp->sl_name = vim_strsave(lang);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001947 ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
Bram Moolenaar4770d092006-01-12 23:22:24 +00001948 ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
Bram Moolenaar5195e452005-08-19 20:32:47 +00001949 lp->sl_compmax = MAXWLEN;
Bram Moolenaar5195e452005-08-19 20:32:47 +00001950 lp->sl_compsylmax = MAXWLEN;
Bram Moolenaar4770d092006-01-12 23:22:24 +00001951 hash_init(&lp->sl_wordcount);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001952 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00001953
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001954 return lp;
1955}
1956
1957/*
1958 * Free the contents of an slang_T and the structure itself.
1959 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001960 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001961slang_free(slang_T *lp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001962{
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00001963 vim_free(lp->sl_name);
Bram Moolenaarb765d632005-06-07 21:00:02 +00001964 vim_free(lp->sl_fname);
1965 slang_clear(lp);
1966 vim_free(lp);
1967}
1968
1969/*
1970 * Clear an slang_T so that the file can be reloaded.
1971 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02001972 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01001973slang_clear(slang_T *lp)
Bram Moolenaarb765d632005-06-07 21:00:02 +00001974{
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001975 garray_T *gap;
1976 fromto_T *ftp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00001977 salitem_T *smp;
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001978 int i;
Bram Moolenaar4770d092006-01-12 23:22:24 +00001979 int round;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001980
Bram Moolenaard23a8232018-02-10 18:45:26 +01001981 VIM_CLEAR(lp->sl_fbyts);
1982 VIM_CLEAR(lp->sl_kbyts);
1983 VIM_CLEAR(lp->sl_pbyts);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00001984
Bram Moolenaard23a8232018-02-10 18:45:26 +01001985 VIM_CLEAR(lp->sl_fidxs);
1986 VIM_CLEAR(lp->sl_kidxs);
1987 VIM_CLEAR(lp->sl_pidxs);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001988
Bram Moolenaar4770d092006-01-12 23:22:24 +00001989 for (round = 1; round <= 2; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001990 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00001991 gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
1992 while (gap->ga_len > 0)
1993 {
1994 ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
1995 vim_free(ftp->ft_from);
1996 vim_free(ftp->ft_to);
1997 }
1998 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00001999 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002000
2001 gap = &lp->sl_sal;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002002 if (lp->sl_sofo)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002003 {
2004 /* "ga_len" is set to 1 without adding an item for latin1 */
2005 if (gap->ga_data != NULL)
2006 /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2007 for (i = 0; i < gap->ga_len; ++i)
2008 vim_free(((int **)gap->ga_data)[i]);
2009 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00002010 else
2011 /* SAL items: free salitem_T items */
2012 while (gap->ga_len > 0)
2013 {
2014 smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2015 vim_free(smp->sm_lead);
2016 /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2017 vim_free(smp->sm_to);
2018#ifdef FEAT_MBYTE
2019 vim_free(smp->sm_lead_w);
2020 vim_free(smp->sm_oneof_w);
2021 vim_free(smp->sm_to_w);
2022#endif
2023 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00002024 ga_clear(gap);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002025
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002026 for (i = 0; i < lp->sl_prefixcnt; ++i)
Bram Moolenaar473de612013-06-08 18:19:48 +02002027 vim_regfree(lp->sl_prefprog[i]);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002028 lp->sl_prefixcnt = 0;
Bram Moolenaard23a8232018-02-10 18:45:26 +01002029 VIM_CLEAR(lp->sl_prefprog);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002030
Bram Moolenaard23a8232018-02-10 18:45:26 +01002031 VIM_CLEAR(lp->sl_info);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00002032
Bram Moolenaard23a8232018-02-10 18:45:26 +01002033 VIM_CLEAR(lp->sl_midword);
Bram Moolenaar1d73c882005-06-19 22:48:47 +00002034
Bram Moolenaar473de612013-06-08 18:19:48 +02002035 vim_regfree(lp->sl_compprog);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002036 lp->sl_compprog = NULL;
Bram Moolenaard23a8232018-02-10 18:45:26 +01002037 VIM_CLEAR(lp->sl_comprules);
2038 VIM_CLEAR(lp->sl_compstartflags);
2039 VIM_CLEAR(lp->sl_compallflags);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002040
Bram Moolenaard23a8232018-02-10 18:45:26 +01002041 VIM_CLEAR(lp->sl_syllable);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002042 ga_clear(&lp->sl_syl_items);
Bram Moolenaarae5bce12005-08-15 21:41:48 +00002043
Bram Moolenaar899dddf2006-03-26 21:06:50 +00002044 ga_clear_strings(&lp->sl_comppat);
2045
Bram Moolenaar4770d092006-01-12 23:22:24 +00002046 hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2047 hash_init(&lp->sl_wordcount);
Bram Moolenaarea424162005-06-16 21:51:00 +00002048
Bram Moolenaar4770d092006-01-12 23:22:24 +00002049#ifdef FEAT_MBYTE
2050 hash_clear_all(&lp->sl_map_hash, 0);
Bram Moolenaarea424162005-06-16 21:51:00 +00002051#endif
Bram Moolenaar5195e452005-08-19 20:32:47 +00002052
Bram Moolenaar4770d092006-01-12 23:22:24 +00002053 /* Clear info from .sug file. */
2054 slang_clear_sug(lp);
2055
Bram Moolenaar5195e452005-08-19 20:32:47 +00002056 lp->sl_compmax = MAXWLEN;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002057 lp->sl_compminlen = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002058 lp->sl_compsylmax = MAXWLEN;
2059 lp->sl_regions[0] = NUL;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002060}
2061
2062/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002063 * Clear the info from the .sug file in "lp".
2064 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002065 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002066slang_clear_sug(slang_T *lp)
Bram Moolenaar4770d092006-01-12 23:22:24 +00002067{
Bram Moolenaard23a8232018-02-10 18:45:26 +01002068 VIM_CLEAR(lp->sl_sbyts);
2069 VIM_CLEAR(lp->sl_sidxs);
Bram Moolenaar4770d092006-01-12 23:22:24 +00002070 close_spellbuf(lp->sl_sugbuf);
2071 lp->sl_sugbuf = NULL;
2072 lp->sl_sugloaded = FALSE;
2073 lp->sl_sugtime = 0;
2074}
2075
2076/*
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002077 * Load one spell file and store the info into a slang_T.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002078 * Invoked through do_in_runtimepath().
2079 */
2080 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002081spell_load_cb(char_u *fname, void *cookie)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002082{
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002083 spelload_T *slp = (spelload_T *)cookie;
2084 slang_T *slang;
2085
2086 slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2087 if (slang != NULL)
2088 {
2089 /* When a previously loaded file has NOBREAK also use it for the
2090 * ".add" files. */
2091 if (slp->sl_nobreak && slang->sl_add)
2092 slang->sl_nobreak = TRUE;
2093 else if (slang->sl_nobreak)
2094 slp->sl_nobreak = TRUE;
2095
2096 slp->sl_slang = slang;
2097 }
Bram Moolenaarb765d632005-06-07 21:00:02 +00002098}
2099
Bram Moolenaar4770d092006-01-12 23:22:24 +00002100
2101/*
2102 * Add a word to the hashtable of common words.
2103 * If it's already there then the counter is increased.
2104 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002105 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002106count_common_word(
2107 slang_T *lp,
2108 char_u *word,
2109 int len, /* word length, -1 for upto NUL */
2110 int count) /* 1 to count once, 10 to init */
Bram Moolenaar4770d092006-01-12 23:22:24 +00002111{
2112 hash_T hash;
2113 hashitem_T *hi;
2114 wordcount_T *wc;
2115 char_u buf[MAXWLEN];
2116 char_u *p;
2117
2118 if (len == -1)
2119 p = word;
2120 else
2121 {
2122 vim_strncpy(buf, word, len);
2123 p = buf;
2124 }
2125
2126 hash = hash_hash(p);
2127 hi = hash_lookup(&lp->sl_wordcount, p, hash);
2128 if (HASHITEM_EMPTY(hi))
2129 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002130 wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p)));
Bram Moolenaar4770d092006-01-12 23:22:24 +00002131 if (wc == NULL)
2132 return;
2133 STRCPY(wc->wc_word, p);
2134 wc->wc_count = count;
2135 hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
2136 }
2137 else
2138 {
2139 wc = HI2WC(hi);
2140 if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */
2141 wc->wc_count = MAXWORDCOUNT;
2142 }
2143}
2144
2145/*
2146 * Adjust the score of common words.
2147 */
2148 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002149score_wordcount_adj(
2150 slang_T *slang,
2151 int score,
2152 char_u *word,
2153 int split) /* word was split, less bonus */
Bram Moolenaar4770d092006-01-12 23:22:24 +00002154{
2155 hashitem_T *hi;
2156 wordcount_T *wc;
2157 int bonus;
2158 int newscore;
2159
2160 hi = hash_find(&slang->sl_wordcount, word);
2161 if (!HASHITEM_EMPTY(hi))
2162 {
2163 wc = HI2WC(hi);
2164 if (wc->wc_count < SCORE_THRES2)
2165 bonus = SCORE_COMMON1;
2166 else if (wc->wc_count < SCORE_THRES3)
2167 bonus = SCORE_COMMON2;
2168 else
2169 bonus = SCORE_COMMON3;
2170 if (split)
2171 newscore = score - bonus / 2;
2172 else
2173 newscore = score - bonus;
2174 if (newscore < 0)
2175 return 0;
2176 return newscore;
2177 }
2178 return score;
2179}
2180
Bram Moolenaar5195e452005-08-19 20:32:47 +00002181
Bram Moolenaar6de68532005-08-24 22:08:48 +00002182/*
Bram Moolenaar95529562005-08-25 21:21:38 +00002183 * Return TRUE if byte "n" appears in "str".
Bram Moolenaar6de68532005-08-24 22:08:48 +00002184 * Like strchr() but independent of locale.
2185 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002186 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002187byte_in_str(char_u *str, int n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002188{
2189 char_u *p;
2190
2191 for (p = str; *p != NUL; ++p)
Bram Moolenaar95529562005-08-25 21:21:38 +00002192 if (*p == n)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002193 return TRUE;
2194 return FALSE;
2195}
2196
Bram Moolenaar5195e452005-08-19 20:32:47 +00002197#define SY_MAXLEN 30
2198typedef struct syl_item_S
2199{
2200 char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */
2201 int sy_len;
2202} syl_item_T;
2203
2204/*
2205 * Truncate "slang->sl_syllable" at the first slash and put the following items
2206 * in "slang->sl_syl_items".
2207 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002208 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002209init_syl_tab(slang_T *slang)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002210{
2211 char_u *p;
2212 char_u *s;
2213 int l;
2214 syl_item_T *syl;
2215
2216 ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
2217 p = vim_strchr(slang->sl_syllable, '/');
2218 while (p != NULL)
2219 {
2220 *p++ = NUL;
Bram Moolenaar6de68532005-08-24 22:08:48 +00002221 if (*p == NUL) /* trailing slash */
Bram Moolenaar5195e452005-08-19 20:32:47 +00002222 break;
2223 s = p;
2224 p = vim_strchr(p, '/');
2225 if (p == NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002226 l = (int)STRLEN(s);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002227 else
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002228 l = (int)(p - s);
Bram Moolenaar5195e452005-08-19 20:32:47 +00002229 if (l >= SY_MAXLEN)
2230 return SP_FORMERROR;
2231 if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
Bram Moolenaar6de68532005-08-24 22:08:48 +00002232 return SP_OTHERERROR;
Bram Moolenaar5195e452005-08-19 20:32:47 +00002233 syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
2234 + slang->sl_syl_items.ga_len++;
2235 vim_strncpy(syl->sy_chars, s, l);
2236 syl->sy_len = l;
2237 }
2238 return OK;
2239}
2240
2241/*
2242 * Count the number of syllables in "word".
2243 * When "word" contains spaces the syllables after the last space are counted.
2244 * Returns zero if syllables are not defines.
2245 */
2246 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002247count_syllables(slang_T *slang, char_u *word)
Bram Moolenaar5195e452005-08-19 20:32:47 +00002248{
2249 int cnt = 0;
2250 int skip = FALSE;
2251 char_u *p;
2252 int len;
2253 int i;
2254 syl_item_T *syl;
2255 int c;
2256
2257 if (slang->sl_syllable == NULL)
2258 return 0;
2259
2260 for (p = word; *p != NUL; p += len)
2261 {
2262 /* When running into a space reset counter. */
2263 if (*p == ' ')
2264 {
2265 len = 1;
2266 cnt = 0;
2267 continue;
2268 }
2269
2270 /* Find longest match of syllable items. */
2271 len = 0;
2272 for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
2273 {
2274 syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
2275 if (syl->sy_len > len
2276 && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
2277 len = syl->sy_len;
2278 }
2279 if (len != 0) /* found a match, count syllable */
2280 {
2281 ++cnt;
2282 skip = FALSE;
2283 }
2284 else
2285 {
2286 /* No recognized syllable item, at least a syllable char then? */
2287#ifdef FEAT_MBYTE
2288 c = mb_ptr2char(p);
2289 len = (*mb_ptr2len)(p);
2290#else
2291 c = *p;
2292 len = 1;
2293#endif
2294 if (vim_strchr(slang->sl_syllable, c) == NULL)
2295 skip = FALSE; /* No, search for next syllable */
2296 else if (!skip)
2297 {
2298 ++cnt; /* Yes, count it */
2299 skip = TRUE; /* don't count following syllable chars */
2300 }
2301 }
2302 }
2303 return cnt;
2304}
2305
2306/*
Bram Moolenaar860cae12010-06-05 23:22:07 +02002307 * Parse 'spelllang' and set w_s->b_langp accordingly.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002308 * Returns NULL if it's OK, an error message otherwise.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002309 */
2310 char_u *
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002311did_set_spelllang(win_T *wp)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002312{
2313 garray_T ga;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002314 char_u *splp;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002315 char_u *region;
Bram Moolenaarb6356332005-07-18 21:40:44 +00002316 char_u region_cp[3];
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002317 int filename;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002318 int region_mask;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002319 slang_T *slang;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002320 int c;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002321 char_u lang[MAXWLEN + 1];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002322 char_u spf_name[MAXPATHL];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002323 int len;
2324 char_u *p;
Bram Moolenaar7887d882005-07-01 22:33:52 +00002325 int round;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002326 char_u *spf;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002327 char_u *use_region = NULL;
2328 int dont_use_region = FALSE;
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002329 int nobreak = FALSE;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002330 int i, j;
2331 langp_T *lp, *lp2;
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002332 static int recursive = FALSE;
2333 char_u *ret_msg = NULL;
2334 char_u *spl_copy;
Bram Moolenaar7c0a2f32016-07-10 22:11:16 +02002335 bufref_T bufref;
2336
2337 set_bufref(&bufref, wp->w_buffer);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002338
2339 /* We don't want to do this recursively. May happen when a language is
2340 * not available and the SpellFileMissing autocommand opens a new buffer
2341 * in which 'spell' is set. */
2342 if (recursive)
2343 return NULL;
2344 recursive = TRUE;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002345
2346 ga_init2(&ga, sizeof(langp_T), 2);
Bram Moolenaar860cae12010-06-05 23:22:07 +02002347 clear_midword(wp);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002348
Bram Moolenaar84a05ac2013-05-06 04:24:17 +02002349 /* Make a copy of 'spelllang', the SpellFileMissing autocommands may change
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002350 * it under our fingers. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02002351 spl_copy = vim_strsave(wp->w_s->b_p_spl);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002352 if (spl_copy == NULL)
2353 goto theend;
2354
Bram Moolenaar2593e032013-11-14 03:54:07 +01002355#ifdef FEAT_MBYTE
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002356 wp->w_s->b_cjk = 0;
Bram Moolenaar2593e032013-11-14 03:54:07 +01002357#endif
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002358
2359 /* Loop over comma separated language names. */
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002360 for (splp = spl_copy; *splp != NUL; )
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002361 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002362 /* Get one language name. */
2363 copy_option_part(&splp, lang, MAXWLEN, ",");
Bram Moolenaar5482f332005-04-17 20:18:43 +00002364 region = NULL;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00002365 len = (int)STRLEN(lang);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002366
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002367 if (STRCMP(lang, "cjk") == 0)
2368 {
Bram Moolenaar2593e032013-11-14 03:54:07 +01002369#ifdef FEAT_MBYTE
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002370 wp->w_s->b_cjk = 1;
Bram Moolenaar2593e032013-11-14 03:54:07 +01002371#endif
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002372 continue;
2373 }
2374
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002375 /* If the name ends in ".spl" use it as the name of the spell file.
2376 * If there is a region name let "region" point to it and remove it
2377 * from the name. */
2378 if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
2379 {
2380 filename = TRUE;
2381
Bram Moolenaarb6356332005-07-18 21:40:44 +00002382 /* Locate a region and remove it from the file name. */
2383 p = vim_strchr(gettail(lang), '_');
2384 if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
2385 && !ASCII_ISALPHA(p[3]))
2386 {
2387 vim_strncpy(region_cp, p + 1, 2);
2388 mch_memmove(p, p + 3, len - (p - lang) - 2);
Bram Moolenaarb6356332005-07-18 21:40:44 +00002389 region = region_cp;
2390 }
2391 else
2392 dont_use_region = TRUE;
2393
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002394 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002395 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2396 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002397 break;
2398 }
2399 else
2400 {
2401 filename = FALSE;
2402 if (len > 3 && lang[len - 3] == '_')
2403 {
2404 region = lang + len - 2;
2405 len -= 3;
2406 lang[len] = NUL;
2407 }
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002408 else
2409 dont_use_region = TRUE;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002410
2411 /* Check if we loaded this language before. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002412 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2413 if (STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002414 break;
2415 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002416
Bram Moolenaarb6356332005-07-18 21:40:44 +00002417 if (region != NULL)
2418 {
2419 /* If the region differs from what was used before then don't
2420 * use it for 'spellfile'. */
2421 if (use_region != NULL && STRCMP(region, use_region) != 0)
2422 dont_use_region = TRUE;
2423 use_region = region;
2424 }
2425
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002426 /* If not found try loading the language now. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002427 if (slang == NULL)
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002428 {
2429 if (filename)
2430 (void)spell_load_file(lang, lang, NULL, FALSE);
2431 else
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002432 {
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002433 spell_load_lang(lang);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002434 /* SpellFileMissing autocommands may do anything, including
2435 * destroying the buffer we are using... */
Bram Moolenaar7c0a2f32016-07-10 22:11:16 +02002436 if (!bufref_valid(&bufref))
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002437 {
Bram Moolenaar5b302912016-08-24 22:11:55 +02002438 ret_msg = (char_u *)N_("E797: SpellFileMissing autocommand deleted buffer");
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002439 goto theend;
2440 }
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002441 }
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002442 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002443
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002444 /*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002445 * Loop over the languages, there can be several files for "lang".
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002446 */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002447 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2448 if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
2449 : STRICMP(lang, slang->sl_name) == 0)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002450 {
Bram Moolenaar3982c542005-06-08 21:56:31 +00002451 region_mask = REGION_ALL;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002452 if (!filename && region != NULL)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002453 {
2454 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002455 c = find_region(slang->sl_regions, region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002456 if (c == REGION_ALL)
2457 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002458 if (slang->sl_add)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002459 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002460 if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002461 /* This addition file is for other regions. */
2462 region_mask = 0;
2463 }
2464 else
2465 /* This is probably an error. Give a warning and
2466 * accept the words anyway. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002467 smsg((char_u *)
2468 _("Warning: region %s not supported"),
2469 region);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002470 }
2471 else
2472 region_mask = 1 << c;
2473 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002474
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002475 if (region_mask != 0)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002476 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002477 if (ga_grow(&ga, 1) == FAIL)
2478 {
2479 ga_clear(&ga);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002480 ret_msg = e_outofmem;
2481 goto theend;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002482 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002483 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002484 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2485 ++ga.ga_len;
Bram Moolenaar860cae12010-06-05 23:22:07 +02002486 use_midword(slang, wp);
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002487 if (slang->sl_nobreak)
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002488 nobreak = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002489 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002490 }
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002491 }
2492
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002493 /* round 0: load int_wordlist, if possible.
2494 * round 1: load first name in 'spellfile'.
2495 * round 2: load second name in 'spellfile.
2496 * etc. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02002497 spf = curwin->w_s->b_p_spf;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002498 for (round = 0; round == 0 || *spf != NUL; ++round)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002499 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002500 if (round == 0)
Bram Moolenaar7887d882005-07-01 22:33:52 +00002501 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002502 /* Internal wordlist, if there is one. */
2503 if (int_wordlist == NULL)
Bram Moolenaar7887d882005-07-01 22:33:52 +00002504 continue;
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002505 int_wordlist_spl(spf_name);
Bram Moolenaar7887d882005-07-01 22:33:52 +00002506 }
2507 else
2508 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002509 /* One entry in 'spellfile'. */
2510 copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
2511 STRCAT(spf_name, ".spl");
2512
2513 /* If it was already found above then skip it. */
2514 for (c = 0; c < ga.ga_len; ++c)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002515 {
2516 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
2517 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002518 break;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00002519 }
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002520 if (c < ga.ga_len)
Bram Moolenaar7887d882005-07-01 22:33:52 +00002521 continue;
Bram Moolenaar7887d882005-07-01 22:33:52 +00002522 }
2523
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002524 /* Check if it was loaded already. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002525 for (slang = first_lang; slang != NULL; slang = slang->sl_next)
2526 if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002527 break;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002528 if (slang == NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002529 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00002530 /* Not loaded, try loading it now. The language name includes the
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002531 * region name, the region is ignored otherwise. for int_wordlist
2532 * use an arbitrary name. */
2533 if (round == 0)
2534 STRCPY(lang, "internal wordlist");
2535 else
Bram Moolenaar7887d882005-07-01 22:33:52 +00002536 {
Bram Moolenaarf9184a12005-07-02 23:10:47 +00002537 vim_strncpy(lang, gettail(spf_name), MAXWLEN);
Bram Moolenaar7887d882005-07-01 22:33:52 +00002538 p = vim_strchr(lang, '.');
2539 if (p != NULL)
2540 *p = NUL; /* truncate at ".encoding.add" */
2541 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002542 slang = spell_load_file(spf_name, lang, NULL, TRUE);
Bram Moolenaarda2303d2005-08-30 21:55:26 +00002543
2544 /* If one of the languages has NOBREAK we assume the addition
2545 * files also have this. */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002546 if (slang != NULL && nobreak)
2547 slang->sl_nobreak = TRUE;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002548 }
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002549 if (slang != NULL && ga_grow(&ga, 1) == OK)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002550 {
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002551 region_mask = REGION_ALL;
2552 if (use_region != NULL && !dont_use_region)
2553 {
2554 /* find region in sl_regions */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002555 c = find_region(slang->sl_regions, use_region);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002556 if (c != REGION_ALL)
2557 region_mask = 1 << c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002558 else if (*slang->sl_regions != NUL)
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002559 /* This spell file is for other regions. */
2560 region_mask = 0;
2561 }
2562
2563 if (region_mask != 0)
2564 {
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002565 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
2566 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
2567 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002568 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
2569 ++ga.ga_len;
Bram Moolenaar860cae12010-06-05 23:22:07 +02002570 use_midword(slang, wp);
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002571 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002572 }
2573 }
2574
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002575 /* Everything is fine, store the new b_langp value. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02002576 ga_clear(&wp->w_s->b_langp);
2577 wp->w_s->b_langp = ga;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002578
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002579 /* For each language figure out what language to use for sound folding and
2580 * REP items. If the language doesn't support it itself use another one
2581 * with the same name. E.g. for "en-math" use "en". */
2582 for (i = 0; i < ga.ga_len; ++i)
2583 {
2584 lp = LANGP_ENTRY(ga, i);
2585
2586 /* sound folding */
2587 if (lp->lp_slang->sl_sal.ga_len > 0)
2588 /* language does sound folding itself */
2589 lp->lp_sallang = lp->lp_slang;
2590 else
2591 /* find first similar language that does sound folding */
2592 for (j = 0; j < ga.ga_len; ++j)
2593 {
2594 lp2 = LANGP_ENTRY(ga, j);
2595 if (lp2->lp_slang->sl_sal.ga_len > 0
2596 && STRNCMP(lp->lp_slang->sl_name,
2597 lp2->lp_slang->sl_name, 2) == 0)
2598 {
2599 lp->lp_sallang = lp2->lp_slang;
2600 break;
2601 }
2602 }
2603
2604 /* REP items */
2605 if (lp->lp_slang->sl_rep.ga_len > 0)
2606 /* language has REP items itself */
2607 lp->lp_replang = lp->lp_slang;
2608 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00002609 /* find first similar language that has REP items */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00002610 for (j = 0; j < ga.ga_len; ++j)
2611 {
2612 lp2 = LANGP_ENTRY(ga, j);
2613 if (lp2->lp_slang->sl_rep.ga_len > 0
2614 && STRNCMP(lp->lp_slang->sl_name,
2615 lp2->lp_slang->sl_name, 2) == 0)
2616 {
2617 lp->lp_replang = lp2->lp_slang;
2618 break;
2619 }
2620 }
2621 }
2622
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002623theend:
2624 vim_free(spl_copy);
2625 recursive = FALSE;
Bram Moolenaarbe578ed2014-05-13 14:03:40 +02002626 redraw_win_later(wp, NOT_VALID);
Bram Moolenaar706cdeb2007-05-06 21:55:31 +00002627 return ret_msg;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002628}
2629
2630/*
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002631 * Clear the midword characters for buffer "buf".
2632 */
2633 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002634clear_midword(win_T *wp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002635{
Bram Moolenaar860cae12010-06-05 23:22:07 +02002636 vim_memset(wp->w_s->b_spell_ismw, 0, 256);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002637#ifdef FEAT_MBYTE
Bram Moolenaard23a8232018-02-10 18:45:26 +01002638 VIM_CLEAR(wp->w_s->b_spell_ismw_mb);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002639#endif
2640}
2641
2642/*
2643 * Use the "sl_midword" field of language "lp" for buffer "buf".
2644 * They add up to any currently used midword characters.
2645 */
2646 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002647use_midword(slang_T *lp, win_T *wp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002648{
2649 char_u *p;
2650
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00002651 if (lp->sl_midword == NULL) /* there aren't any */
2652 return;
2653
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002654 for (p = lp->sl_midword; *p != NUL; )
2655#ifdef FEAT_MBYTE
2656 if (has_mbyte)
2657 {
2658 int c, l, n;
2659 char_u *bp;
2660
2661 c = mb_ptr2char(p);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002662 l = (*mb_ptr2len)(p);
2663 if (c < 256 && l <= 2)
Bram Moolenaar860cae12010-06-05 23:22:07 +02002664 wp->w_s->b_spell_ismw[c] = TRUE;
2665 else if (wp->w_s->b_spell_ismw_mb == NULL)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002666 /* First multi-byte char in "b_spell_ismw_mb". */
Bram Moolenaar860cae12010-06-05 23:22:07 +02002667 wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002668 else
2669 {
2670 /* Append multi-byte chars to "b_spell_ismw_mb". */
Bram Moolenaar860cae12010-06-05 23:22:07 +02002671 n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
2672 bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002673 if (bp != NULL)
2674 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02002675 vim_free(wp->w_s->b_spell_ismw_mb);
2676 wp->w_s->b_spell_ismw_mb = bp;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002677 vim_strncpy(bp + n, p, l);
2678 }
2679 }
2680 p += l;
2681 }
2682 else
2683#endif
Bram Moolenaar860cae12010-06-05 23:22:07 +02002684 wp->w_s->b_spell_ismw[*p++] = TRUE;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00002685}
2686
2687/*
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002688 * Find the region "region[2]" in "rp" (points to "sl_regions").
Bram Moolenaarc4568ab2018-11-16 16:21:05 +01002689 * Each region is simply stored as the two characters of its name.
Bram Moolenaar7887d882005-07-01 22:33:52 +00002690 * Returns the index if found (first is 0), REGION_ALL if not found.
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002691 */
2692 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002693find_region(char_u *rp, char_u *region)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002694{
2695 int i;
2696
2697 for (i = 0; ; i += 2)
2698 {
2699 if (rp[i] == NUL)
2700 return REGION_ALL;
2701 if (rp[i] == region[0] && rp[i + 1] == region[1])
2702 break;
2703 }
2704 return i / 2;
2705}
2706
2707/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002708 * Return case type of word:
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002709 * w word 0
Bram Moolenaar51485f02005-06-04 21:55:20 +00002710 * Word WF_ONECAP
2711 * W WORD WF_ALLCAP
2712 * WoRd wOrd WF_KEEPCAP
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002713 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002714 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002715captype(
2716 char_u *word,
2717 char_u *end) /* When NULL use up to NUL byte. */
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002718{
2719 char_u *p;
2720 int c;
2721 int firstcap;
2722 int allcap;
2723 int past_second = FALSE; /* past second word char */
2724
2725 /* find first letter */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002726 for (p = word; !spell_iswordp_nmw(p, curwin); MB_PTR_ADV(p))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00002727 if (end == NULL ? *p == NUL : p >= end)
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002728 return 0; /* only non-word characters, illegal word */
2729#ifdef FEAT_MBYTE
Bram Moolenaarb765d632005-06-07 21:00:02 +00002730 if (has_mbyte)
2731 c = mb_ptr2char_adv(&p);
2732 else
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002733#endif
Bram Moolenaarb765d632005-06-07 21:00:02 +00002734 c = *p++;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002735 firstcap = allcap = SPELL_ISUPPER(c);
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002736
2737 /*
2738 * Need to check all letters to find a word with mixed upper/lower.
2739 * But a word with an upper char only at start is a ONECAP.
2740 */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002741 for ( ; end == NULL ? *p != NUL : p < end; MB_PTR_ADV(p))
Bram Moolenaarcc63c642013-11-12 04:44:01 +01002742 if (spell_iswordp_nmw(p, curwin))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002743 {
Bram Moolenaar53805d12005-08-01 07:08:33 +00002744 c = PTR2CHAR(p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002745 if (!SPELL_ISUPPER(c))
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002746 {
2747 /* UUl -> KEEPCAP */
2748 if (past_second && allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002749 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002750 allcap = FALSE;
2751 }
2752 else if (!allcap)
2753 /* UlU -> KEEPCAP */
Bram Moolenaar51485f02005-06-04 21:55:20 +00002754 return WF_KEEPCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002755 past_second = TRUE;
2756 }
2757
2758 if (allcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002759 return WF_ALLCAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002760 if (firstcap)
Bram Moolenaar51485f02005-06-04 21:55:20 +00002761 return WF_ONECAP;
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002762 return 0;
2763}
2764
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002765/*
2766 * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
2767 * capital. So that make_case_word() can turn WOrd into Word.
2768 * Add ALLCAP for "WOrD".
2769 */
2770 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002771badword_captype(char_u *word, char_u *end)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002772{
2773 int flags = captype(word, end);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00002774 int c;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002775 int l, u;
2776 int first;
2777 char_u *p;
2778
2779 if (flags & WF_KEEPCAP)
2780 {
2781 /* Count the number of UPPER and lower case letters. */
2782 l = u = 0;
2783 first = FALSE;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01002784 for (p = word; p < end; MB_PTR_ADV(p))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002785 {
Bram Moolenaar8b59de92005-08-11 19:59:29 +00002786 c = PTR2CHAR(p);
2787 if (SPELL_ISUPPER(c))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002788 {
2789 ++u;
2790 if (p == word)
2791 first = TRUE;
2792 }
2793 else
2794 ++l;
2795 }
2796
2797 /* If there are more UPPER than lower case letters suggest an
2798 * ALLCAP word. Otherwise, if the first letter is UPPER then
2799 * suggest ONECAP. Exception: "ALl" most likely should be "All",
2800 * require three upper case letters. */
2801 if (u > l && u > 2)
2802 flags |= WF_ALLCAP;
2803 else if (first)
2804 flags |= WF_ONECAP;
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00002805
2806 if (u >= 2 && l >= 2) /* maCARONI maCAroni */
2807 flags |= WF_MIXCAP;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00002808 }
2809 return flags;
2810}
2811
Bram Moolenaar34b466e2013-11-28 17:41:46 +01002812/*
2813 * Delete the internal wordlist and its .spl file.
2814 */
2815 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002816spell_delete_wordlist(void)
Bram Moolenaar34b466e2013-11-28 17:41:46 +01002817{
2818 char_u fname[MAXPATHL];
2819
2820 if (int_wordlist != NULL)
2821 {
2822 mch_remove(int_wordlist);
2823 int_wordlist_spl(fname);
2824 mch_remove(fname);
Bram Moolenaard23a8232018-02-10 18:45:26 +01002825 VIM_CLEAR(int_wordlist);
Bram Moolenaar34b466e2013-11-28 17:41:46 +01002826 }
2827}
2828
#if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all languages.
 * Also deletes the internal wordlist and clears "repl_from" and "repl_to".
 */
    void
spell_free_all(void)
{
    slang_T     *lang;
    buf_T       *buf;

    /* Clear the "b_langp" growarray of every buffer. */
    FOR_ALL_BUFFERS(buf)
    {
        ga_clear(&buf->b_s.b_langp);
    }

    /* Unlink each language from the head of the list before freeing it. */
    while ((lang = first_lang) != NULL)
    {
        first_lang = lang->sl_next;
        slang_free(lang);
    }

    spell_delete_wordlist();

    VIM_CLEAR(repl_to);
    VIM_CLEAR(repl_from);
}
#endif
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00002856
#if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload(void)
{
    win_T       *wp;

    /* Initialize the table for spell_iswordp(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    spell_free_all();

    /* Go through the windows and handle 'spelllang'.  The word lists are
     * only loaded for a window where 'spelllang' is not empty and 'spell' is
     * set; the loop stops after the first such window. */
    FOR_ALL_WINDOWS(wp)
    {
        if (*wp->w_s->b_p_spl != NUL && wp->w_p_spell)
        {
            (void)did_set_spelllang(wp);
            break;
        }
    }
}
#endif
Bram Moolenaar402d2fe2005-04-15 21:00:38 +00002889
Bram Moolenaarb765d632005-06-07 21:00:02 +00002890/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00002891 * Opposite of offset2bytes().
2892 * "pp" points to the bytes and is advanced over it.
2893 * Returns the offset.
2894 */
2895 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002896bytes2offset(char_u **pp)
Bram Moolenaar4770d092006-01-12 23:22:24 +00002897{
2898 char_u *p = *pp;
2899 int nr;
2900 int c;
2901
2902 c = *p++;
2903 if ((c & 0x80) == 0x00) /* 1 byte */
2904 {
2905 nr = c - 1;
2906 }
2907 else if ((c & 0xc0) == 0x80) /* 2 bytes */
2908 {
2909 nr = (c & 0x3f) - 1;
2910 nr = nr * 255 + (*p++ - 1);
2911 }
2912 else if ((c & 0xe0) == 0xc0) /* 3 bytes */
2913 {
2914 nr = (c & 0x1f) - 1;
2915 nr = nr * 255 + (*p++ - 1);
2916 nr = nr * 255 + (*p++ - 1);
2917 }
2918 else /* 4 bytes */
2919 {
2920 nr = (c & 0x0f) - 1;
2921 nr = nr * 255 + (*p++ - 1);
2922 nr = nr * 255 + (*p++ - 1);
2923 nr = nr * 255 + (*p++ - 1);
2924 }
2925
2926 *pp = p;
2927 return nr;
2928}
2929
Bram Moolenaar4770d092006-01-12 23:22:24 +00002930
2931/*
2932 * Open a spell buffer. This is a nameless buffer that is not in the buffer
2933 * list and only contains text lines. Can use a swapfile to reduce memory
2934 * use.
2935 * Most other fields are invalid! Esp. watch out for string options being
2936 * NULL and there is no undo info.
2937 * Returns NULL when out of memory.
2938 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002939 buf_T *
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002940open_spellbuf(void)
Bram Moolenaar4770d092006-01-12 23:22:24 +00002941{
2942 buf_T *buf;
2943
2944 buf = (buf_T *)alloc_clear(sizeof(buf_T));
2945 if (buf != NULL)
2946 {
2947 buf->b_spell = TRUE;
2948 buf->b_p_swf = TRUE; /* may create a swap file */
Bram Moolenaar706d2de2013-07-17 17:35:13 +02002949#ifdef FEAT_CRYPT
2950 buf->b_p_key = empty_option;
2951#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00002952 ml_open(buf);
2953 ml_open_file(buf); /* create swap file now */
2954 }
2955 return buf;
2956}
2957
2958/*
2959 * Close the buffer used for spell info.
2960 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002961 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002962close_spellbuf(buf_T *buf)
Bram Moolenaar4770d092006-01-12 23:22:24 +00002963{
2964 if (buf != NULL)
2965 {
2966 ml_close(buf, TRUE);
2967 vim_free(buf);
2968 }
2969}
2970
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002971/*
2972 * Init the chartab used for spelling for ASCII.
2973 * EBCDIC is not supported!
2974 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02002975 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01002976clear_spell_chartab(spelltab_T *sp)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002977{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002978 int i;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002979
2980 /* Init everything to FALSE. */
2981 vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
2982 vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
2983 for (i = 0; i < 256; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002984 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002985 sp->st_fold[i] = i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00002986 sp->st_upper[i] = i;
2987 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00002988
2989 /* We include digits. A word shouldn't start with a digit, but handling
2990 * that is done separately. */
2991 for (i = '0'; i <= '9'; ++i)
2992 sp->st_isw[i] = TRUE;
2993 for (i = 'A'; i <= 'Z'; ++i)
2994 {
2995 sp->st_isw[i] = TRUE;
2996 sp->st_isu[i] = TRUE;
2997 sp->st_fold[i] = i + 0x20;
2998 }
2999 for (i = 'a'; i <= 'z'; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003000 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003001 sp->st_isw[i] = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003002 sp->st_upper[i] = i - 0x20;
3003 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003004}
3005
3006/*
3007 * Init the chartab used for spelling. Only depends on 'encoding'.
3008 * Called once while starting up and when 'encoding' changes.
3009 * The default is to use isalpha(), but the spell file should define the word
3010 * characters to make it possible that 'encoding' differs from the current
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003011 * locale. For utf-8 we don't use isalpha() but our own functions.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003012 */
3013 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003014init_spell_chartab(void)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003015{
3016 int i;
3017
3018 did_set_spelltab = FALSE;
3019 clear_spell_chartab(&spelltab);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003020#ifdef FEAT_MBYTE
3021 if (enc_dbcs)
3022 {
3023 /* DBCS: assume double-wide characters are word characters. */
3024 for (i = 128; i <= 255; ++i)
3025 if (MB_BYTE2LEN(i) == 2)
3026 spelltab.st_isw[i] = TRUE;
3027 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003028 else if (enc_utf8)
3029 {
3030 for (i = 128; i < 256; ++i)
3031 {
Bram Moolenaar54ab0f12010-05-13 17:46:58 +02003032 int f = utf_fold(i);
3033 int u = utf_toupper(i);
3034
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003035 spelltab.st_isu[i] = utf_isupper(i);
3036 spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
Bram Moolenaar54ab0f12010-05-13 17:46:58 +02003037 /* The folded/upper-cased value is different between latin1 and
3038 * utf8 for 0xb5, causing E763 for no good reason. Use the latin1
3039 * value for utf-8 to avoid this. */
3040 spelltab.st_fold[i] = (f < 256) ? f : i;
3041 spelltab.st_upper[i] = (u < 256) ? u : i;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003042 }
3043 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003044 else
3045#endif
3046 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003047 /* Rough guess: use locale-dependent library functions. */
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003048 for (i = 128; i < 256; ++i)
3049 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003050 if (MB_ISUPPER(i))
3051 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003052 spelltab.st_isw[i] = TRUE;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003053 spelltab.st_isu[i] = TRUE;
3054 spelltab.st_fold[i] = MB_TOLOWER(i);
3055 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003056 else if (MB_ISLOWER(i))
3057 {
3058 spelltab.st_isw[i] = TRUE;
3059 spelltab.st_upper[i] = MB_TOUPPER(i);
3060 }
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003061 }
3062 }
3063}
3064
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003065
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003066/*
Bram Moolenaarea408852005-06-25 22:49:46 +00003067 * Return TRUE if "p" points to a word character.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003068 * As a special case we see "midword" characters as word character when it is
Bram Moolenaarea408852005-06-25 22:49:46 +00003069 * followed by a word character. This finds they'there but not 'they there'.
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003070 * Thus this only works properly when past the first character of the word.
Bram Moolenaarea408852005-06-25 22:49:46 +00003071 */
3072 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003073spell_iswordp(
3074 char_u *p,
3075 win_T *wp) /* buffer used */
Bram Moolenaarea408852005-06-25 22:49:46 +00003076{
Bram Moolenaarea408852005-06-25 22:49:46 +00003077#ifdef FEAT_MBYTE
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003078 char_u *s;
3079 int l;
3080 int c;
3081
3082 if (has_mbyte)
3083 {
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02003084 l = MB_PTR2LEN(p);
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003085 s = p;
3086 if (l == 1)
3087 {
3088 /* be quick for ASCII */
Bram Moolenaar860cae12010-06-05 23:22:07 +02003089 if (wp->w_s->b_spell_ismw[*p])
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003090 s = p + 1; /* skip a mid-word character */
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003091 }
3092 else
3093 {
3094 c = mb_ptr2char(p);
Bram Moolenaar860cae12010-06-05 23:22:07 +02003095 if (c < 256 ? wp->w_s->b_spell_ismw[c]
3096 : (wp->w_s->b_spell_ismw_mb != NULL
3097 && vim_strchr(wp->w_s->b_spell_ismw_mb, c) != NULL))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003098 s = p + l;
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003099 }
3100
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003101 c = mb_ptr2char(s);
3102 if (c > 255)
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003103 return spell_mb_isword_class(mb_get_class(s), wp);
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003104 return spelltab.st_isw[c];
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003105 }
Bram Moolenaarea408852005-06-25 22:49:46 +00003106#endif
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00003107
Bram Moolenaar860cae12010-06-05 23:22:07 +02003108 return spelltab.st_isw[wp->w_s->b_spell_ismw[*p] ? p[1] : p[0]];
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003109}
3110
3111/*
3112 * Return TRUE if "p" points to a word character.
3113 * Unlike spell_iswordp() this doesn't check for "midword" characters.
3114 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003115 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003116spell_iswordp_nmw(char_u *p, win_T *wp)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003117{
3118#ifdef FEAT_MBYTE
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003119 int c;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003120
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003121 if (has_mbyte)
3122 {
3123 c = mb_ptr2char(p);
3124 if (c > 255)
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003125 return spell_mb_isword_class(mb_get_class(p), wp);
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003126 return spelltab.st_isw[c];
3127 }
3128#endif
Bram Moolenaar9c96f592005-06-30 21:52:39 +00003129 return spelltab.st_isw[*p];
Bram Moolenaarea408852005-06-25 22:49:46 +00003130}
3131
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003132#ifdef FEAT_MBYTE
3133/*
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +00003134 * Return TRUE if word class indicates a word character.
3135 * Only for characters above 255.
3136 * Unicode subscript and superscript are not considered word characters.
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003137 * See also dbcs_class() and utf_class() in mbyte.c.
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +00003138 */
3139 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003140spell_mb_isword_class(int cl, win_T *wp)
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +00003141{
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003142 if (wp->w_s->b_cjk)
3143 /* East Asian characters are not considered word characters. */
3144 return cl == 2 || cl == 0x2800;
Bram Moolenaar7a91a4a2008-04-09 13:49:57 +00003145 return cl >= 2 && cl != 0x2070 && cl != 0x2080;
3146}
3147
3148/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003149 * Return TRUE if "p" points to a word character.
3150 * Wide version of spell_iswordp().
3151 */
3152 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003153spell_iswordp_w(int *p, win_T *wp)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003154{
3155 int *s;
3156
Bram Moolenaar860cae12010-06-05 23:22:07 +02003157 if (*p < 256 ? wp->w_s->b_spell_ismw[*p]
3158 : (wp->w_s->b_spell_ismw_mb != NULL
3159 && vim_strchr(wp->w_s->b_spell_ismw_mb, *p) != NULL))
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003160 s = p + 1;
3161 else
3162 s = p;
3163
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00003164 if (*s > 255)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003165 {
3166 if (enc_utf8)
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003167 return spell_mb_isword_class(utf_class(*s), wp);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003168 if (enc_dbcs)
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003169 return spell_mb_isword_class(
3170 dbcs_class((unsigned)*s >> 8, *s & 0xff), wp);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003171 return 0;
3172 }
3173 return spelltab.st_isw[*s];
3174}
3175#endif
3176
Bram Moolenaarea408852005-06-25 22:49:46 +00003177/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003178 * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated.
3179 * Uses the character definitions from the .spl file.
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003180 * When using a multi-byte 'encoding' the length may change!
3181 * Returns FAIL when something wrong.
3182 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02003183 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003184spell_casefold(
3185 char_u *str,
3186 int len,
3187 char_u *buf,
3188 int buflen)
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003189{
3190 int i;
3191
3192 if (len >= buflen)
3193 {
3194 buf[0] = NUL;
3195 return FAIL; /* result will not fit */
3196 }
3197
3198#ifdef FEAT_MBYTE
3199 if (has_mbyte)
3200 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003201 int outi = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003202 char_u *p;
3203 int c;
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003204
3205 /* Fold one character at a time. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003206 for (p = str; p < str + len; )
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003207 {
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003208 if (outi + MB_MAXBYTES > buflen)
3209 {
3210 buf[outi] = NUL;
3211 return FAIL;
3212 }
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003213 c = mb_cptr2char_adv(&p);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003214 outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003215 }
3216 buf[outi] = NUL;
3217 }
3218 else
3219#endif
3220 {
3221 /* Be quick for non-multibyte encodings. */
3222 for (i = 0; i < len; ++i)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003223 buf[i] = spelltab.st_fold[str[i]];
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00003224 buf[i] = NUL;
3225 }
3226
3227 return OK;
3228}
3229
/* Values for "sps_flags", each one is a separate bit. */
#define SPS_BEST    1
#define SPS_FAST    2
#define SPS_DOUBLE  4

/* Flags from 'spellsuggest'. */
static int sps_flags = SPS_BEST;
/* Maximum number of suggestions given. */
static int sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003237
3238/*
3239 * Check the 'spellsuggest' option. Return FAIL if it's wrong.
Bram Moolenaar5195e452005-08-19 20:32:47 +00003240 * Sets "sps_flags" and "sps_limit".
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003241 */
3242 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003243spell_check_sps(void)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003244{
3245 char_u *p;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003246 char_u *s;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003247 char_u buf[MAXPATHL];
3248 int f;
3249
3250 sps_flags = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003251 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003252
3253 for (p = p_sps; *p != NUL; )
3254 {
3255 copy_option_part(&p, buf, MAXPATHL, ",");
3256
3257 f = 0;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003258 if (VIM_ISDIGIT(*buf))
3259 {
3260 s = buf;
3261 sps_limit = getdigits(&s);
3262 if (*s != NUL && !VIM_ISDIGIT(*s))
3263 f = -1;
3264 }
3265 else if (STRCMP(buf, "best") == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003266 f = SPS_BEST;
3267 else if (STRCMP(buf, "fast") == 0)
3268 f = SPS_FAST;
3269 else if (STRCMP(buf, "double") == 0)
3270 f = SPS_DOUBLE;
3271 else if (STRNCMP(buf, "expr:", 5) != 0
3272 && STRNCMP(buf, "file:", 5) != 0)
3273 f = -1;
3274
3275 if (f == -1 || (sps_flags != 0 && f != 0))
3276 {
3277 sps_flags = SPS_BEST;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003278 sps_limit = 9999;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003279 return FAIL;
3280 }
3281 if (f != 0)
3282 sps_flags = f;
3283 }
3284
3285 if (sps_flags == 0)
3286 sps_flags = SPS_BEST;
3287
3288 return OK;
3289}
3290
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003291/*
Bram Moolenaar134bf072013-09-25 18:54:24 +02003292 * "z=": Find badly spelled word under or after the cursor.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003293 * Give suggestions for the properly spelled word.
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003294 * In Visual mode use the highlighted word as the bad word.
Bram Moolenaard12a1322005-08-21 22:08:24 +00003295 * When "count" is non-zero use that suggestion.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003296 */
3297 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003298spell_suggest(int count)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003299{
3300 char_u *line;
3301 pos_T prev_cursor = curwin->w_cursor;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003302 char_u wcopy[MAXWLEN + 2];
3303 char_u *p;
3304 int i;
3305 int c;
3306 suginfo_T sug;
3307 suggest_T *stp;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003308 int mouse_used;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00003309 int need_cap;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003310 int limit;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003311 int selected = count;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003312 int badlen = 0;
Bram Moolenaarb2450162009-07-22 09:04:20 +00003313 int msg_scroll_save = msg_scroll;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003314
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003315 if (no_spell_checking(curwin))
3316 return;
3317
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003318 if (VIsual_active)
3319 {
3320 /* Use the Visually selected text as the bad word. But reject
3321 * a multi-line selection. */
3322 if (curwin->w_cursor.lnum != VIsual.lnum)
3323 {
Bram Moolenaar165bc692015-07-21 17:53:25 +02003324 vim_beep(BO_SPELL);
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003325 return;
3326 }
3327 badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
3328 if (badlen < 0)
3329 badlen = -badlen;
3330 else
3331 curwin->w_cursor.col = VIsual.col;
3332 ++badlen;
3333 end_visual_mode();
3334 }
Bram Moolenaarf7ff6e82014-03-23 15:13:05 +01003335 /* Find the start of the badly spelled word. */
3336 else if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
Bram Moolenaar0c405862005-06-22 22:26:26 +00003337 || curwin->w_cursor.col > prev_cursor.col)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003338 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00003339 /* No bad word or it starts after the cursor: use the word under the
3340 * cursor. */
3341 curwin->w_cursor = prev_cursor;
3342 line = ml_get_curline();
3343 p = line + curwin->w_cursor.col;
3344 /* Backup to before start of word. */
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003345 while (p > line && spell_iswordp_nmw(p, curwin))
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003346 MB_PTR_BACK(line, p);
Bram Moolenaar0c405862005-06-22 22:26:26 +00003347 /* Forward to start of word. */
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003348 while (*p != NUL && !spell_iswordp_nmw(p, curwin))
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003349 MB_PTR_ADV(p);
Bram Moolenaar0c405862005-06-22 22:26:26 +00003350
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003351 if (!spell_iswordp_nmw(p, curwin)) /* No word found. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00003352 {
3353 beep_flush();
3354 return;
3355 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003356 curwin->w_cursor.col = (colnr_T)(p - line);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003357 }
3358
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003359 /* Get the word and its length. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003360
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00003361 /* Figure out if the word should be capitalised. */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003362 need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00003363
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +01003364 /* Make a copy of current line since autocommands may free the line. */
3365 line = vim_strsave(ml_get_curline());
3366 if (line == NULL)
3367 goto skip;
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00003368
Bram Moolenaar5195e452005-08-19 20:32:47 +00003369 /* Get the list of suggestions. Limit to 'lines' - 2 or the number in
3370 * 'spellsuggest', whatever is smaller. */
3371 if (sps_limit > (int)Rows - 2)
3372 limit = (int)Rows - 2;
3373 else
3374 limit = sps_limit;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003375 spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
Bram Moolenaar4770d092006-01-12 23:22:24 +00003376 TRUE, need_cap, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003377
3378 if (sug.su_ga.ga_len == 0)
3379 MSG(_("Sorry, no suggestions"));
Bram Moolenaard12a1322005-08-21 22:08:24 +00003380 else if (count > 0)
3381 {
3382 if (count > sug.su_ga.ga_len)
3383 smsg((char_u *)_("Sorry, only %ld suggestions"),
3384 (long)sug.su_ga.ga_len);
3385 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003386 else
3387 {
Bram Moolenaard23a8232018-02-10 18:45:26 +01003388 VIM_CLEAR(repl_from);
3389 VIM_CLEAR(repl_to);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003390
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003391#ifdef FEAT_RIGHTLEFT
3392 /* When 'rightleft' is set the list is drawn right-left. */
3393 cmdmsg_rl = curwin->w_p_rl;
3394 if (cmdmsg_rl)
3395 msg_col = Columns - 1;
3396#endif
3397
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003398 /* List the suggestions. */
3399 msg_start();
Bram Moolenaar412f7442006-07-23 19:51:57 +00003400 msg_row = Rows - 1; /* for when 'cmdheight' > 1 */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003401 lines_left = Rows; /* avoid more prompt */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003402 vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
3403 sug.su_badlen, sug.su_badptr);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003404#ifdef FEAT_RIGHTLEFT
3405 if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
3406 {
3407 /* And now the rabbit from the high hat: Avoid showing the
3408 * untranslated message rightleft. */
3409 vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
3410 sug.su_badlen, sug.su_badptr);
3411 }
3412#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003413 msg_puts(IObuff);
3414 msg_clr_eos();
3415 msg_putchar('\n');
Bram Moolenaar0c405862005-06-22 22:26:26 +00003416
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003417 msg_scroll = TRUE;
3418 for (i = 0; i < sug.su_ga.ga_len; ++i)
3419 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003420 stp = &SUG(sug.su_ga, i);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003421
3422 /* The suggested word may replace only part of the bad word, add
3423 * the not replaced part. */
Bram Moolenaaref9d6aa2011-04-11 16:56:35 +02003424 vim_strncpy(wcopy, stp->st_word, MAXWLEN);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003425 if (sug.su_badlen > stp->st_orglen)
Bram Moolenaar4770d092006-01-12 23:22:24 +00003426 vim_strncpy(wcopy + stp->st_wordlen,
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003427 sug.su_badptr + stp->st_orglen,
3428 sug.su_badlen - stp->st_orglen);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003429 vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
3430#ifdef FEAT_RIGHTLEFT
3431 if (cmdmsg_rl)
3432 rl_mirror(IObuff);
3433#endif
3434 msg_puts(IObuff);
3435
3436 vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
Bram Moolenaar0c405862005-06-22 22:26:26 +00003437 msg_puts(IObuff);
3438
3439 /* The word may replace more than "su_badlen". */
3440 if (sug.su_badlen < stp->st_orglen)
3441 {
3442 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
3443 stp->st_orglen, sug.su_badptr);
3444 msg_puts(IObuff);
3445 }
3446
Bram Moolenaar9f30f502005-06-14 22:01:04 +00003447 if (p_verbose > 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003448 {
Bram Moolenaar0c405862005-06-22 22:26:26 +00003449 /* Add the score. */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00003450 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003451 vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003452 stp->st_salscore ? "s " : "",
3453 stp->st_score, stp->st_altscore);
3454 else
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003455 vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
Bram Moolenaar0c405862005-06-22 22:26:26 +00003456 stp->st_score);
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003457#ifdef FEAT_RIGHTLEFT
3458 if (cmdmsg_rl)
3459 /* Mirror the numbers, but keep the leading space. */
3460 rl_mirror(IObuff + 1);
3461#endif
Bram Moolenaar0c405862005-06-22 22:26:26 +00003462 msg_advance(30);
3463 msg_puts(IObuff);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003464 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003465 msg_putchar('\n');
3466 }
3467
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003468#ifdef FEAT_RIGHTLEFT
3469 cmdmsg_rl = FALSE;
3470 msg_col = 0;
3471#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003472 /* Ask for choice. */
Bram Moolenaard12a1322005-08-21 22:08:24 +00003473 selected = prompt_for_number(&mouse_used);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003474 if (mouse_used)
Bram Moolenaard12a1322005-08-21 22:08:24 +00003475 selected -= lines_left;
Bram Moolenaarb2450162009-07-22 09:04:20 +00003476 lines_left = Rows; /* avoid more prompt */
3477 /* don't delay for 'smd' in normal_cmd() */
3478 msg_scroll = msg_scroll_save;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00003479 }
3480
Bram Moolenaard12a1322005-08-21 22:08:24 +00003481 if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
3482 {
3483 /* Save the from and to text for :spellrepall. */
3484 stp = &SUG(sug.su_ga, selected - 1);
Bram Moolenaard5cdbeb2005-10-10 20:59:28 +00003485 if (sug.su_badlen > stp->st_orglen)
3486 {
3487 /* Replacing less than "su_badlen", append the remainder to
3488 * repl_to. */
3489 repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
3490 vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
3491 sug.su_badlen - stp->st_orglen,
3492 sug.su_badptr + stp->st_orglen);
3493 repl_to = vim_strsave(IObuff);
3494 }
3495 else
3496 {
3497 /* Replacing su_badlen or more, use the whole word. */
3498 repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
3499 repl_to = vim_strsave(stp->st_word);
3500 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00003501
3502 /* Replace the word. */
Bram Moolenaarb2450162009-07-22 09:04:20 +00003503 p = alloc((unsigned)STRLEN(line) - stp->st_orglen
3504 + stp->st_wordlen + 1);
Bram Moolenaard12a1322005-08-21 22:08:24 +00003505 if (p != NULL)
3506 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003507 c = (int)(sug.su_badptr - line);
Bram Moolenaard12a1322005-08-21 22:08:24 +00003508 mch_memmove(p, line, c);
3509 STRCPY(p + c, stp->st_word);
3510 STRCAT(p, sug.su_badptr + stp->st_orglen);
3511 ml_replace(curwin->w_cursor.lnum, p, FALSE);
3512 curwin->w_cursor.col = c;
Bram Moolenaard12a1322005-08-21 22:08:24 +00003513
3514 /* For redo we use a change-word command. */
3515 ResetRedobuff();
3516 AppendToRedobuff((char_u *)"ciw");
Bram Moolenaarebefac62005-12-28 22:39:57 +00003517 AppendToRedobuffLit(p + c,
Bram Moolenaar4770d092006-01-12 23:22:24 +00003518 stp->st_wordlen + sug.su_badlen - stp->st_orglen);
Bram Moolenaard12a1322005-08-21 22:08:24 +00003519 AppendCharToRedobuff(ESC);
Bram Moolenaar910f66f2006-04-05 20:41:53 +00003520
3521 /* After this "p" may be invalid. */
3522 changed_bytes(curwin->w_cursor.lnum, c);
Bram Moolenaard12a1322005-08-21 22:08:24 +00003523 }
3524 }
3525 else
3526 curwin->w_cursor = prev_cursor;
3527
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003528 spell_find_cleanup(&sug);
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +01003529skip:
3530 vim_free(line);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003531}
3532
3533/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003534 * Check if the word at line "lnum" column "col" is required to start with a
3535 * capital. This uses 'spellcapcheck' of the current buffer.
3536 */
3537 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003538check_need_cap(linenr_T lnum, colnr_T col)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003539{
3540 int need_cap = FALSE;
3541 char_u *line;
3542 char_u *line_copy = NULL;
3543 char_u *p;
3544 colnr_T endcol;
3545 regmatch_T regmatch;
3546
Bram Moolenaar860cae12010-06-05 23:22:07 +02003547 if (curwin->w_s->b_cap_prog == NULL)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003548 return FALSE;
3549
3550 line = ml_get_curline();
3551 endcol = 0;
Bram Moolenaare2e69e42017-09-02 20:30:35 +02003552 if (getwhitecols(line) >= (int)col)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003553 {
3554 /* At start of line, check if previous line is empty or sentence
3555 * ends there. */
3556 if (lnum == 1)
3557 need_cap = TRUE;
3558 else
3559 {
3560 line = ml_get(lnum - 1);
3561 if (*skipwhite(line) == NUL)
3562 need_cap = TRUE;
3563 else
3564 {
3565 /* Append a space in place of the line break. */
3566 line_copy = concat_str(line, (char_u *)" ");
3567 line = line_copy;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003568 endcol = (colnr_T)STRLEN(line);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003569 }
3570 }
3571 }
3572 else
3573 endcol = col;
3574
3575 if (endcol > 0)
3576 {
3577 /* Check if sentence ends before the bad word. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02003578 regmatch.regprog = curwin->w_s->b_cap_prog;
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003579 regmatch.rm_ic = FALSE;
3580 p = line + endcol;
3581 for (;;)
3582 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01003583 MB_PTR_BACK(line, p);
Bram Moolenaarcc63c642013-11-12 04:44:01 +01003584 if (p == line || spell_iswordp_nmw(p, curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003585 break;
3586 if (vim_regexec(&regmatch, p, 0)
3587 && regmatch.endp[0] == line + endcol)
3588 {
3589 need_cap = TRUE;
3590 break;
3591 }
3592 }
Bram Moolenaardffa5b82014-11-19 16:38:07 +01003593 curwin->w_s->b_cap_prog = regmatch.regprog;
Bram Moolenaar8b59de92005-08-11 19:59:29 +00003594 }
3595
3596 vim_free(line_copy);
3597
3598 return need_cap;
3599}
3600
3601
3602/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003603 * ":spellrepall"
3604 */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003605 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003606ex_spellrepall(exarg_T *eap UNUSED)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003607{
3608 pos_T pos = curwin->w_cursor;
3609 char_u *frompat;
3610 int addlen;
3611 char_u *line;
3612 char_u *p;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003613 int save_ws = p_ws;
Bram Moolenaar5195e452005-08-19 20:32:47 +00003614 linenr_T prev_lnum = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003615
3616 if (repl_from == NULL || repl_to == NULL)
3617 {
3618 EMSG(_("E752: No previous spell replacement"));
3619 return;
3620 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003621 addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003622
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003623 frompat = alloc((unsigned)STRLEN(repl_from) + 7);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003624 if (frompat == NULL)
3625 return;
3626 sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
3627 p_ws = FALSE;
3628
Bram Moolenaar5195e452005-08-19 20:32:47 +00003629 sub_nsubs = 0;
3630 sub_nlines = 0;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003631 curwin->w_cursor.lnum = 0;
3632 while (!got_int)
3633 {
Bram Moolenaarfbd0b0a2017-06-17 18:44:21 +02003634 if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL, NULL) == 0
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003635 || u_save_cursor() == FAIL)
3636 break;
3637
3638 /* Only replace when the right word isn't there yet. This happens
3639 * when changing "etc" to "etc.". */
3640 line = ml_get_curline();
3641 if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
3642 repl_to, STRLEN(repl_to)) != 0)
3643 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003644 p = alloc((unsigned)STRLEN(line) + addlen + 1);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003645 if (p == NULL)
3646 break;
3647 mch_memmove(p, line, curwin->w_cursor.col);
3648 STRCPY(p + curwin->w_cursor.col, repl_to);
3649 STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
3650 ml_replace(curwin->w_cursor.lnum, p, FALSE);
3651 changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003652
3653 if (curwin->w_cursor.lnum != prev_lnum)
3654 {
3655 ++sub_nlines;
3656 prev_lnum = curwin->w_cursor.lnum;
3657 }
3658 ++sub_nsubs;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003659 }
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003660 curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003661 }
3662
3663 p_ws = save_ws;
3664 curwin->w_cursor = pos;
3665 vim_free(frompat);
3666
Bram Moolenaar5195e452005-08-19 20:32:47 +00003667 if (sub_nsubs == 0)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003668 EMSG2(_("E753: Not found: %s"), repl_from);
Bram Moolenaar5195e452005-08-19 20:32:47 +00003669 else
3670 do_sub_msg(FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003671}
3672
3673/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003674 * Find spell suggestions for "word". Return them in the growarray "*gap" as
3675 * a list of allocated strings.
3676 */
3677 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003678spell_suggest_list(
3679 garray_T *gap,
3680 char_u *word,
3681 int maxcount, /* maximum nr of suggestions */
3682 int need_cap, /* 'spellcapcheck' matched */
3683 int interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003684{
3685 suginfo_T sug;
3686 int i;
3687 suggest_T *stp;
3688 char_u *wcopy;
3689
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003690 spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003691
3692 /* Make room in "gap". */
3693 ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00003694 if (ga_grow(gap, sug.su_ga.ga_len) == OK)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003695 {
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00003696 for (i = 0; i < sug.su_ga.ga_len; ++i)
3697 {
3698 stp = &SUG(sug.su_ga, i);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003699
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00003700 /* The suggested word may replace only part of "word", add the not
3701 * replaced part. */
3702 wcopy = alloc(stp->st_wordlen
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00003703 + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00003704 if (wcopy == NULL)
3705 break;
3706 STRCPY(wcopy, stp->st_word);
3707 STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
3708 ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
3709 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003710 }
3711
3712 spell_find_cleanup(&sug);
3713}
3714
3715/*
3716 * Find spell suggestions for the word at the start of "badptr".
3717 * Return the suggestions in "su->su_ga".
3718 * The maximum number of suggestions is "maxcount".
3719 * Note: does use info for the current window.
3720 * This is based on the mechanisms of Aspell, but completely reimplemented.
3721 */
3722 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003723spell_find_suggest(
3724 char_u *badptr,
3725 int badlen, /* length of bad word or 0 if unknown */
3726 suginfo_T *su,
3727 int maxcount,
3728 int banbadword, /* don't include badword in suggestions */
3729 int need_cap, /* word should start with capital */
3730 int interactive)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003731{
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00003732 hlf_T attr = HLF_COUNT;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003733 char_u buf[MAXPATHL];
3734 char_u *p;
3735 int do_combine = FALSE;
3736 char_u *sps_copy;
3737#ifdef FEAT_EVAL
3738 static int expr_busy = FALSE;
3739#endif
Bram Moolenaarf9184a12005-07-02 23:10:47 +00003740 int c;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003741 int i;
3742 langp_T *lp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003743
3744 /*
3745 * Set the info in "*su".
3746 */
3747 vim_memset(su, 0, sizeof(suginfo_T));
3748 ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
3749 ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00003750 if (*badptr == NUL)
3751 return;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003752 hash_init(&su->su_banned);
3753
3754 su->su_badptr = badptr;
Bram Moolenaar66fa2712006-01-22 23:22:22 +00003755 if (badlen != 0)
3756 su->su_badlen = badlen;
3757 else
3758 su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003759 su->su_maxcount = maxcount;
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003760 su->su_maxscore = SCORE_MAXINIT;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003761
3762 if (su->su_badlen >= MAXWLEN)
3763 su->su_badlen = MAXWLEN - 1; /* just in case */
3764 vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
3765 (void)spell_casefold(su->su_badptr, su->su_badlen,
3766 su->su_fbadword, MAXWLEN);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02003767 /* TODO: make this work if the case-folded text is longer than the original
3768 * text. Currently an illegal byte causes wrong pointer computations. */
3769 su->su_fbadword[su->su_badlen] = NUL;
3770
Bram Moolenaar0c405862005-06-22 22:26:26 +00003771 /* get caps flags for bad word */
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003772 su->su_badflags = badword_captype(su->su_badptr,
3773 su->su_badptr + su->su_badlen);
Bram Moolenaar7d1f5db2005-07-03 21:39:27 +00003774 if (need_cap)
3775 su->su_badflags |= WF_ONECAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003776
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003777 /* Find the default language for sound folding. We simply use the first
3778 * one in 'spelllang' that supports sound folding. That's good for when
3779 * using multiple files for one language, it's not that bad when mixing
3780 * languages (e.g., "pl,en"). */
Bram Moolenaar860cae12010-06-05 23:22:07 +02003781 for (i = 0; i < curbuf->b_s.b_langp.ga_len; ++i)
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003782 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02003783 lp = LANGP_ENTRY(curbuf->b_s.b_langp, i);
Bram Moolenaar8b96d642005-09-05 22:05:30 +00003784 if (lp->lp_sallang != NULL)
3785 {
3786 su->su_sallang = lp->lp_sallang;
3787 break;
3788 }
3789 }
3790
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00003791 /* Soundfold the bad word with the default sound folding, so that we don't
3792 * have to do this many times. */
3793 if (su->su_sallang != NULL)
3794 spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
3795 su->su_sal_badword);
3796
Bram Moolenaarf9184a12005-07-02 23:10:47 +00003797 /* If the word is not capitalised and spell_check() doesn't consider the
3798 * word to be bad then it might need to be capitalised. Add a suggestion
3799 * for that. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00003800 c = PTR2CHAR(su->su_badptr);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00003801 if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
Bram Moolenaarf9184a12005-07-02 23:10:47 +00003802 {
3803 make_case_word(su->su_badword, buf, WF_ONECAP);
3804 add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
Bram Moolenaar4770d092006-01-12 23:22:24 +00003805 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaarf9184a12005-07-02 23:10:47 +00003806 }
3807
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003808 /* Ban the bad word itself. It may appear in another region. */
Bram Moolenaarea408852005-06-25 22:49:46 +00003809 if (banbadword)
3810 add_banned(su, su->su_badword);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003811
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003812 /* Make a copy of 'spellsuggest', because the expression may change it. */
3813 sps_copy = vim_strsave(p_sps);
3814 if (sps_copy == NULL)
3815 return;
3816
3817 /* Loop over the items in 'spellsuggest'. */
3818 for (p = sps_copy; *p != NUL; )
3819 {
3820 copy_option_part(&p, buf, MAXPATHL, ",");
3821
3822 if (STRNCMP(buf, "expr:", 5) == 0)
3823 {
3824#ifdef FEAT_EVAL
Bram Moolenaar42eeac32005-06-29 22:40:58 +00003825 /* Evaluate an expression. Skip this when called recursively,
3826 * when using spellsuggest() in the expression. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003827 if (!expr_busy)
3828 {
3829 expr_busy = TRUE;
3830 spell_suggest_expr(su, buf + 5);
3831 expr_busy = FALSE;
3832 }
3833#endif
3834 }
3835 else if (STRNCMP(buf, "file:", 5) == 0)
3836 /* Use list of suggestions in a file. */
3837 spell_suggest_file(su, buf + 5);
3838 else
3839 {
3840 /* Use internal method. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00003841 spell_suggest_intern(su, interactive);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003842 if (sps_flags & SPS_DOUBLE)
3843 do_combine = TRUE;
3844 }
3845 }
3846
3847 vim_free(sps_copy);
3848
3849 if (do_combine)
3850 /* Combine the two list of suggestions. This must be done last,
3851 * because sorting changes the order again. */
3852 score_combine(su);
3853}
3854
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * Used for "expr:" in 'spellsuggest'.
 */
    static void
spell_suggest_expr(suginfo_T *su, char_u *expr)
{
    list_T	*result;
    listitem_T	*item;
    int		score;
    char_u	*word;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
	/* Go over the list, only items that are a list themselves are used. */
	for (item = result->lv_first; item != NULL; item = item->li_next)
	{
	    if (item->li_tv.v_type != VAR_LIST)
		continue;

	    /* Get the word and the score from the item. */
	    score = get_spellword(item->li_tv.vval.v_list, &word);
	    if (score < 0 || score > su->su_maxscore)
		continue;

	    add_suggestion(su, &su->su_ga, word, su->su_badlen,
				       score, 0, TRUE, su->su_sallang, FALSE);
	}
	list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
3891
3892/*
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00003893 * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'.
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003894 */
3895 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003896spell_suggest_file(suginfo_T *su, char_u *fname)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003897{
3898 FILE *fd;
3899 char_u line[MAXWLEN * 2];
3900 char_u *p;
3901 int len;
3902 char_u cword[MAXWLEN];
3903
3904 /* Open the file. */
3905 fd = mch_fopen((char *)fname, "r");
3906 if (fd == NULL)
3907 {
3908 EMSG2(_(e_notopen), fname);
3909 return;
3910 }
3911
3912 /* Read it line by line. */
3913 while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
3914 {
3915 line_breakcheck();
3916
3917 p = vim_strchr(line, '/');
3918 if (p == NULL)
3919 continue; /* No Tab found, just skip the line. */
3920 *p++ = NUL;
3921 if (STRICMP(su->su_badword, line) == 0)
3922 {
3923 /* Match! Isolate the good word, until CR or NL. */
3924 for (len = 0; p[len] >= ' '; ++len)
3925 ;
3926 p[len] = NUL;
3927
3928 /* If the suggestion doesn't have specific case duplicate the case
3929 * of the bad word. */
3930 if (captype(p, NULL) == 0)
3931 {
3932 make_case_word(p, cword, su->su_badflags);
3933 p = cword;
3934 }
3935
3936 add_suggestion(su, &su->su_ga, p, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +00003937 SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003938 }
3939 }
3940
3941 fclose(fd);
3942
Bram Moolenaar4770d092006-01-12 23:22:24 +00003943 /* Remove bogus suggestions, sort and truncate at "maxcount". */
3944 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003945 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
3946}
3947
3948/*
3949 * Find suggestions for the internal method indicated by "sps_flags".
3950 */
3951 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01003952spell_suggest_intern(suginfo_T *su, int interactive)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00003953{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003954 /*
Bram Moolenaar4770d092006-01-12 23:22:24 +00003955 * Load the .sug file(s) that are available and not done yet.
3956 */
3957 suggest_load_files();
3958
3959 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +00003960 * 1. Try special cases, such as repeating a word: "the the" -> "the".
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003961 *
3962 * Set a maximum score to limit the combination of operations that is
3963 * tried.
3964 */
Bram Moolenaar0c405862005-06-22 22:26:26 +00003965 suggest_try_special(su);
3966
3967 /*
3968 * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
3969 * from the .aff file and inserting a space (split the word).
3970 */
3971 suggest_try_change(su);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003972
3973 /* For the resulting top-scorers compute the sound-a-like score. */
3974 if (sps_flags & SPS_DOUBLE)
3975 score_comp_sal(su);
3976
3977 /*
Bram Moolenaar0c405862005-06-22 22:26:26 +00003978 * 3. Try finding sound-a-like words.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003979 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00003980 if ((sps_flags & SPS_FAST) == 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00003981 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00003982 if (sps_flags & SPS_BEST)
3983 /* Adjust the word score for the suggestions found so far for how
3984 * they sounds like. */
3985 rescore_suggestions(su);
3986
3987 /*
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +01003988 * While going through the soundfold tree "su_maxscore" is the score
Bram Moolenaar4770d092006-01-12 23:22:24 +00003989 * for the soundfold word, limits the changes that are being tried,
3990 * and "su_sfmaxscore" the rescored score, which is set by
3991 * cleanup_suggestions().
3992 * First find words with a small edit distance, because this is much
3993 * faster and often already finds the top-N suggestions. If we didn't
3994 * find many suggestions try again with a higher edit distance.
3995 * "sl_sounddone" is used to avoid doing the same word twice.
3996 */
3997 suggest_try_soundalike_prep();
3998 su->su_maxscore = SCORE_SFMAX1;
3999 su->su_sfmaxscore = SCORE_MAXINIT * 3;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004000 suggest_try_soundalike(su);
Bram Moolenaar4770d092006-01-12 23:22:24 +00004001 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
4002 {
4003 /* We didn't find enough matches, try again, allowing more
4004 * changes to the soundfold word. */
4005 su->su_maxscore = SCORE_SFMAX2;
4006 suggest_try_soundalike(su);
4007 if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
4008 {
4009 /* Still didn't find enough matches, try again, allowing even
4010 * more changes to the soundfold word. */
4011 su->su_maxscore = SCORE_SFMAX3;
4012 suggest_try_soundalike(su);
4013 }
4014 }
4015 su->su_maxscore = su->su_sfmaxscore;
4016 suggest_try_soundalike_finish();
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004017 }
4018
Bram Moolenaar4770d092006-01-12 23:22:24 +00004019 /* When CTRL-C was hit while searching do show the results. Only clear
4020 * got_int when using a command, not for spellsuggest(). */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004021 ui_breakcheck();
Bram Moolenaar4770d092006-01-12 23:22:24 +00004022 if (interactive && got_int)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004023 {
4024 (void)vgetc();
4025 got_int = FALSE;
4026 }
4027
Bram Moolenaara1ba8112005-06-28 23:23:32 +00004028 if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004029 {
4030 if (sps_flags & SPS_BEST)
4031 /* Adjust the word score for how it sounds like. */
4032 rescore_suggestions(su);
4033
Bram Moolenaar4770d092006-01-12 23:22:24 +00004034 /* Remove bogus suggestions, sort and truncate at "maxcount". */
4035 check_suggestions(su, &su->su_ga);
Bram Moolenaara1ba8112005-06-28 23:23:32 +00004036 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004037 }
4038}
4039
4040/*
4041 * Free the info put in "*su" by spell_find_suggest().
4042 */
4043 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004044spell_find_cleanup(suginfo_T *su)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00004045{
4046 int i;
4047
4048 /* Free the suggestions. */
4049 for (i = 0; i < su->su_ga.ga_len; ++i)
4050 vim_free(SUG(su->su_ga, i).st_word);
4051 ga_clear(&su->su_ga);
4052 for (i = 0; i < su->su_sga.ga_len; ++i)
4053 vim_free(SUG(su->su_sga, i).st_word);
4054 ga_clear(&su->su_sga);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004055
4056 /* Free the banned words. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004057 hash_clear_all(&su->su_banned, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004058}
4059
4060/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004061 * Make a copy of "word", with the first letter upper or lower cased, to
4062 * "wcopy[MAXWLEN]". "word" must not be empty.
4063 * The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004064 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02004065 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004066onecap_copy(
4067 char_u *word,
4068 char_u *wcopy,
4069 int upper) /* TRUE: first letter made upper case */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004070{
4071 char_u *p;
4072 int c;
4073 int l;
4074
4075 p = word;
4076#ifdef FEAT_MBYTE
4077 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004078 c = mb_cptr2char_adv(&p);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004079 else
4080#endif
4081 c = *p++;
4082 if (upper)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004083 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004084 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004085 c = SPELL_TOFOLD(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004086#ifdef FEAT_MBYTE
4087 if (has_mbyte)
4088 l = mb_char2bytes(c, wcopy);
4089 else
4090#endif
4091 {
4092 l = 1;
4093 wcopy[0] = c;
4094 }
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004095 vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004096}
4097
4098/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004099 * Make a copy of "word" with all the letters upper cased into
4100 * "wcopy[MAXWLEN]". The result is NUL terminated.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004101 */
4102 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004103allcap_copy(char_u *word, char_u *wcopy)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004104{
4105 char_u *s;
4106 char_u *d;
4107 int c;
4108
4109 d = wcopy;
4110 for (s = word; *s != NUL; )
4111 {
4112#ifdef FEAT_MBYTE
4113 if (has_mbyte)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004114 c = mb_cptr2char_adv(&s);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004115 else
4116#endif
4117 c = *s++;
Bram Moolenaar78622822005-08-23 21:00:13 +00004118
4119#ifdef FEAT_MBYTE
Bram Moolenaard3184b52011-09-02 14:18:20 +02004120 /* We only change 0xdf to SS when we are certain latin1 is used. It
Bram Moolenaar78622822005-08-23 21:00:13 +00004121 * would cause weird errors in other 8-bit encodings. */
4122 if (enc_latin1like && c == 0xdf)
4123 {
4124 c = 'S';
4125 if (d - wcopy >= MAXWLEN - 1)
4126 break;
4127 *d++ = c;
4128 }
4129 else
4130#endif
4131 c = SPELL_TOUPPER(c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004132
4133#ifdef FEAT_MBYTE
4134 if (has_mbyte)
4135 {
4136 if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
4137 break;
4138 d += mb_char2bytes(c, d);
4139 }
4140 else
4141#endif
4142 {
4143 if (d - wcopy >= MAXWLEN - 1)
4144 break;
4145 *d++ = c;
4146 }
4147 }
4148 *d = NUL;
4149}
4150
4151/*
Bram Moolenaar0c405862005-06-22 22:26:26 +00004152 * Try finding suggestions by recognizing specific situations.
4153 */
4154 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004155suggest_try_special(suginfo_T *su)
Bram Moolenaar0c405862005-06-22 22:26:26 +00004156{
4157 char_u *p;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004158 size_t len;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004159 int c;
4160 char_u word[MAXWLEN];
4161
4162 /*
4163 * Recognize a word that is repeated: "the the".
4164 */
4165 p = skiptowhite(su->su_fbadword);
4166 len = p - su->su_fbadword;
4167 p = skipwhite(p);
4168 if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
4169 {
4170 /* Include badflags: if the badword is onecap or allcap
4171 * use that for the goodword too: "The the" -> "The". */
4172 c = su->su_fbadword[len];
4173 su->su_fbadword[len] = NUL;
4174 make_case_word(su->su_fbadword, word, su->su_badflags);
4175 su->su_fbadword[len] = c;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004176
4177 /* Give a soundalike score of 0, compute the score as if deleting one
4178 * character. */
4179 add_suggestion(su, &su->su_ga, word, su->su_badlen,
Bram Moolenaar4770d092006-01-12 23:22:24 +00004180 RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
Bram Moolenaar0c405862005-06-22 22:26:26 +00004181 }
4182}
4183
/*
 * Change the 0 to 1 to measure how much time is spent in each state.
 * Output is dumped in "suggestprof".
 */
#if 0
# define SUGGEST_PROFILE
proftime_T current;
proftime_T total;
proftime_T times[STATE_FINAL + 1];
long counts[STATE_FINAL + 1];

/* Reset the time and count of every state and start both timers. */
    static void
prof_init(void)
{
    for (int i = 0; i <= STATE_FINAL; ++i)
    {
	profile_zero(&times[i]);
	counts[i] = 0;
    }
    profile_start(&current);
    profile_start(&total);
}

/* call before changing state: the time passed since the previous call is
 * added to "state" */
    static void
prof_store(state_T state)
{
    profile_end(&current);
    profile_add(&times[state], &current);
    ++counts[state];
    profile_start(&current);
}
# define PROF_STORE(state) prof_store(state);

/* Append the total time and the time and count of every state to the file
 * "suggestprof", labelled with "name".  Nothing is written when the file
 * cannot be opened. */
    static void
prof_report(char *name)
{
    FILE *fd = fopen("suggestprof", "a");

    profile_end(&total);
    if (fd == NULL)
	return;
    fprintf(fd, "-----------------------\n");
    fprintf(fd, "%s: %s\n", name, profile_msg(&total));
    for (int i = 0; i <= STATE_FINAL; ++i)
	fprintf(fd, "%d: %s (%ld)\n", i, profile_msg(&times[i]), counts[i]);
    fclose(fd);
}
#else
# define PROF_STORE(state)
#endif
4233
4234/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004235 * Try finding suggestions by adding/removing/swapping letters.
4236 */
4237 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004238suggest_try_change(suginfo_T *su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004239{
4240 char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004241 int n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004242 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004243 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004244 langp_T *lp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004245
4246 /* We make a copy of the case-folded bad word, so that we can modify it
Bram Moolenaar0c405862005-06-22 22:26:26 +00004247 * to find matches (esp. REP items). Append some more text, changing
4248 * chars after the bad word may help. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004249 STRCPY(fword, su->su_fbadword);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004250 n = (int)STRLEN(fword);
Bram Moolenaar0c405862005-06-22 22:26:26 +00004251 p = su->su_badptr + su->su_badlen;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004252 (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004253
Bram Moolenaar860cae12010-06-05 23:22:07 +02004254 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004255 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02004256 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004257
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004258 /* If reloading a spell file fails it's still in the list but
4259 * everything has been cleared. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004260 if (lp->lp_slang->sl_fbyts == NULL)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004261 continue;
4262
Bram Moolenaar4770d092006-01-12 23:22:24 +00004263 /* Try it for this language. Will add possible suggestions. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004264#ifdef SUGGEST_PROFILE
4265 prof_init();
4266#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004267 suggest_trie_walk(su, lp, fword, FALSE);
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004268#ifdef SUGGEST_PROFILE
4269 prof_report("try_change");
4270#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004271 }
4272}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004273
/*
 * Check the maximum score: evaluates to non-zero when the score stored at
 * stack[depth] plus "add" is still below su->su_maxscore.  When the sum
 * reaches or goes over the maximum we won't try this change.
 * Every parameter is parenthesized, so that expressions such as "&info" or
 * "stack + 1" can be passed safely.
 */
#define TRY_DEEPER(su, stack, depth, add) \
	((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
4277
4278/*
4279 * Try finding suggestions by adding/removing/swapping letters.
4280 *
4281 * This uses a state machine. At each node in the tree we try various
4282 * operations. When trying if an operation works "depth" is increased and the
4283 * stack[] is used to store info. This allows combinations, thus insert one
4284 * character, replace one and delete another. The number of changes is
4285 * limited by su->su_maxscore.
4286 *
4287 * After implementing this I noticed an article by Kemal Oflazer that
4288 * describes something similar: "Error-tolerant Finite State Recognition with
4289 * Applications to Morphological Analysis and Spelling Correction" (1996).
4290 * The implementation in the article is simplified and requires a stack of
4291 * unknown depth. The implementation here only needs a stack depth equal to
4292 * the length of the word.
4293 *
4294 * This is also used for the sound-folded word, "soundfold" is TRUE then.
4295 * The mechanism is the same, but we find a match with a sound-folded word
4296 * that comes from one or more original words. Each of these words may be
4297 * added, this is done by add_sound_suggest().
4298 * Don't use:
4299 * the prefix tree or the keep-case tree
4300 * "su->su_badlen"
4301 * anything to do with upper and lower case
4302 * anything to do with word or non-word characters ("spell_iswordp()")
4303 * banned words
4304 * word flags (rare, region, compounding)
4305 * word splitting for now
4306 * "similar_chars()"
4307 * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
4308 */
4309 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01004310suggest_trie_walk(
4311 suginfo_T *su,
4312 langp_T *lp,
4313 char_u *fword,
4314 int soundfold)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004315{
4316 char_u tword[MAXWLEN]; /* good word collected so far */
4317 trystate_T stack[MAXWLEN];
4318 char_u preword[MAXWLEN * 3]; /* word found with proper case;
Bram Moolenaar3ea38ef2010-01-19 13:08:42 +01004319 * concatenation of prefix compound
Bram Moolenaar4770d092006-01-12 23:22:24 +00004320 * words and split word. NUL terminated
4321 * when going deeper but not when coming
4322 * back. */
4323 char_u compflags[MAXWLEN]; /* compound flags, one for each word */
4324 trystate_T *sp;
4325 int newscore;
4326 int score;
4327 char_u *byts, *fbyts, *pbyts;
4328 idx_T *idxs, *fidxs, *pidxs;
4329 int depth;
4330 int c, c2, c3;
4331 int n = 0;
4332 int flags;
4333 garray_T *gap;
4334 idx_T arridx;
4335 int len;
4336 char_u *p;
4337 fromto_T *ftp;
4338 int fl = 0, tl;
4339 int repextra = 0; /* extra bytes in fword[] from REP item */
4340 slang_T *slang = lp->lp_slang;
4341 int fword_ends;
4342 int goodword_ends;
4343#ifdef DEBUG_TRIEWALK
4344 /* Stores the name of the change made at each level. */
4345 char_u changename[MAXWLEN][80];
4346#endif
4347 int breakcheckcount = 1000;
4348 int compound_ok;
4349
4350 /*
4351 * Go through the whole case-fold tree, try changes at each node.
4352 * "tword[]" contains the word collected from nodes in the tree.
4353 * "fword[]" the word we are trying to match with (initially the bad
4354 * word).
4355 */
4356 depth = 0;
4357 sp = &stack[0];
4358 vim_memset(sp, 0, sizeof(trystate_T));
4359 sp->ts_curi = 1;
4360
4361 if (soundfold)
4362 {
4363 /* Going through the soundfold tree. */
4364 byts = fbyts = slang->sl_sbyts;
4365 idxs = fidxs = slang->sl_sidxs;
4366 pbyts = NULL;
4367 pidxs = NULL;
4368 sp->ts_prefixdepth = PFD_NOPREFIX;
4369 sp->ts_state = STATE_START;
4370 }
4371 else
4372 {
Bram Moolenaarea424162005-06-16 21:51:00 +00004373 /*
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004374 * When there are postponed prefixes we need to use these first. At
4375 * the end of the prefix we continue in the case-fold tree.
4376 */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004377 fbyts = slang->sl_fbyts;
4378 fidxs = slang->sl_fidxs;
4379 pbyts = slang->sl_pbyts;
4380 pidxs = slang->sl_pidxs;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004381 if (pbyts != NULL)
4382 {
4383 byts = pbyts;
4384 idxs = pidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004385 sp->ts_prefixdepth = PFD_PREFIXTREE;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004386 sp->ts_state = STATE_NOPREFIX; /* try without prefix first */
4387 }
4388 else
4389 {
4390 byts = fbyts;
4391 idxs = fidxs;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004392 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaard12a1322005-08-21 22:08:24 +00004393 sp->ts_state = STATE_START;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004394 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004395 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004396
Bram Moolenaar4770d092006-01-12 23:22:24 +00004397 /*
4398 * Loop to find all suggestions. At each round we either:
4399 * - For the current state try one operation, advance "ts_curi",
4400 * increase "depth".
4401 * - When a state is done go to the next, set "ts_state".
4402 * - When all states are tried decrease "depth".
4403 */
4404 while (depth >= 0 && !got_int)
4405 {
4406 sp = &stack[depth];
4407 switch (sp->ts_state)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004408 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004409 case STATE_START:
4410 case STATE_NOPREFIX:
4411 /*
4412 * Start of node: Deal with NUL bytes, which means
4413 * tword[] may end here.
4414 */
4415 arridx = sp->ts_arridx; /* current node in the tree */
4416 len = byts[arridx]; /* bytes in this node */
4417 arridx += sp->ts_curi; /* index of current byte */
4418
4419 if (sp->ts_prefixdepth == PFD_PREFIXTREE)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004420 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004421 /* Skip over the NUL bytes, we use them later. */
4422 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
4423 ;
4424 sp->ts_curi += n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004425
Bram Moolenaar4770d092006-01-12 23:22:24 +00004426 /* Always past NUL bytes now. */
4427 n = (int)sp->ts_state;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004428 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004429 sp->ts_state = STATE_ENDNUL;
4430 sp->ts_save_badflags = su->su_badflags;
4431
4432 /* At end of a prefix or at start of prefixtree: check for
4433 * following word. */
4434 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004435 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004436 /* Set su->su_badflags to the caps type at this position.
4437 * Use the caps type until here for the prefix itself. */
Bram Moolenaar53805d12005-08-01 07:08:33 +00004438#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00004439 if (has_mbyte)
4440 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
4441 else
Bram Moolenaar53805d12005-08-01 07:08:33 +00004442#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004443 n = sp->ts_fidx;
4444 flags = badword_captype(su->su_badptr, su->su_badptr + n);
4445 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +00004446 su->su_badptr + su->su_badlen);
Bram Moolenaar4770d092006-01-12 23:22:24 +00004447#ifdef DEBUG_TRIEWALK
4448 sprintf(changename[depth], "prefix");
4449#endif
4450 go_deeper(stack, depth, 0);
4451 ++depth;
4452 sp = &stack[depth];
4453 sp->ts_prefixdepth = depth - 1;
4454 byts = fbyts;
4455 idxs = fidxs;
4456 sp->ts_arridx = 0;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004457
Bram Moolenaar4770d092006-01-12 23:22:24 +00004458 /* Move the prefix to preword[] with the right case
4459 * and make find_keepcap_word() works. */
4460 tword[sp->ts_twordlen] = NUL;
4461 make_case_word(tword + sp->ts_splitoff,
4462 preword + sp->ts_prewordlen, flags);
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004463 sp->ts_prewordlen = (char_u)STRLEN(preword);
Bram Moolenaar4770d092006-01-12 23:22:24 +00004464 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004465 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004466 break;
4467 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004468
Bram Moolenaar4770d092006-01-12 23:22:24 +00004469 if (sp->ts_curi > len || byts[arridx] != 0)
4470 {
4471 /* Past bytes in node and/or past NUL bytes. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004472 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004473 sp->ts_state = STATE_ENDNUL;
4474 sp->ts_save_badflags = su->su_badflags;
4475 break;
4476 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004477
Bram Moolenaar4770d092006-01-12 23:22:24 +00004478 /*
4479 * End of word in tree.
4480 */
4481 ++sp->ts_curi; /* eat one NUL byte */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004482
Bram Moolenaar4770d092006-01-12 23:22:24 +00004483 flags = (int)idxs[arridx];
Bram Moolenaare1438bb2006-03-01 22:01:55 +00004484
4485 /* Skip words with the NOSUGGEST flag. */
4486 if (flags & WF_NOSUGGEST)
4487 break;
4488
Bram Moolenaar4770d092006-01-12 23:22:24 +00004489 fword_ends = (fword[sp->ts_fidx] == NUL
4490 || (soundfold
Bram Moolenaar1c465442017-03-12 20:10:05 +01004491 ? VIM_ISWHITE(fword[sp->ts_fidx])
Bram Moolenaar860cae12010-06-05 23:22:07 +02004492 : !spell_iswordp(fword + sp->ts_fidx, curwin)));
Bram Moolenaar4770d092006-01-12 23:22:24 +00004493 tword[sp->ts_twordlen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004494
Bram Moolenaar4770d092006-01-12 23:22:24 +00004495 if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
Bram Moolenaard12a1322005-08-21 22:08:24 +00004496 && (sp->ts_flags & TSF_PREFIXOK) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004497 {
4498 /* There was a prefix before the word. Check that the prefix
4499 * can be used with this word. */
4500 /* Count the length of the NULs in the prefix. If there are
4501 * none this must be the first try without a prefix. */
4502 n = stack[sp->ts_prefixdepth].ts_arridx;
4503 len = pbyts[n++];
4504 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
4505 ;
4506 if (c > 0)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004507 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004508 c = valid_word_prefix(c, n, flags,
Bram Moolenaar5195e452005-08-19 20:32:47 +00004509 tword + sp->ts_splitoff, slang, FALSE);
Bram Moolenaar4770d092006-01-12 23:22:24 +00004510 if (c == 0)
4511 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004512
Bram Moolenaar4770d092006-01-12 23:22:24 +00004513 /* Use the WF_RARE flag for a rare prefix. */
4514 if (c & WF_RAREPFX)
4515 flags |= WF_RARE;
Bram Moolenaard12a1322005-08-21 22:08:24 +00004516
Bram Moolenaar4770d092006-01-12 23:22:24 +00004517 /* Tricky: when checking for both prefix and compounding
4518 * we run into the prefix flag first.
4519 * Remember that it's OK, so that we accept the prefix
4520 * when arriving at a compound flag. */
4521 sp->ts_flags |= TSF_PREFIXOK;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004522 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004523 }
Bram Moolenaar42eeac32005-06-29 22:40:58 +00004524
Bram Moolenaar4770d092006-01-12 23:22:24 +00004525 /* Check NEEDCOMPOUND: can't use word without compounding. Do try
4526 * appending another compound word below. */
4527 if (sp->ts_complen == sp->ts_compsplit && fword_ends
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004528 && (flags & WF_NEEDCOMP))
Bram Moolenaar4770d092006-01-12 23:22:24 +00004529 goodword_ends = FALSE;
4530 else
4531 goodword_ends = TRUE;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004532
Bram Moolenaar4770d092006-01-12 23:22:24 +00004533 p = NULL;
4534 compound_ok = TRUE;
4535 if (sp->ts_complen > sp->ts_compsplit)
4536 {
4537 if (slang->sl_nobreak)
Bram Moolenaard12a1322005-08-21 22:08:24 +00004538 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004539 /* There was a word before this word. When there was no
4540 * change in this word (it was correct) add the first word
4541 * as a suggestion. If this word was corrected too, we
4542 * need to check if a correct word follows. */
4543 if (sp->ts_fidx - sp->ts_splitfidx
Bram Moolenaar78622822005-08-23 21:00:13 +00004544 == sp->ts_twordlen - sp->ts_splitoff
Bram Moolenaar4770d092006-01-12 23:22:24 +00004545 && STRNCMP(fword + sp->ts_splitfidx,
4546 tword + sp->ts_splitoff,
Bram Moolenaar78622822005-08-23 21:00:13 +00004547 sp->ts_fidx - sp->ts_splitfidx) == 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004548 {
4549 preword[sp->ts_prewordlen] = NUL;
4550 newscore = score_wordcount_adj(slang, sp->ts_score,
4551 preword + sp->ts_prewordlen,
4552 sp->ts_prewordlen > 0);
4553 /* Add the suggestion if the score isn't too bad. */
4554 if (newscore <= su->su_maxscore)
Bram Moolenaar78622822005-08-23 21:00:13 +00004555 add_suggestion(su, &su->su_ga, preword,
Bram Moolenaar8b96d642005-09-05 22:05:30 +00004556 sp->ts_splitfidx - repextra,
Bram Moolenaar4770d092006-01-12 23:22:24 +00004557 newscore, 0, FALSE,
4558 lp->lp_sallang, FALSE);
4559 break;
Bram Moolenaar78622822005-08-23 21:00:13 +00004560 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00004561 }
Bram Moolenaare52325c2005-08-22 22:54:29 +00004562 else
Bram Moolenaar0c405862005-06-22 22:26:26 +00004563 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004564 /* There was a compound word before this word. If this
4565 * word does not support compounding then give up
4566 * (splitting is tried for the word without compound
4567 * flag). */
4568 if (((unsigned)flags >> 24) == 0
4569 || sp->ts_twordlen - sp->ts_splitoff
4570 < slang->sl_compminlen)
4571 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004572#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00004573 /* For multi-byte chars check character length against
4574 * COMPOUNDMIN. */
4575 if (has_mbyte
4576 && slang->sl_compminlen > 0
4577 && mb_charlen(tword + sp->ts_splitoff)
4578 < slang->sl_compminlen)
4579 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00004580#endif
Bram Moolenaare52325c2005-08-22 22:54:29 +00004581
Bram Moolenaar4770d092006-01-12 23:22:24 +00004582 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4583 compflags[sp->ts_complen + 1] = NUL;
4584 vim_strncpy(preword + sp->ts_prewordlen,
4585 tword + sp->ts_splitoff,
4586 sp->ts_twordlen - sp->ts_splitoff);
Bram Moolenaar9f94b052008-11-30 20:12:46 +00004587
4588 /* Verify CHECKCOMPOUNDPATTERN rules. */
4589 if (match_checkcompoundpattern(preword, sp->ts_prewordlen,
4590 &slang->sl_comppat))
Bram Moolenaar4770d092006-01-12 23:22:24 +00004591 compound_ok = FALSE;
4592
Bram Moolenaar9f94b052008-11-30 20:12:46 +00004593 if (compound_ok)
4594 {
4595 p = preword;
4596 while (*skiptowhite(p) != NUL)
4597 p = skipwhite(skiptowhite(p));
4598 if (fword_ends && !can_compound(slang, p,
4599 compflags + sp->ts_compsplit))
4600 /* Compound is not allowed. But it may still be
4601 * possible if we add another (short) word. */
4602 compound_ok = FALSE;
4603 }
4604
Bram Moolenaar4770d092006-01-12 23:22:24 +00004605 /* Get pointer to last char of previous word. */
4606 p = preword + sp->ts_prewordlen;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01004607 MB_PTR_BACK(preword, p);
Bram Moolenaar0c405862005-06-22 22:26:26 +00004608 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004609 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004610
Bram Moolenaar4770d092006-01-12 23:22:24 +00004611 /*
4612 * Form the word with proper case in preword.
4613 * If there is a word from a previous split, append.
4614 * For the soundfold tree don't change the case, simply append.
4615 */
4616 if (soundfold)
4617 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
4618 else if (flags & WF_KEEPCAP)
4619 /* Must find the word in the keep-case tree. */
4620 find_keepcap_word(slang, tword + sp->ts_splitoff,
4621 preword + sp->ts_prewordlen);
4622 else
4623 {
4624 /* Include badflags: If the badword is onecap or allcap
4625 * use that for the goodword too. But if the badword is
4626 * allcap and it's only one char long use onecap. */
4627 c = su->su_badflags;
4628 if ((c & WF_ALLCAP)
4629#ifdef FEAT_MBYTE
4630 && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
4631#else
4632 && su->su_badlen == 1
4633#endif
4634 )
4635 c = WF_ONECAP;
4636 c |= flags;
4637
4638 /* When appending a compound word after a word character don't
4639 * use Onecap. */
Bram Moolenaarcc63c642013-11-12 04:44:01 +01004640 if (p != NULL && spell_iswordp_nmw(p, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00004641 c &= ~WF_ONECAP;
4642 make_case_word(tword + sp->ts_splitoff,
4643 preword + sp->ts_prewordlen, c);
4644 }
4645
4646 if (!soundfold)
4647 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004648 /* Don't use a banned word. It may appear again as a good
4649 * word, thus remember it. */
4650 if (flags & WF_BANNED)
4651 {
Bram Moolenaar5195e452005-08-19 20:32:47 +00004652 add_banned(su, preword + sp->ts_prewordlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004653 break;
4654 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004655 if ((sp->ts_complen == sp->ts_compsplit
Bram Moolenaar4770d092006-01-12 23:22:24 +00004656 && WAS_BANNED(su, preword + sp->ts_prewordlen))
4657 || WAS_BANNED(su, preword))
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004658 {
4659 if (slang->sl_compprog == NULL)
4660 break;
4661 /* the word so far was banned but we may try compounding */
4662 goodword_ends = FALSE;
4663 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004664 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004665
Bram Moolenaar4770d092006-01-12 23:22:24 +00004666 newscore = 0;
4667 if (!soundfold) /* soundfold words don't have flags */
4668 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004669 if ((flags & WF_REGION)
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00004670 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004671 newscore += SCORE_REGION;
4672 if (flags & WF_RARE)
4673 newscore += SCORE_RARE;
4674
Bram Moolenaar0c405862005-06-22 22:26:26 +00004675 if (!spell_valid_case(su->su_badflags,
Bram Moolenaar5195e452005-08-19 20:32:47 +00004676 captype(preword + sp->ts_prewordlen, NULL)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004677 newscore += SCORE_ICASE;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004678 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004679
Bram Moolenaar4770d092006-01-12 23:22:24 +00004680 /* TODO: how about splitting in the soundfold tree? */
4681 if (fword_ends
4682 && goodword_ends
4683 && sp->ts_fidx >= sp->ts_fidxtry
4684 && compound_ok)
4685 {
4686 /* The badword also ends: add suggestions. */
4687#ifdef DEBUG_TRIEWALK
4688 if (soundfold && STRCMP(preword, "smwrd") == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004689 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004690 int j;
4691
4692 /* print the stack of changes that brought us here */
4693 smsg("------ %s -------", fword);
4694 for (j = 0; j < depth; ++j)
4695 smsg("%s", changename[j]);
4696 }
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004697#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004698 if (soundfold)
4699 {
4700 /* For soundfolded words we need to find the original
Bram Moolenaarf711faf2007-05-10 16:48:19 +00004701 * words, the edit distance and then add them. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004702 add_sound_suggest(su, preword, sp->ts_score, lp);
4703 }
Bram Moolenaar7e88c3d2010-08-01 15:47:35 +02004704 else if (sp->ts_fidx > 0)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004705 {
4706 /* Give a penalty when changing non-word char to word
4707 * char, e.g., "thes," -> "these". */
4708 p = fword + sp->ts_fidx;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01004709 MB_PTR_BACK(fword, p);
Bram Moolenaar860cae12010-06-05 23:22:07 +02004710 if (!spell_iswordp(p, curwin))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004711 {
4712 p = preword + STRLEN(preword);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01004713 MB_PTR_BACK(preword, p);
Bram Moolenaar860cae12010-06-05 23:22:07 +02004714 if (spell_iswordp(p, curwin))
Bram Moolenaarcf6bf392005-06-27 22:27:46 +00004715 newscore += SCORE_NONWORD;
4716 }
4717
Bram Moolenaar4770d092006-01-12 23:22:24 +00004718 /* Give a bonus to words seen before. */
4719 score = score_wordcount_adj(slang,
4720 sp->ts_score + newscore,
4721 preword + sp->ts_prewordlen,
4722 sp->ts_prewordlen > 0);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004723
Bram Moolenaar4770d092006-01-12 23:22:24 +00004724 /* Add the suggestion if the score isn't too bad. */
4725 if (score <= su->su_maxscore)
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00004726 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004727 add_suggestion(su, &su->su_ga, preword,
4728 sp->ts_fidx - repextra,
4729 score, 0, FALSE, lp->lp_sallang, FALSE);
Bram Moolenaar2d3f4892006-01-20 23:02:51 +00004730
4731 if (su->su_badflags & WF_MIXCAP)
4732 {
4733 /* We really don't know if the word should be
4734 * upper or lower case, add both. */
4735 c = captype(preword, NULL);
4736 if (c == 0 || c == WF_ALLCAP)
4737 {
4738 make_case_word(tword + sp->ts_splitoff,
4739 preword + sp->ts_prewordlen,
4740 c == 0 ? WF_ALLCAP : 0);
4741
4742 add_suggestion(su, &su->su_ga, preword,
4743 sp->ts_fidx - repextra,
4744 score + SCORE_ICASE, 0, FALSE,
4745 lp->lp_sallang, FALSE);
4746 }
4747 }
4748 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004749 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004750 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004751
Bram Moolenaar4770d092006-01-12 23:22:24 +00004752 /*
4753 * Try word split and/or compounding.
4754 */
4755 if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
Bram Moolenaarea424162005-06-16 21:51:00 +00004756#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00004757 /* Don't split halfway a character. */
4758 && (!has_mbyte || sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +00004759#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004760 )
4761 {
4762 int try_compound;
4763 int try_split;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004764
Bram Moolenaar4770d092006-01-12 23:22:24 +00004765 /* If past the end of the bad word don't try a split.
4766 * Otherwise try changing the next word. E.g., find
4767 * suggestions for "the the" where the second "the" is
4768 * different. It's done like a split.
4769 * TODO: word split for soundfold words */
4770 try_split = (sp->ts_fidx - repextra < su->su_badlen)
4771 && !soundfold;
4772
4773 /* Get here in several situations:
4774 * 1. The word in the tree ends:
4775 * If the word allows compounding try that. Otherwise try
4776 * a split by inserting a space. For both check that a
4777 * valid words starts at fword[sp->ts_fidx].
4778 * For NOBREAK do like compounding to be able to check if
4779 * the next word is valid.
4780 * 2. The badword does end, but it was due to a change (e.g.,
4781 * a swap). No need to split, but do check that the
4782 * following word is valid.
4783 * 3. The badword and the word in the tree end. It may still
4784 * be possible to compound another (short) word.
4785 */
4786 try_compound = FALSE;
4787 if (!soundfold
Bram Moolenaar7b877b32016-01-09 13:51:34 +01004788 && !slang->sl_nocompoundsugs
Bram Moolenaar4770d092006-01-12 23:22:24 +00004789 && slang->sl_compprog != NULL
4790 && ((unsigned)flags >> 24) != 0
4791 && sp->ts_twordlen - sp->ts_splitoff
4792 >= slang->sl_compminlen
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004793#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00004794 && (!has_mbyte
4795 || slang->sl_compminlen == 0
4796 || mb_charlen(tword + sp->ts_splitoff)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00004797 >= slang->sl_compminlen)
4798#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004799 && (slang->sl_compsylmax < MAXWLEN
4800 || sp->ts_complen + 1 - sp->ts_compsplit
4801 < slang->sl_compmax)
Bram Moolenaar9f94b052008-11-30 20:12:46 +00004802 && (can_be_compound(sp, slang,
4803 compflags, ((unsigned)flags >> 24))))
4804
Bram Moolenaar4770d092006-01-12 23:22:24 +00004805 {
4806 try_compound = TRUE;
4807 compflags[sp->ts_complen] = ((unsigned)flags >> 24);
4808 compflags[sp->ts_complen + 1] = NUL;
4809 }
Bram Moolenaard12a1322005-08-21 22:08:24 +00004810
Bram Moolenaar4770d092006-01-12 23:22:24 +00004811 /* For NOBREAK we never try splitting, it won't make any word
4812 * valid. */
Bram Moolenaar7b877b32016-01-09 13:51:34 +01004813 if (slang->sl_nobreak && !slang->sl_nocompoundsugs)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004814 try_compound = TRUE;
Bram Moolenaar78622822005-08-23 21:00:13 +00004815
Bram Moolenaar4770d092006-01-12 23:22:24 +00004816 /* If we could add a compound word, and it's also possible to
4817 * split at this point, do the split first and set
4818 * TSF_DIDSPLIT to avoid doing it again. */
4819 else if (!fword_ends
4820 && try_compound
4821 && (sp->ts_flags & TSF_DIDSPLIT) == 0)
4822 {
4823 try_compound = FALSE;
4824 sp->ts_flags |= TSF_DIDSPLIT;
4825 --sp->ts_curi; /* do the same NUL again */
4826 compflags[sp->ts_complen] = NUL;
4827 }
4828 else
4829 sp->ts_flags &= ~TSF_DIDSPLIT;
Bram Moolenaard12a1322005-08-21 22:08:24 +00004830
Bram Moolenaar4770d092006-01-12 23:22:24 +00004831 if (try_split || try_compound)
4832 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004833 if (!try_compound && (!fword_ends || !goodword_ends))
Bram Moolenaard12a1322005-08-21 22:08:24 +00004834 {
4835 /* If we're going to split need to check that the
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004836 * words so far are valid for compounding. If there
4837 * is only one word it must not have the NEEDCOMPOUND
4838 * flag. */
4839 if (sp->ts_complen == sp->ts_compsplit
4840 && (flags & WF_NEEDCOMP))
4841 break;
Bram Moolenaare52325c2005-08-22 22:54:29 +00004842 p = preword;
4843 while (*skiptowhite(p) != NUL)
4844 p = skipwhite(skiptowhite(p));
Bram Moolenaard12a1322005-08-21 22:08:24 +00004845 if (sp->ts_complen > sp->ts_compsplit
Bram Moolenaare52325c2005-08-22 22:54:29 +00004846 && !can_compound(slang, p,
Bram Moolenaard12a1322005-08-21 22:08:24 +00004847 compflags + sp->ts_compsplit))
4848 break;
Bram Moolenaare1438bb2006-03-01 22:01:55 +00004849
4850 if (slang->sl_nosplitsugs)
4851 newscore += SCORE_SPLIT_NO;
4852 else
4853 newscore += SCORE_SPLIT;
Bram Moolenaar4770d092006-01-12 23:22:24 +00004854
4855 /* Give a bonus to words seen before. */
4856 newscore = score_wordcount_adj(slang, newscore,
4857 preword + sp->ts_prewordlen, TRUE);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004858 }
4859
Bram Moolenaar4770d092006-01-12 23:22:24 +00004860 if (TRY_DEEPER(su, stack, depth, newscore))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004861 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00004862 go_deeper(stack, depth, newscore);
4863#ifdef DEBUG_TRIEWALK
4864 if (!try_compound && !fword_ends)
4865 sprintf(changename[depth], "%.*s-%s: split",
4866 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4867 else
4868 sprintf(changename[depth], "%.*s-%s: compound",
4869 sp->ts_twordlen, tword, fword + sp->ts_fidx);
4870#endif
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004871 /* Save things to be restored at STATE_SPLITUNDO. */
Bram Moolenaar0c405862005-06-22 22:26:26 +00004872 sp->ts_save_badflags = su->su_badflags;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004873 PROF_STORE(sp->ts_state)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004874 sp->ts_state = STATE_SPLITUNDO;
4875
4876 ++depth;
4877 sp = &stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004878
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004879 /* Append a space to preword when splitting. */
4880 if (!try_compound && !fword_ends)
4881 STRCAT(preword, " ");
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00004882 sp->ts_prewordlen = (char_u)STRLEN(preword);
Bram Moolenaar5195e452005-08-19 20:32:47 +00004883 sp->ts_splitoff = sp->ts_twordlen;
Bram Moolenaar78622822005-08-23 21:00:13 +00004884 sp->ts_splitfidx = sp->ts_fidx;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004885
4886 /* If the badword has a non-word character at this
4887 * position skip it. That means replacing the
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004888 * non-word character with a space. Always skip a
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00004889 * character when the word ends. But only when the
4890 * good word can end. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004891 if (((!try_compound && !spell_iswordp_nmw(fword
Bram Moolenaarcc63c642013-11-12 04:44:01 +01004892 + sp->ts_fidx,
4893 curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00004894 || fword_ends)
4895 && fword[sp->ts_fidx] != NUL
4896 && goodword_ends)
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004897 {
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004898 int l;
4899
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02004900 l = MB_PTR2LEN(fword + sp->ts_fidx);
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004901 if (fword_ends)
4902 {
4903 /* Copy the skipped character to preword. */
Bram Moolenaar5195e452005-08-19 20:32:47 +00004904 mch_memmove(preword + sp->ts_prewordlen,
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004905 fword + sp->ts_fidx, l);
Bram Moolenaar5195e452005-08-19 20:32:47 +00004906 sp->ts_prewordlen += l;
4907 preword[sp->ts_prewordlen] = NUL;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004908 }
4909 else
4910 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
4911 sp->ts_fidx += l;
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004912 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00004913
Bram Moolenaard12a1322005-08-21 22:08:24 +00004914 /* When compounding include compound flag in
4915 * compflags[] (already set above). When splitting we
4916 * may start compounding over again. */
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004917 if (try_compound)
Bram Moolenaar5195e452005-08-19 20:32:47 +00004918 ++sp->ts_complen;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004919 else
Bram Moolenaard12a1322005-08-21 22:08:24 +00004920 sp->ts_compsplit = sp->ts_complen;
4921 sp->ts_prefixdepth = PFD_NOPREFIX;
Bram Moolenaar5b8d8fd2005-08-16 23:01:50 +00004922
Bram Moolenaar53805d12005-08-01 07:08:33 +00004923 /* set su->su_badflags to the caps type at this
4924 * position */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004925#ifdef FEAT_MBYTE
4926 if (has_mbyte)
Bram Moolenaar53805d12005-08-01 07:08:33 +00004927 n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00004928 else
4929#endif
Bram Moolenaar53805d12005-08-01 07:08:33 +00004930 n = sp->ts_fidx;
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00004931 su->su_badflags = badword_captype(su->su_badptr + n,
Bram Moolenaar53805d12005-08-01 07:08:33 +00004932 su->su_badptr + su->su_badlen);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004933
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004934 /* Restart at top of the tree. */
Bram Moolenaar9c96f592005-06-30 21:52:39 +00004935 sp->ts_arridx = 0;
Bram Moolenaard12a1322005-08-21 22:08:24 +00004936
4937 /* If there are postponed prefixes, try these too. */
4938 if (pbyts != NULL)
4939 {
4940 byts = pbyts;
4941 idxs = pidxs;
4942 sp->ts_prefixdepth = PFD_PREFIXTREE;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004943 PROF_STORE(sp->ts_state)
Bram Moolenaard12a1322005-08-21 22:08:24 +00004944 sp->ts_state = STATE_NOPREFIX;
4945 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004946 }
4947 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00004948 }
4949 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004950
Bram Moolenaar4770d092006-01-12 23:22:24 +00004951 case STATE_SPLITUNDO:
4952 /* Undo the changes done for word split or compound word. */
4953 su->su_badflags = sp->ts_save_badflags;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004954
Bram Moolenaar4770d092006-01-12 23:22:24 +00004955 /* Continue looking for NUL bytes. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004956 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004957 sp->ts_state = STATE_START;
Bram Moolenaard12a1322005-08-21 22:08:24 +00004958
Bram Moolenaar4770d092006-01-12 23:22:24 +00004959 /* In case we went into the prefix tree. */
4960 byts = fbyts;
4961 idxs = fidxs;
4962 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004963
Bram Moolenaar4770d092006-01-12 23:22:24 +00004964 case STATE_ENDNUL:
4965 /* Past the NUL bytes in the node. */
4966 su->su_badflags = sp->ts_save_badflags;
4967 if (fword[sp->ts_fidx] == NUL
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004968#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00004969 && sp->ts_tcharlen == 0
Bram Moolenaarda2303d2005-08-30 21:55:26 +00004970#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00004971 )
4972 {
4973 /* The badword ends, can't use STATE_PLAIN. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004974 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004975 sp->ts_state = STATE_DEL;
4976 break;
4977 }
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004978 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004979 sp->ts_state = STATE_PLAIN;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02004980 /* FALLTHROUGH */
Bram Moolenaar4770d092006-01-12 23:22:24 +00004981
4982 case STATE_PLAIN:
4983 /*
4984 * Go over all possible bytes at this node, add each to tword[]
4985 * and use child node. "ts_curi" is the index.
4986 */
4987 arridx = sp->ts_arridx;
4988 if (sp->ts_curi > byts[arridx])
4989 {
4990 /* Done all bytes at this node, do next state. When still at
4991 * already changed bytes skip the other tricks. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01004992 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00004993 if (sp->ts_fidx >= sp->ts_fidxtry)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004994 sp->ts_state = STATE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00004995 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00004996 sp->ts_state = STATE_FINAL;
4997 }
4998 else
4999 {
5000 arridx += sp->ts_curi++;
5001 c = byts[arridx];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005002
Bram Moolenaar4770d092006-01-12 23:22:24 +00005003 /* Normal byte, go one level deeper. If it's not equal to the
5004 * byte in the bad word adjust the score. But don't even try
5005 * when the byte was already changed. And don't try when we
Bram Moolenaar4de6a212014-03-08 16:13:44 +01005006 * just deleted this byte, accepting it is always cheaper than
Bram Moolenaar4770d092006-01-12 23:22:24 +00005007 * delete + substitute. */
5008 if (c == fword[sp->ts_fidx]
Bram Moolenaarea424162005-06-16 21:51:00 +00005009#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00005010 || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005011#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00005012 )
5013 newscore = 0;
5014 else
5015 newscore = SCORE_SUBST;
5016 if ((newscore == 0
5017 || (sp->ts_fidx >= sp->ts_fidxtry
5018 && ((sp->ts_flags & TSF_DIDDEL) == 0
5019 || c != fword[sp->ts_delidx])))
5020 && TRY_DEEPER(su, stack, depth, newscore))
5021 {
5022 go_deeper(stack, depth, newscore);
5023#ifdef DEBUG_TRIEWALK
5024 if (newscore > 0)
5025 sprintf(changename[depth], "%.*s-%s: subst %c to %c",
5026 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5027 fword[sp->ts_fidx], c);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005028 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00005029 sprintf(changename[depth], "%.*s-%s: accept %c",
5030 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5031 fword[sp->ts_fidx]);
5032#endif
5033 ++depth;
5034 sp = &stack[depth];
5035 ++sp->ts_fidx;
5036 tword[sp->ts_twordlen++] = c;
5037 sp->ts_arridx = idxs[arridx];
Bram Moolenaarea424162005-06-16 21:51:00 +00005038#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00005039 if (newscore == SCORE_SUBST)
5040 sp->ts_isdiff = DIFF_YES;
5041 if (has_mbyte)
5042 {
5043 /* Multi-byte characters are a bit complicated to
5044 * handle: They differ when any of the bytes differ
5045 * and then their length may also differ. */
5046 if (sp->ts_tcharlen == 0)
Bram Moolenaarea424162005-06-16 21:51:00 +00005047 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005048 /* First byte. */
5049 sp->ts_tcharidx = 0;
5050 sp->ts_tcharlen = MB_BYTE2LEN(c);
5051 sp->ts_fcharstart = sp->ts_fidx - 1;
5052 sp->ts_isdiff = (newscore != 0)
Bram Moolenaarea424162005-06-16 21:51:00 +00005053 ? DIFF_YES : DIFF_NONE;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005054 }
5055 else if (sp->ts_isdiff == DIFF_INSERT)
5056 /* When inserting trail bytes don't advance in the
5057 * bad word. */
5058 --sp->ts_fidx;
5059 if (++sp->ts_tcharidx == sp->ts_tcharlen)
5060 {
5061 /* Last byte of character. */
5062 if (sp->ts_isdiff == DIFF_YES)
Bram Moolenaarea424162005-06-16 21:51:00 +00005063 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005064 /* Correct ts_fidx for the byte length of the
5065 * character (we didn't check that before). */
5066 sp->ts_fidx = sp->ts_fcharstart
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005067 + MB_PTR2LEN(
5068 fword + sp->ts_fcharstart);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005069 /* For changing a composing character adjust
5070 * the score from SCORE_SUBST to
5071 * SCORE_SUBCOMP. */
5072 if (enc_utf8
5073 && utf_iscomposing(
Bram Moolenaarace95982017-03-29 17:30:27 +02005074 utf_ptr2char(tword
Bram Moolenaar4770d092006-01-12 23:22:24 +00005075 + sp->ts_twordlen
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +00005076 - sp->ts_tcharlen))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005077 && utf_iscomposing(
Bram Moolenaarace95982017-03-29 17:30:27 +02005078 utf_ptr2char(fword
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +00005079 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005080 sp->ts_score -=
Bram Moolenaare5b8e3d2005-08-12 19:48:49 +00005081 SCORE_SUBST - SCORE_SUBCOMP;
5082
Bram Moolenaar4770d092006-01-12 23:22:24 +00005083 /* For a similar character adjust score from
5084 * SCORE_SUBST to SCORE_SIMILAR. */
5085 else if (!soundfold
5086 && slang->sl_has_map
5087 && similar_chars(slang,
5088 mb_ptr2char(tword
5089 + sp->ts_twordlen
Bram Moolenaarea424162005-06-16 21:51:00 +00005090 - sp->ts_tcharlen),
Bram Moolenaar4770d092006-01-12 23:22:24 +00005091 mb_ptr2char(fword
Bram Moolenaarea424162005-06-16 21:51:00 +00005092 + sp->ts_fcharstart)))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005093 sp->ts_score -=
Bram Moolenaarea424162005-06-16 21:51:00 +00005094 SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea424162005-06-16 21:51:00 +00005095 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005096 else if (sp->ts_isdiff == DIFF_INSERT
5097 && sp->ts_twordlen > sp->ts_tcharlen)
5098 {
5099 p = tword + sp->ts_twordlen - sp->ts_tcharlen;
5100 c = mb_ptr2char(p);
5101 if (enc_utf8 && utf_iscomposing(c))
5102 {
5103 /* Inserting a composing char doesn't
5104 * count that much. */
5105 sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
5106 }
5107 else
5108 {
5109 /* If the previous character was the same,
5110 * thus doubling a character, give a bonus
5111 * to the score. Also for the soundfold
5112 * tree (might seem illogical but does
5113 * give better scores). */
Bram Moolenaar91acfff2017-03-12 19:22:36 +01005114 MB_PTR_BACK(tword, p);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005115 if (c == mb_ptr2char(p))
5116 sp->ts_score -= SCORE_INS
5117 - SCORE_INSDUP;
5118 }
5119 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005120
Bram Moolenaar4770d092006-01-12 23:22:24 +00005121 /* Starting a new char, reset the length. */
5122 sp->ts_tcharlen = 0;
5123 }
Bram Moolenaarea408852005-06-25 22:49:46 +00005124 }
Bram Moolenaarea424162005-06-16 21:51:00 +00005125 else
5126#endif
Bram Moolenaarea408852005-06-25 22:49:46 +00005127 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005128 /* If we found a similar char adjust the score.
5129 * We do this after calling go_deeper() because
5130 * it's slow. */
5131 if (newscore != 0
5132 && !soundfold
5133 && slang->sl_has_map
5134 && similar_chars(slang,
5135 c, fword[sp->ts_fidx - 1]))
5136 sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
Bram Moolenaarea408852005-06-25 22:49:46 +00005137 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005138 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005139 }
5140 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005141
Bram Moolenaar4770d092006-01-12 23:22:24 +00005142 case STATE_DEL:
5143#ifdef FEAT_MBYTE
5144 /* When past the first byte of a multi-byte char don't try
5145 * delete/insert/swap a character. */
5146 if (has_mbyte && sp->ts_tcharlen > 0)
5147 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005148 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005149 sp->ts_state = STATE_FINAL;
5150 break;
5151 }
5152#endif
5153 /*
5154 * Try skipping one character in the bad word (delete it).
5155 */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005156 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005157 sp->ts_state = STATE_INS_PREP;
5158 sp->ts_curi = 1;
5159 if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
5160 /* Deleting a vowel at the start of a word counts less, see
5161 * soundalike_score(). */
5162 newscore = 2 * SCORE_DEL / 3;
5163 else
5164 newscore = SCORE_DEL;
5165 if (fword[sp->ts_fidx] != NUL
5166 && TRY_DEEPER(su, stack, depth, newscore))
5167 {
5168 go_deeper(stack, depth, newscore);
5169#ifdef DEBUG_TRIEWALK
5170 sprintf(changename[depth], "%.*s-%s: delete %c",
5171 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5172 fword[sp->ts_fidx]);
5173#endif
5174 ++depth;
5175
5176 /* Remember what character we deleted, so that we can avoid
5177 * inserting it again. */
5178 stack[depth].ts_flags |= TSF_DIDDEL;
5179 stack[depth].ts_delidx = sp->ts_fidx;
5180
5181 /* Advance over the character in fword[]. Give a bonus to the
5182 * score if the same character is following "nn" -> "n". It's
5183 * a bit illogical for soundfold tree but it does give better
5184 * results. */
5185#ifdef FEAT_MBYTE
5186 if (has_mbyte)
5187 {
5188 c = mb_ptr2char(fword + sp->ts_fidx);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005189 stack[depth].ts_fidx += MB_PTR2LEN(fword + sp->ts_fidx);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005190 if (enc_utf8 && utf_iscomposing(c))
5191 stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
5192 else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
5193 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5194 }
5195 else
5196#endif
5197 {
5198 ++stack[depth].ts_fidx;
5199 if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
5200 stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
5201 }
5202 break;
5203 }
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005204 /* FALLTHROUGH */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005205
5206 case STATE_INS_PREP:
5207 if (sp->ts_flags & TSF_DIDDEL)
5208 {
5209 /* If we just deleted a byte then inserting won't make sense,
5210 * a substitute is always cheaper. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005211 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005212 sp->ts_state = STATE_SWAP;
5213 break;
5214 }
5215
5216 /* skip over NUL bytes */
5217 n = sp->ts_arridx;
5218 for (;;)
5219 {
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005220 if (sp->ts_curi > byts[n])
5221 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005222 /* Only NUL bytes at this node, go to next state. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005223 PROF_STORE(sp->ts_state)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005224 sp->ts_state = STATE_SWAP;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005225 break;
5226 }
5227 if (byts[n + sp->ts_curi] != NUL)
5228 {
5229 /* Found a byte to insert. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005230 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005231 sp->ts_state = STATE_INS;
5232 break;
5233 }
5234 ++sp->ts_curi;
5235 }
5236 break;
5237
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005238 /* Not a fallthrough: the "break" above leaves the switch. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005239
5240 case STATE_INS:
5241 /* Insert one byte. Repeat this for each possible byte at this
5242 * node. */
5243 n = sp->ts_arridx;
5244 if (sp->ts_curi > byts[n])
5245 {
5246 /* Done all bytes at this node, go to next state. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005247 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005248 sp->ts_state = STATE_SWAP;
5249 break;
5250 }
5251
5252 /* Do one more byte at this node, but:
5253 * - Skip NUL bytes.
5254 * - Skip the byte if it's equal to the byte in the word,
5255 * accepting that byte is always better.
5256 */
5257 n += sp->ts_curi++;
5258 c = byts[n];
5259 if (soundfold && sp->ts_twordlen == 0 && c == '*')
5260 /* Inserting a vowel at the start of a word counts less,
5261 * see soundalike_score(). */
5262 newscore = 2 * SCORE_INS / 3;
5263 else
5264 newscore = SCORE_INS;
5265 if (c != fword[sp->ts_fidx]
5266 && TRY_DEEPER(su, stack, depth, newscore))
5267 {
5268 go_deeper(stack, depth, newscore);
5269#ifdef DEBUG_TRIEWALK
5270 sprintf(changename[depth], "%.*s-%s: insert %c",
5271 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5272 c);
5273#endif
5274 ++depth;
5275 sp = &stack[depth];
5276 tword[sp->ts_twordlen++] = c;
5277 sp->ts_arridx = idxs[n];
5278#ifdef FEAT_MBYTE
5279 if (has_mbyte)
5280 {
5281 fl = MB_BYTE2LEN(c);
5282 if (fl > 1)
5283 {
5284 /* There are following bytes for the same character.
5285 * We must find all bytes before trying
5286 * delete/insert/swap/etc. */
5287 sp->ts_tcharlen = fl;
5288 sp->ts_tcharidx = 1;
5289 sp->ts_isdiff = DIFF_INSERT;
5290 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005291 }
5292 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00005293 fl = 1;
5294 if (fl == 1)
Bram Moolenaarea424162005-06-16 21:51:00 +00005295#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00005296 {
5297 /* If the previous character was the same, thus doubling a
5298 * character, give a bonus to the score. Also for
5299 * soundfold words (illogical but does give a better
5300 * score). */
5301 if (sp->ts_twordlen >= 2
Bram Moolenaarea408852005-06-25 22:49:46 +00005302 && tword[sp->ts_twordlen - 2] == c)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005303 sp->ts_score -= SCORE_INS - SCORE_INSDUP;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005304 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005305 }
5306 break;
5307
5308 case STATE_SWAP:
5309 /*
5310 * Swap two bytes in the bad word: "12" -> "21".
5311 * We change "fword" here, it's changed back afterwards at
5312 * STATE_UNSWAP.
5313 */
5314 p = fword + sp->ts_fidx;
5315 c = *p;
5316 if (c == NUL)
5317 {
5318 /* End of word, can't swap or replace. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005319 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005320 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005321 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005322 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005323
Bram Moolenaar4770d092006-01-12 23:22:24 +00005324 /* Don't swap if the first character is not a word character.
5325 * SWAP3 etc. also don't make sense then. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02005326 if (!soundfold && !spell_iswordp(p, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005327 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005328 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005329 sp->ts_state = STATE_REP_INI;
5330 break;
5331 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +00005332
Bram Moolenaar4770d092006-01-12 23:22:24 +00005333#ifdef FEAT_MBYTE
5334 if (has_mbyte)
5335 {
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005336 n = MB_CPTR2LEN(p);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005337 c = mb_ptr2char(p);
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +00005338 if (p[n] == NUL)
5339 c2 = NUL;
Bram Moolenaar860cae12010-06-05 23:22:07 +02005340 else if (!soundfold && !spell_iswordp(p + n, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005341 c2 = c; /* don't swap non-word char */
5342 else
5343 c2 = mb_ptr2char(p + n);
5344 }
5345 else
5346#endif
5347 {
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +00005348 if (p[1] == NUL)
5349 c2 = NUL;
Bram Moolenaar860cae12010-06-05 23:22:07 +02005350 else if (!soundfold && !spell_iswordp(p + 1, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005351 c2 = c; /* don't swap non-word char */
5352 else
5353 c2 = p[1];
5354 }
Bram Moolenaarbb15b652005-10-03 21:52:09 +00005355
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +00005356 /* When the second character is NUL we can't swap. */
5357 if (c2 == NUL)
5358 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005359 PROF_STORE(sp->ts_state)
Bram Moolenaar3dcfbf72007-08-05 16:33:12 +00005360 sp->ts_state = STATE_REP_INI;
5361 break;
5362 }
5363
Bram Moolenaar4770d092006-01-12 23:22:24 +00005364 /* When characters are identical, swap won't do anything.
5365 * Also get here if the second char is not a word character. */
5366 if (c == c2)
5367 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005368 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005369 sp->ts_state = STATE_SWAP3;
5370 break;
5371 }
5372 if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
5373 {
5374 go_deeper(stack, depth, SCORE_SWAP);
5375#ifdef DEBUG_TRIEWALK
5376 sprintf(changename[depth], "%.*s-%s: swap %c and %c",
5377 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5378 c, c2);
5379#endif
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005380 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005381 sp->ts_state = STATE_UNSWAP;
5382 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00005383#ifdef FEAT_MBYTE
5384 if (has_mbyte)
5385 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005386 fl = mb_char2len(c2);
5387 mch_memmove(p, p + n, fl);
5388 mb_char2bytes(c, p + fl);
5389 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +00005390 }
5391 else
5392#endif
Bram Moolenaarbb15b652005-10-03 21:52:09 +00005393 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005394 p[0] = c2;
Bram Moolenaarea424162005-06-16 21:51:00 +00005395 p[1] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005396 stack[depth].ts_fidxtry = sp->ts_fidx + 2;
Bram Moolenaarea424162005-06-16 21:51:00 +00005397 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005398 }
5399 else
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005400 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005401 /* If this swap doesn't work then SWAP3 won't either. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005402 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005403 sp->ts_state = STATE_REP_INI;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005404 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005405 break;
Bram Moolenaarea424162005-06-16 21:51:00 +00005406
Bram Moolenaar4770d092006-01-12 23:22:24 +00005407 case STATE_UNSWAP:
5408 /* Undo the STATE_SWAP swap: "21" -> "12". */
5409 p = fword + sp->ts_fidx;
5410#ifdef FEAT_MBYTE
5411 if (has_mbyte)
5412 {
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005413 n = MB_PTR2LEN(p);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005414 c = mb_ptr2char(p + n);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005415 mch_memmove(p + MB_PTR2LEN(p + n), p, n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005416 mb_char2bytes(c, p);
5417 }
5418 else
5419#endif
5420 {
5421 c = *p;
5422 *p = p[1];
5423 p[1] = c;
5424 }
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005425 /* FALLTHROUGH */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005426
5427 case STATE_SWAP3:
5428 /* Swap two bytes, skipping one: "123" -> "321". We change
5429 * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
5430 p = fword + sp->ts_fidx;
5431#ifdef FEAT_MBYTE
5432 if (has_mbyte)
5433 {
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005434 n = MB_CPTR2LEN(p);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005435 c = mb_ptr2char(p);
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005436 fl = MB_CPTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005437 c2 = mb_ptr2char(p + n);
Bram Moolenaar860cae12010-06-05 23:22:07 +02005438 if (!soundfold && !spell_iswordp(p + n + fl, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005439 c3 = c; /* don't swap non-word char */
5440 else
5441 c3 = mb_ptr2char(p + n + fl);
5442 }
5443 else
5444#endif
5445 {
5446 c = *p;
5447 c2 = p[1];
Bram Moolenaar860cae12010-06-05 23:22:07 +02005448 if (!soundfold && !spell_iswordp(p + 2, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005449 c3 = c; /* don't swap non-word char */
5450 else
5451 c3 = p[2];
5452 }
5453
5454 /* When characters are identical: "121" then SWAP3 result is
5455 * identical, ROT3L result is same as SWAP: "211", ROT3R result is
5456 * same as SWAP on next char: "112". Thus skip all swapping.
5457 * Also skip when c3 is NUL.
5458 * Also get here when the third character is not a word character.
5459 * Second character may be any char: "a.b" -> "b.a" */
5460 if (c == c3 || c3 == NUL)
5461 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005462 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005463 sp->ts_state = STATE_REP_INI;
5464 break;
5465 }
5466 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5467 {
5468 go_deeper(stack, depth, SCORE_SWAP3);
5469#ifdef DEBUG_TRIEWALK
5470 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
5471 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5472 c, c3);
5473#endif
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005474 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005475 sp->ts_state = STATE_UNSWAP3;
5476 ++depth;
5477#ifdef FEAT_MBYTE
5478 if (has_mbyte)
5479 {
5480 tl = mb_char2len(c3);
5481 mch_memmove(p, p + n + fl, tl);
5482 mb_char2bytes(c2, p + tl);
5483 mb_char2bytes(c, p + fl + tl);
5484 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
5485 }
5486 else
5487#endif
5488 {
5489 p[0] = p[2];
5490 p[2] = c;
5491 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5492 }
5493 }
5494 else
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005495 {
5496 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005497 sp->ts_state = STATE_REP_INI;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005498 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005499 break;
5500
5501 case STATE_UNSWAP3:
5502 /* Undo STATE_SWAP3: "321" -> "123" */
5503 p = fword + sp->ts_fidx;
5504#ifdef FEAT_MBYTE
5505 if (has_mbyte)
5506 {
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005507 n = MB_PTR2LEN(p);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005508 c2 = mb_ptr2char(p + n);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005509 fl = MB_PTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005510 c = mb_ptr2char(p + n + fl);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005511 tl = MB_PTR2LEN(p + n + fl);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005512 mch_memmove(p + fl + tl, p, n);
5513 mb_char2bytes(c, p);
5514 mb_char2bytes(c2, p + tl);
5515 p = p + tl;
5516 }
5517 else
5518#endif
5519 {
5520 c = *p;
5521 *p = p[2];
5522 p[2] = c;
5523 ++p;
5524 }
5525
Bram Moolenaar860cae12010-06-05 23:22:07 +02005526 if (!soundfold && !spell_iswordp(p, curwin))
Bram Moolenaar4770d092006-01-12 23:22:24 +00005527 {
5528 /* Middle char is not a word char, skip the rotate. First and
5529 * third char were already checked at swap and swap3. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005530 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005531 sp->ts_state = STATE_REP_INI;
5532 break;
5533 }
5534
5535 /* Rotate three characters left: "123" -> "231". We change
5536 * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
5537 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5538 {
5539 go_deeper(stack, depth, SCORE_SWAP3);
5540#ifdef DEBUG_TRIEWALK
5541 p = fword + sp->ts_fidx;
5542 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
5543 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5544 p[0], p[1], p[2]);
5545#endif
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005546 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005547 sp->ts_state = STATE_UNROT3L;
5548 ++depth;
Bram Moolenaarea424162005-06-16 21:51:00 +00005549 p = fword + sp->ts_fidx;
5550#ifdef FEAT_MBYTE
5551 if (has_mbyte)
5552 {
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005553 n = MB_CPTR2LEN(p);
Bram Moolenaarea424162005-06-16 21:51:00 +00005554 c = mb_ptr2char(p);
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005555 fl = MB_CPTR2LEN(p + n);
5556 fl += MB_CPTR2LEN(p + n + fl);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005557 mch_memmove(p, p + n, fl);
5558 mb_char2bytes(c, p + fl);
5559 stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
Bram Moolenaarea424162005-06-16 21:51:00 +00005560 }
5561 else
5562#endif
5563 {
5564 c = *p;
5565 *p = p[1];
5566 p[1] = p[2];
5567 p[2] = c;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005568 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
Bram Moolenaarea424162005-06-16 21:51:00 +00005569 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005570 }
5571 else
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005572 {
5573 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005574 sp->ts_state = STATE_REP_INI;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005575 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005576 break;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005577
Bram Moolenaar4770d092006-01-12 23:22:24 +00005578 case STATE_UNROT3L:
5579 /* Undo ROT3L: "231" -> "123" */
5580 p = fword + sp->ts_fidx;
Bram Moolenaarea424162005-06-16 21:51:00 +00005581#ifdef FEAT_MBYTE
Bram Moolenaar4770d092006-01-12 23:22:24 +00005582 if (has_mbyte)
5583 {
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005584 n = MB_PTR2LEN(p);
5585 n += MB_PTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005586 c = mb_ptr2char(p + n);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005587 tl = MB_PTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005588 mch_memmove(p + tl, p, n);
5589 mb_char2bytes(c, p);
5590 }
5591 else
Bram Moolenaarea424162005-06-16 21:51:00 +00005592#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00005593 {
5594 c = p[2];
5595 p[2] = p[1];
5596 p[1] = *p;
5597 *p = c;
5598 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005599
Bram Moolenaar4770d092006-01-12 23:22:24 +00005600 /* Rotate three bytes right: "123" -> "312". We change "fword"
5601 * here, it's changed back afterwards at STATE_UNROT3R. */
5602 if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
5603 {
5604 go_deeper(stack, depth, SCORE_SWAP3);
5605#ifdef DEBUG_TRIEWALK
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005606 p = fword + sp->ts_fidx;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005607 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
5608 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5609 p[0], p[1], p[2]);
5610#endif
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005611 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005612 sp->ts_state = STATE_UNROT3R;
5613 ++depth;
5614 p = fword + sp->ts_fidx;
5615#ifdef FEAT_MBYTE
5616 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +00005617 {
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005618 n = MB_CPTR2LEN(p);
5619 n += MB_CPTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005620 c = mb_ptr2char(p + n);
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005621 tl = MB_CPTR2LEN(p + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005622 mch_memmove(p + tl, p, n);
5623 mb_char2bytes(c, p);
5624 stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
Bram Moolenaar0c405862005-06-22 22:26:26 +00005625 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005626 else
5627#endif
5628 {
5629 c = p[2];
5630 p[2] = p[1];
5631 p[1] = *p;
5632 *p = c;
5633 stack[depth].ts_fidxtry = sp->ts_fidx + 3;
5634 }
5635 }
5636 else
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005637 {
5638 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005639 sp->ts_state = STATE_REP_INI;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005640 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005641 break;
5642
5643 case STATE_UNROT3R:
5644 /* Undo ROT3R: "312" -> "123" */
5645 p = fword + sp->ts_fidx;
5646#ifdef FEAT_MBYTE
5647 if (has_mbyte)
5648 {
5649 c = mb_ptr2char(p);
Bram Moolenaar5b276aa2017-04-22 23:49:52 +02005650 tl = MB_PTR2LEN(p);
5651 n = MB_PTR2LEN(p + tl);
5652 n += MB_PTR2LEN(p + tl + n);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005653 mch_memmove(p, p + tl, n);
5654 mb_char2bytes(c, p + n);
5655 }
5656 else
5657#endif
5658 {
5659 c = *p;
5660 *p = p[1];
5661 p[1] = p[2];
5662 p[2] = c;
5663 }
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005664 /* FALLTHROUGH */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005665
5666 case STATE_REP_INI:
5667 /* Check if matching with REP items from the .aff file would work.
5668 * Quickly skip if:
5669 * - there are no REP items and we are not in the soundfold trie
5670 * - the score is going to be too high anyway
5671 * - already applied a REP item or swapped here */
5672 if ((lp->lp_replang == NULL && !soundfold)
5673 || sp->ts_score + SCORE_REP >= su->su_maxscore
5674 || sp->ts_fidx < sp->ts_fidxtry)
5675 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005676 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005677 sp->ts_state = STATE_FINAL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005678 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005679 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005680
Bram Moolenaar4770d092006-01-12 23:22:24 +00005681 /* Use the first byte to quickly find the first entry that may
5682 * match. If the index is -1 there is none. */
5683 if (soundfold)
5684 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
5685 else
5686 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005687
Bram Moolenaar4770d092006-01-12 23:22:24 +00005688 if (sp->ts_curi < 0)
5689 {
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005690 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005691 sp->ts_state = STATE_FINAL;
5692 break;
5693 }
5694
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005695 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005696 sp->ts_state = STATE_REP;
Bram Moolenaar2f40d122017-10-24 21:49:36 +02005697 /* FALLTHROUGH */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005698
5699 case STATE_REP:
5700 /* Try matching with REP items from the .aff file. For each match
5701 * replace the characters and check if the resulting word is
5702 * valid. */
5703 p = fword + sp->ts_fidx;
5704
5705 if (soundfold)
5706 gap = &slang->sl_repsal;
5707 else
5708 gap = &lp->lp_replang->sl_rep;
5709 while (sp->ts_curi < gap->ga_len)
5710 {
5711 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
5712 if (*ftp->ft_from != *p)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005713 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005714 /* past possible matching entries */
5715 sp->ts_curi = gap->ga_len;
5716 break;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005717 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005718 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
5719 && TRY_DEEPER(su, stack, depth, SCORE_REP))
5720 {
5721 go_deeper(stack, depth, SCORE_REP);
5722#ifdef DEBUG_TRIEWALK
5723 sprintf(changename[depth], "%.*s-%s: replace %s with %s",
5724 sp->ts_twordlen, tword, fword + sp->ts_fidx,
5725 ftp->ft_from, ftp->ft_to);
5726#endif
5727 /* Need to undo this afterwards. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005728 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005729 sp->ts_state = STATE_REP_UNDO;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00005730
Bram Moolenaar4770d092006-01-12 23:22:24 +00005731 /* Change the "from" to the "to" string. */
5732 ++depth;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00005733 fl = (int)STRLEN(ftp->ft_from);
5734 tl = (int)STRLEN(ftp->ft_to);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005735 if (fl != tl)
5736 {
Bram Moolenaara7241f52008-06-24 20:39:31 +00005737 STRMOVE(p + tl, p + fl);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005738 repextra += tl - fl;
5739 }
5740 mch_memmove(p, ftp->ft_to, tl);
5741 stack[depth].ts_fidxtry = sp->ts_fidx + tl;
5742#ifdef FEAT_MBYTE
5743 stack[depth].ts_tcharlen = 0;
5744#endif
5745 break;
5746 }
5747 }
5748
5749 if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005750 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00005751 /* No (more) matches. */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005752 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005753 sp->ts_state = STATE_FINAL;
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005754 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00005755
5756 break;
5757
5758 case STATE_REP_UNDO:
5759 /* Undo a REP replacement and continue with the next one. */
5760 if (soundfold)
5761 gap = &slang->sl_repsal;
5762 else
5763 gap = &lp->lp_replang->sl_rep;
5764 ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00005765 fl = (int)STRLEN(ftp->ft_from);
5766 tl = (int)STRLEN(ftp->ft_to);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005767 p = fword + sp->ts_fidx;
5768 if (fl != tl)
5769 {
Bram Moolenaara7241f52008-06-24 20:39:31 +00005770 STRMOVE(p + fl, p + tl);
Bram Moolenaar4770d092006-01-12 23:22:24 +00005771 repextra -= tl - fl;
5772 }
5773 mch_memmove(p, ftp->ft_from, fl);
Bram Moolenaarca1fe982016-01-07 16:22:06 +01005774 PROF_STORE(sp->ts_state)
Bram Moolenaar4770d092006-01-12 23:22:24 +00005775 sp->ts_state = STATE_REP;
5776 break;
5777
5778 default:
5779 /* Did all possible states at this level, go up one level. */
5780 --depth;
5781
5782 if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
5783 {
5784 /* Continue in or go back to the prefix tree. */
5785 byts = pbyts;
5786 idxs = pidxs;
5787 }
5788
5789 /* Don't check for CTRL-C too often, it takes time. */
5790 if (--breakcheckcount == 0)
5791 {
5792 ui_breakcheck();
5793 breakcheckcount = 1000;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005794 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005795 }
5796 }
5797}
5798
Bram Moolenaar4770d092006-01-12 23:22:24 +00005799
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005800/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00005801 * Go one level deeper in the tree.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005802 */
Bram Moolenaar4770d092006-01-12 23:22:24 +00005803 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01005804go_deeper(trystate_T *stack, int depth, int score_add)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005805{
Bram Moolenaarea424162005-06-16 21:51:00 +00005806 stack[depth + 1] = stack[depth];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005807 stack[depth + 1].ts_state = STATE_START;
Bram Moolenaar4770d092006-01-12 23:22:24 +00005808 stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005809 stack[depth + 1].ts_curi = 1; /* start just after length byte */
Bram Moolenaard12a1322005-08-21 22:08:24 +00005810 stack[depth + 1].ts_flags = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005811}
5812
#ifdef FEAT_MBYTE
/*
 * Case-folding may change the number of bytes of a character.  Count the
 * number of characters in the first "flen" bytes of "fword" and return the
 * number of bytes taken by that many characters at the start of "word".
 */
    static int
nofold_len(char_u *fword, int flen, char_u *word)
{
    char_u	*fend = fword + flen;
    char_u	*p = fword;
    int		nchars = 0;

    /* Count the characters in "fword" up to "flen" bytes. */
    while (p < fend)
    {
	++nchars;
	MB_PTR_ADV(p);
    }

    /* Advance over the same number of characters in "word". */
    p = word;
    while (nchars > 0)
    {
	--nchars;
	MB_PTR_ADV(p);
    }
    return (int)(p - word);
}
#endif
5831
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005832/*
5833 * "fword" is a good word with case folded. Find the matching keep-case
5834 * words and put it in "kword".
5835 * Theoretically there could be several keep-case words that result in the
5836 * same case-folded word, but we only find one...
5837 */
5838 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01005839find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005840{
5841 char_u uword[MAXWLEN]; /* "fword" in upper-case */
5842 int depth;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005843 idx_T tryidx;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005844
5845 /* The following arrays are used at each depth in the tree. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005846 idx_T arridx[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005847 int round[MAXWLEN];
5848 int fwordidx[MAXWLEN];
5849 int uwordidx[MAXWLEN];
5850 int kwordlen[MAXWLEN];
5851
5852 int flen, ulen;
5853 int l;
5854 int len;
5855 int c;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005856 idx_T lo, hi, m;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005857 char_u *p;
5858 char_u *byts = slang->sl_kbyts; /* array with bytes of the words */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00005859 idx_T *idxs = slang->sl_kidxs; /* array with indexes */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005860
5861 if (byts == NULL)
5862 {
5863 /* array is empty: "cannot happen" */
5864 *kword = NUL;
5865 return;
5866 }
5867
5868 /* Make an all-cap version of "fword". */
5869 allcap_copy(fword, uword);
5870
5871 /*
5872 * Each character needs to be tried both case-folded and upper-case.
5873 * All this gets very complicated if we keep in mind that changing case
5874 * may change the byte length of a multi-byte character...
5875 */
5876 depth = 0;
5877 arridx[0] = 0;
5878 round[0] = 0;
5879 fwordidx[0] = 0;
5880 uwordidx[0] = 0;
5881 kwordlen[0] = 0;
5882 while (depth >= 0)
5883 {
5884 if (fword[fwordidx[depth]] == NUL)
5885 {
5886 /* We are at the end of "fword". If the tree allows a word to end
5887 * here we have found a match. */
5888 if (byts[arridx[depth] + 1] == 0)
5889 {
5890 kword[kwordlen[depth]] = NUL;
5891 return;
5892 }
5893
5894 /* kword is getting too long, continue one level up */
5895 --depth;
5896 }
5897 else if (++round[depth] > 2)
5898 {
5899 /* tried both fold-case and upper-case character, continue one
5900 * level up */
5901 --depth;
5902 }
5903 else
5904 {
5905 /*
5906 * round[depth] == 1: Try using the folded-case character.
5907 * round[depth] == 2: Try using the upper-case character.
5908 */
5909#ifdef FEAT_MBYTE
5910 if (has_mbyte)
5911 {
Bram Moolenaard3c907b2016-08-17 21:32:09 +02005912 flen = MB_CPTR2LEN(fword + fwordidx[depth]);
5913 ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00005914 }
5915 else
5916#endif
5917 ulen = flen = 1;
5918 if (round[depth] == 1)
5919 {
5920 p = fword + fwordidx[depth];
5921 l = flen;
5922 }
5923 else
5924 {
5925 p = uword + uwordidx[depth];
5926 l = ulen;
5927 }
5928
5929 for (tryidx = arridx[depth]; l > 0; --l)
5930 {
5931 /* Perform a binary search in the list of accepted bytes. */
5932 len = byts[tryidx++];
5933 c = *p++;
5934 lo = tryidx;
5935 hi = tryidx + len - 1;
5936 while (lo < hi)
5937 {
5938 m = (lo + hi) / 2;
5939 if (byts[m] > c)
5940 hi = m - 1;
5941 else if (byts[m] < c)
5942 lo = m + 1;
5943 else
5944 {
5945 lo = hi = m;
5946 break;
5947 }
5948 }
5949
5950 /* Stop if there is no matching byte. */
5951 if (hi < lo || byts[lo] != c)
5952 break;
5953
5954 /* Continue at the child (if there is one). */
5955 tryidx = idxs[lo];
5956 }
5957
5958 if (l == 0)
5959 {
5960 /*
5961 * Found the matching char. Copy it to "kword" and go a
5962 * level deeper.
5963 */
5964 if (round[depth] == 1)
5965 {
5966 STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
5967 flen);
5968 kwordlen[depth + 1] = kwordlen[depth] + flen;
5969 }
5970 else
5971 {
5972 STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
5973 ulen);
5974 kwordlen[depth + 1] = kwordlen[depth] + ulen;
5975 }
5976 fwordidx[depth + 1] = fwordidx[depth] + flen;
5977 uwordidx[depth + 1] = uwordidx[depth] + ulen;
5978
5979 ++depth;
5980 arridx[depth] = tryidx;
5981 round[depth] = 0;
5982 }
5983 }
5984 }
5985
5986 /* Didn't find it: "cannot happen". */
5987 *kword = NUL;
5988}
5989
5990/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005991 * Compute the sound-a-like score for suggestions in su->su_ga and add them to
5992 * su->su_sga.
5993 */
5994 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01005995score_comp_sal(suginfo_T *su)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00005996{
5997 langp_T *lp;
5998 char_u badsound[MAXWLEN];
5999 int i;
6000 suggest_T *stp;
6001 suggest_T *sstp;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006002 int score;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006003 int lpi;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006004
6005 if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
6006 return;
6007
6008 /* Use the sound-folding of the first language that supports it. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02006009 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006010 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02006011 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006012 if (lp->lp_slang->sl_sal.ga_len > 0)
6013 {
6014 /* soundfold the bad word */
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006015 spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006016
6017 for (i = 0; i < su->su_ga.ga_len; ++i)
6018 {
6019 stp = &SUG(su->su_ga, i);
6020
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006021 /* Case-fold the suggested word, sound-fold it and compute the
6022 * sound-a-like score. */
6023 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006024 if (score < SCORE_MAXMAX)
6025 {
6026 /* Add the suggestion. */
6027 sstp = &SUG(su->su_sga, su->su_sga.ga_len);
6028 sstp->st_word = vim_strsave(stp->st_word);
6029 if (sstp->st_word != NULL)
6030 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006031 sstp->st_wordlen = stp->st_wordlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006032 sstp->st_score = score;
6033 sstp->st_altscore = 0;
6034 sstp->st_orglen = stp->st_orglen;
6035 ++su->su_sga.ga_len;
6036 }
6037 }
6038 }
6039 break;
6040 }
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006041 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006042}
6043
6044/*
6045 * Combine the list of suggestions in su->su_ga and su->su_sga.
Bram Moolenaar84a05ac2013-05-06 04:24:17 +02006046 * They are entwined.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006047 */
6048 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006049score_combine(suginfo_T *su)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006050{
6051 int i;
6052 int j;
6053 garray_T ga;
6054 garray_T *gap;
6055 langp_T *lp;
6056 suggest_T *stp;
6057 char_u *p;
6058 char_u badsound[MAXWLEN];
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006059 int round;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006060 int lpi;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006061 slang_T *slang = NULL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006062
6063 /* Add the alternate score to su_ga. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02006064 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006065 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02006066 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006067 if (lp->lp_slang->sl_sal.ga_len > 0)
6068 {
6069 /* soundfold the bad word */
Bram Moolenaar4770d092006-01-12 23:22:24 +00006070 slang = lp->lp_slang;
6071 spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006072
6073 for (i = 0; i < su->su_ga.ga_len; ++i)
6074 {
6075 stp = &SUG(su->su_ga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006076 stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006077 if (stp->st_altscore == SCORE_MAXMAX)
6078 stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
6079 else
6080 stp->st_score = (stp->st_score * 3
6081 + stp->st_altscore) / 4;
6082 stp->st_salscore = FALSE;
6083 }
6084 break;
6085 }
6086 }
6087
Bram Moolenaarf193fff2006-04-27 00:02:13 +00006088 if (slang == NULL) /* Using "double" without sound folding. */
6089 {
6090 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
6091 su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006092 return;
Bram Moolenaarf193fff2006-04-27 00:02:13 +00006093 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006094
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006095 /* Add the alternate score to su_sga. */
6096 for (i = 0; i < su->su_sga.ga_len; ++i)
6097 {
6098 stp = &SUG(su->su_sga, i);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006099 stp->st_altscore = spell_edit_score(slang,
6100 su->su_badword, stp->st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006101 if (stp->st_score == SCORE_MAXMAX)
6102 stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
6103 else
6104 stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
6105 stp->st_salscore = TRUE;
6106 }
6107
Bram Moolenaar4770d092006-01-12 23:22:24 +00006108 /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
6109 * for both lists. */
6110 check_suggestions(su, &su->su_ga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006111 (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006112 check_suggestions(su, &su->su_sga);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006113 (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
6114
6115 ga_init2(&ga, (int)sizeof(suginfo_T), 1);
6116 if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
6117 return;
6118
6119 stp = &SUG(ga, 0);
6120 for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
6121 {
6122 /* round 1: get a suggestion from su_ga
6123 * round 2: get a suggestion from su_sga */
6124 for (round = 1; round <= 2; ++round)
6125 {
6126 gap = round == 1 ? &su->su_ga : &su->su_sga;
6127 if (i < gap->ga_len)
6128 {
6129 /* Don't add a word if it's already there. */
6130 p = SUG(*gap, i).st_word;
6131 for (j = 0; j < ga.ga_len; ++j)
6132 if (STRCMP(stp[j].st_word, p) == 0)
6133 break;
6134 if (j == ga.ga_len)
6135 stp[ga.ga_len++] = SUG(*gap, i);
6136 else
6137 vim_free(p);
6138 }
6139 }
6140 }
6141
6142 ga_clear(&su->su_ga);
6143 ga_clear(&su->su_sga);
6144
6145 /* Truncate the list to the number of suggestions that will be displayed. */
6146 if (ga.ga_len > su->su_maxcount)
6147 {
6148 for (i = su->su_maxcount; i < ga.ga_len; ++i)
6149 vim_free(stp[i].st_word);
6150 ga.ga_len = su->su_maxcount;
6151 }
6152
6153 su->su_ga = ga;
6154}
6155
6156/*
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006157 * For the goodword in "stp" compute the soundalike score compared to the
6158 * badword.
6159 */
6160 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006161stp_sal_score(
6162 suggest_T *stp,
6163 suginfo_T *su,
6164 slang_T *slang,
6165 char_u *badsound) /* sound-folded badword */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006166{
6167 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006168 char_u *pbad;
6169 char_u *pgood;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006170 char_u badsound2[MAXWLEN];
6171 char_u fword[MAXWLEN];
6172 char_u goodsound[MAXWLEN];
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006173 char_u goodword[MAXWLEN];
6174 int lendiff;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006175
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006176 lendiff = (int)(su->su_badlen - stp->st_orglen);
6177 if (lendiff >= 0)
6178 pbad = badsound;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006179 else
6180 {
6181 /* soundfold the bad word with more characters following */
6182 (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
6183
6184 /* When joining two words the sound often changes a lot. E.g., "t he"
6185 * sounds like "t h" while "the" sounds like "@". Avoid that by
6186 * removing the space. Don't do it when the good word also contains a
6187 * space. */
Bram Moolenaar1c465442017-03-12 20:10:05 +01006188 if (VIM_ISWHITE(su->su_badptr[su->su_badlen])
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006189 && *skiptowhite(stp->st_word) == NUL)
6190 for (p = fword; *(p = skiptowhite(p)) != NUL; )
Bram Moolenaara7241f52008-06-24 20:39:31 +00006191 STRMOVE(p, p + 1);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006192
Bram Moolenaar42eeac32005-06-29 22:40:58 +00006193 spell_soundfold(slang, fword, TRUE, badsound2);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006194 pbad = badsound2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006195 }
6196
Bram Moolenaaref9d6aa2011-04-11 16:56:35 +02006197 if (lendiff > 0 && stp->st_wordlen + lendiff < MAXWLEN)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006198 {
6199 /* Add part of the bad word to the good word, so that we soundfold
6200 * what replaces the bad word. */
6201 STRCPY(goodword, stp->st_word);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006202 vim_strncpy(goodword + stp->st_wordlen,
6203 su->su_badptr + su->su_badlen - lendiff, lendiff);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006204 pgood = goodword;
6205 }
6206 else
6207 pgood = stp->st_word;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006208
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006209 /* Sound-fold the word and compute the score for the difference. */
6210 spell_soundfold(slang, pgood, FALSE, goodsound);
6211
6212 return soundalike_score(goodsound, pbad);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006213}
6214
Bram Moolenaar4770d092006-01-12 23:22:24 +00006215/* structure used to store soundfolded words that add_sound_suggest() has
6216 * handled already. */
6217typedef struct
6218{
6219 short sft_score; /* lowest score used */
6220 char_u sft_word[1]; /* soundfolded word, actually longer */
6221} sftword_T;
6222
6223static sftword_T dumsft;
6224#define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
6225#define HI2SFT(hi) HIKEY2SFT((hi)->hi_key)
6226
6227/*
6228 * Prepare for calling suggest_try_soundalike().
6229 */
6230 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006231suggest_try_soundalike_prep(void)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006232{
6233 langp_T *lp;
6234 int lpi;
6235 slang_T *slang;
6236
6237 /* Do this for all languages that support sound folding and for which a
6238 * .sug file has been loaded. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02006239 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006240 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02006241 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006242 slang = lp->lp_slang;
6243 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6244 /* prepare the hashtable used by add_sound_suggest() */
6245 hash_init(&slang->sl_sounddone);
6246 }
6247}
6248
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00006249/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006250 * Find suggestions by comparing the word in a sound-a-like form.
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006251 * Note: This doesn't support postponed prefixes.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006252 */
6253 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006254suggest_try_soundalike(suginfo_T *su)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006255{
6256 char_u salword[MAXWLEN];
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006257 langp_T *lp;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00006258 int lpi;
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006259 slang_T *slang;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006260
Bram Moolenaar4770d092006-01-12 23:22:24 +00006261 /* Do this for all languages that support sound folding and for which a
6262 * .sug file has been loaded. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02006263 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006264 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02006265 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006266 slang = lp->lp_slang;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006267 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006268 {
6269 /* soundfold the bad word */
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006270 spell_soundfold(slang, su->su_fbadword, TRUE, salword);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006271
Bram Moolenaar4770d092006-01-12 23:22:24 +00006272 /* try all kinds of inserts/deletes/swaps/etc. */
6273 /* TODO: also soundfold the next words, so that we can try joining
6274 * and splitting */
Bram Moolenaarca1fe982016-01-07 16:22:06 +01006275#ifdef SUGGEST_PROFILE
6276 prof_init();
6277#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00006278 suggest_trie_walk(su, lp, salword, TRUE);
Bram Moolenaarca1fe982016-01-07 16:22:06 +01006279#ifdef SUGGEST_PROFILE
6280 prof_report("soundalike");
6281#endif
Bram Moolenaar4770d092006-01-12 23:22:24 +00006282 }
6283 }
6284}
6285
6286/*
6287 * Finish up after calling suggest_try_soundalike().
6288 */
6289 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006290suggest_try_soundalike_finish(void)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006291{
6292 langp_T *lp;
6293 int lpi;
6294 slang_T *slang;
6295 int todo;
6296 hashitem_T *hi;
6297
6298 /* Do this for all languages that support sound folding and for which a
6299 * .sug file has been loaded. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02006300 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006301 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02006302 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006303 slang = lp->lp_slang;
6304 if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
6305 {
6306 /* Free the info about handled words. */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006307 todo = (int)slang->sl_sounddone.ht_used;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006308 for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
6309 if (!HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006310 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006311 vim_free(HI2SFT(hi));
6312 --todo;
6313 }
Bram Moolenaar6417da62007-03-08 13:49:53 +00006314
6315 /* Clear the hashtable, it may also be used by another region. */
Bram Moolenaar4770d092006-01-12 23:22:24 +00006316 hash_clear(&slang->sl_sounddone);
Bram Moolenaar6417da62007-03-08 13:49:53 +00006317 hash_init(&slang->sl_sounddone);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006318 }
6319 }
6320}
6321
6322/*
6323 * A match with a soundfolded word is found. Add the good word(s) that
6324 * produce this soundfolded word.
6325 */
6326 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006327add_sound_suggest(
6328 suginfo_T *su,
6329 char_u *goodword,
6330 int score, /* soundfold score */
6331 langp_T *lp)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006332{
6333 slang_T *slang = lp->lp_slang; /* language for sound folding */
6334 int sfwordnr;
6335 char_u *nrline;
6336 int orgnr;
6337 char_u theword[MAXWLEN];
6338 int i;
6339 int wlen;
6340 char_u *byts;
6341 idx_T *idxs;
6342 int n;
6343 int wordcount;
6344 int wc;
6345 int goodscore;
6346 hash_T hash;
6347 hashitem_T *hi;
6348 sftword_T *sft;
6349 int bc, gc;
6350 int limit;
6351
6352 /*
6353 * It's very well possible that the same soundfold word is found several
6354 * times with different scores. Since the following is quite slow only do
6355 * the words that have a better score than before. Use a hashtable to
6356 * remember the words that have been done.
6357 */
6358 hash = hash_hash(goodword);
6359 hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
6360 if (HASHITEM_EMPTY(hi))
6361 {
Bram Moolenaarf193fff2006-04-27 00:02:13 +00006362 sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T)
6363 + STRLEN(goodword)));
Bram Moolenaar4770d092006-01-12 23:22:24 +00006364 if (sft != NULL)
6365 {
6366 sft->sft_score = score;
6367 STRCPY(sft->sft_word, goodword);
6368 hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
6369 }
6370 }
6371 else
6372 {
6373 sft = HI2SFT(hi);
6374 if (score >= sft->sft_score)
6375 return;
6376 sft->sft_score = score;
6377 }
6378
6379 /*
6380 * Find the word nr in the soundfold tree.
6381 */
6382 sfwordnr = soundfold_find(slang, goodword);
6383 if (sfwordnr < 0)
6384 {
Bram Moolenaar95f09602016-11-10 20:01:45 +01006385 internal_error("add_sound_suggest()");
Bram Moolenaar4770d092006-01-12 23:22:24 +00006386 return;
6387 }
6388
6389 /*
6390 * go over the list of good words that produce this soundfold word
6391 */
6392 nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
6393 orgnr = 0;
6394 while (*nrline != NUL)
6395 {
6396 /* The wordnr was stored in a minimal nr of bytes as an offset to the
6397 * previous wordnr. */
6398 orgnr += bytes2offset(&nrline);
6399
6400 byts = slang->sl_fbyts;
6401 idxs = slang->sl_fidxs;
6402
6403 /* Lookup the word "orgnr" one of the two tries. */
6404 n = 0;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006405 wordcount = 0;
Bram Moolenaarace8d8e2013-11-21 17:42:31 +01006406 for (wlen = 0; wlen < MAXWLEN - 3; ++wlen)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006407 {
6408 i = 1;
6409 if (wordcount == orgnr && byts[n + 1] == NUL)
6410 break; /* found end of word */
6411
6412 if (byts[n + 1] == NUL)
6413 ++wordcount;
6414
6415 /* skip over the NUL bytes */
6416 for ( ; byts[n + i] == NUL; ++i)
6417 if (i > byts[n]) /* safety check */
6418 {
6419 STRCPY(theword + wlen, "BAD");
Bram Moolenaarace8d8e2013-11-21 17:42:31 +01006420 wlen += 3;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006421 goto badword;
6422 }
6423
6424 /* One of the siblings must have the word. */
6425 for ( ; i < byts[n]; ++i)
6426 {
6427 wc = idxs[idxs[n + i]]; /* nr of words under this byte */
6428 if (wordcount + wc > orgnr)
6429 break;
6430 wordcount += wc;
6431 }
6432
Bram Moolenaarace8d8e2013-11-21 17:42:31 +01006433 theword[wlen] = byts[n + i];
Bram Moolenaar4770d092006-01-12 23:22:24 +00006434 n = idxs[n + i];
6435 }
6436badword:
6437 theword[wlen] = NUL;
6438
6439 /* Go over the possible flags and regions. */
6440 for (; i <= byts[n] && byts[n + i] == NUL; ++i)
6441 {
6442 char_u cword[MAXWLEN];
6443 char_u *p;
6444 int flags = (int)idxs[n + i];
6445
Bram Moolenaare1438bb2006-03-01 22:01:55 +00006446 /* Skip words with the NOSUGGEST flag */
6447 if (flags & WF_NOSUGGEST)
6448 continue;
6449
Bram Moolenaar4770d092006-01-12 23:22:24 +00006450 if (flags & WF_KEEPCAP)
6451 {
6452 /* Must find the word in the keep-case tree. */
6453 find_keepcap_word(slang, theword, cword);
6454 p = cword;
6455 }
6456 else
6457 {
6458 flags |= su->su_badflags;
6459 if ((flags & WF_CAPMASK) != 0)
6460 {
6461 /* Need to fix case according to "flags". */
6462 make_case_word(theword, cword, flags);
6463 p = cword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006464 }
6465 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00006466 p = theword;
6467 }
6468
6469 /* Add the suggestion. */
6470 if (sps_flags & SPS_DOUBLE)
6471 {
6472 /* Add the suggestion if the score isn't too bad. */
6473 if (score <= su->su_maxscore)
6474 add_suggestion(su, &su->su_sga, p, su->su_badlen,
6475 score, 0, FALSE, slang, FALSE);
6476 }
6477 else
6478 {
6479 /* Add a penalty for words in another region. */
6480 if ((flags & WF_REGION)
6481 && (((unsigned)flags >> 16) & lp->lp_region) == 0)
6482 goodscore = SCORE_REGION;
6483 else
6484 goodscore = 0;
6485
6486 /* Add a small penalty for changing the first letter from
6487 * lower to upper case. Helps for "tath" -> "Kath", which is
Bram Moolenaar84a05ac2013-05-06 04:24:17 +02006488 * less common than "tath" -> "path". Don't do it when the
Bram Moolenaar4770d092006-01-12 23:22:24 +00006489 * letter is the same, that has already been counted. */
6490 gc = PTR2CHAR(p);
6491 if (SPELL_ISUPPER(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006492 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006493 bc = PTR2CHAR(su->su_badword);
6494 if (!SPELL_ISUPPER(bc)
6495 && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
6496 goodscore += SCORE_ICASE / 2;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006497 }
6498
Bram Moolenaar4770d092006-01-12 23:22:24 +00006499 /* Compute the score for the good word. This only does letter
6500 * insert/delete/swap/replace. REP items are not considered,
6501 * which may make the score a bit higher.
6502 * Use a limit for the score to make it work faster. Use
6503 * MAXSCORE(), because RESCORE() will change the score.
6504 * If the limit is very high then the iterative method is
6505 * inefficient, using an array is quicker. */
6506 limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
6507 if (limit > SCORE_LIMITMAX)
6508 goodscore += spell_edit_score(slang, su->su_badword, p);
6509 else
6510 goodscore += spell_edit_score_limit(slang, su->su_badword,
6511 p, limit);
6512
6513 /* When going over the limit don't bother to do the rest. */
6514 if (goodscore < SCORE_MAXMAX)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006515 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006516 /* Give a bonus to words seen before. */
6517 goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006518
Bram Moolenaar4770d092006-01-12 23:22:24 +00006519 /* Add the suggestion if the score isn't too bad. */
6520 goodscore = RESCORE(goodscore, score);
6521 if (goodscore <= su->su_sfmaxscore)
6522 add_suggestion(su, &su->su_ga, p, su->su_badlen,
6523 goodscore, score, TRUE, slang, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006524 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006525 }
6526 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006527 /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006528 }
6529}
6530
6531/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00006532 * Find word "word" in fold-case tree for "slang" and return the word number.
6533 */
6534 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006535soundfold_find(slang_T *slang, char_u *word)
Bram Moolenaar4770d092006-01-12 23:22:24 +00006536{
6537 idx_T arridx = 0;
6538 int len;
6539 int wlen = 0;
6540 int c;
6541 char_u *ptr = word;
6542 char_u *byts;
6543 idx_T *idxs;
6544 int wordnr = 0;
6545
6546 byts = slang->sl_sbyts;
6547 idxs = slang->sl_sidxs;
6548
6549 for (;;)
6550 {
6551 /* First byte is the number of possible bytes. */
6552 len = byts[arridx++];
6553
6554 /* If the first possible byte is a zero the word could end here.
6555 * If the word ends we found the word. If not skip the NUL bytes. */
6556 c = ptr[wlen];
6557 if (byts[arridx] == NUL)
6558 {
6559 if (c == NUL)
6560 break;
6561
6562 /* Skip over the zeros, there can be several. */
6563 while (len > 0 && byts[arridx] == NUL)
6564 {
6565 ++arridx;
6566 --len;
6567 }
6568 if (len == 0)
6569 return -1; /* no children, word should have ended here */
6570 ++wordnr;
6571 }
6572
6573 /* If the word ends we didn't find it. */
6574 if (c == NUL)
6575 return -1;
6576
6577 /* Perform a binary search in the list of accepted bytes. */
6578 if (c == TAB) /* <Tab> is handled like <Space> */
6579 c = ' ';
6580 while (byts[arridx] < c)
6581 {
6582 /* The word count is in the first idxs[] entry of the child. */
6583 wordnr += idxs[idxs[arridx]];
6584 ++arridx;
6585 if (--len == 0) /* end of the bytes, didn't find it */
6586 return -1;
6587 }
6588 if (byts[arridx] != c) /* didn't find the byte */
6589 return -1;
6590
6591 /* Continue at the child (if there is one). */
6592 arridx = idxs[arridx];
6593 ++wlen;
6594
6595 /* One space in the good word may stand for several spaces in the
6596 * checked word. */
6597 if (c == ' ')
6598 while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
6599 ++wlen;
6600 }
6601
6602 return wordnr;
6603}
6604
6605/*
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006606 * Copy "fword" to "cword", fixing case according to "flags".
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006607 */
6608 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006609make_case_word(char_u *fword, char_u *cword, int flags)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006610{
6611 if (flags & WF_ALLCAP)
6612 /* Make it all upper-case */
6613 allcap_copy(fword, cword);
6614 else if (flags & WF_ONECAP)
6615 /* Make the first letter upper-case */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006616 onecap_copy(fword, cword, TRUE);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006617 else
6618 /* Use goodword as-is. */
6619 STRCPY(cword, fword);
6620}
6621
Bram Moolenaarea424162005-06-16 21:51:00 +00006622
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006623/*
6624 * Return TRUE if "c1" and "c2" are similar characters according to the MAP
6625 * lines in the .aff file.
6626 */
6627 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006628similar_chars(slang_T *slang, int c1, int c2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006629{
Bram Moolenaarea424162005-06-16 21:51:00 +00006630 int m1, m2;
6631#ifdef FEAT_MBYTE
Bram Moolenaar9a920d82012-06-01 15:21:02 +02006632 char_u buf[MB_MAXBYTES + 1];
Bram Moolenaarea424162005-06-16 21:51:00 +00006633 hashitem_T *hi;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006634
Bram Moolenaarea424162005-06-16 21:51:00 +00006635 if (c1 >= 256)
6636 {
6637 buf[mb_char2bytes(c1, buf)] = 0;
6638 hi = hash_find(&slang->sl_map_hash, buf);
6639 if (HASHITEM_EMPTY(hi))
6640 m1 = 0;
6641 else
6642 m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6643 }
6644 else
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006645#endif
Bram Moolenaarea424162005-06-16 21:51:00 +00006646 m1 = slang->sl_map_array[c1];
6647 if (m1 == 0)
6648 return FALSE;
6649
6650
6651#ifdef FEAT_MBYTE
6652 if (c2 >= 256)
6653 {
6654 buf[mb_char2bytes(c2, buf)] = 0;
6655 hi = hash_find(&slang->sl_map_hash, buf);
6656 if (HASHITEM_EMPTY(hi))
6657 m2 = 0;
6658 else
6659 m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
6660 }
6661 else
6662#endif
6663 m2 = slang->sl_map_array[c2];
6664
6665 return m1 == m2;
6666}
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006667
6668/*
6669 * Add a suggestion to the list of suggestions.
Bram Moolenaar4770d092006-01-12 23:22:24 +00006670 * For a suggestion that is already in the list the lowest score is remembered.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006671 */
6672 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006673add_suggestion(
6674 suginfo_T *su,
6675 garray_T *gap, /* either su_ga or su_sga */
6676 char_u *goodword,
6677 int badlenarg, /* len of bad word replaced with "goodword" */
6678 int score,
6679 int altscore,
6680 int had_bonus, /* value for st_had_bonus */
6681 slang_T *slang, /* language for sound folding */
6682 int maxsf) /* su_maxscore applies to soundfold score,
Bram Moolenaar4770d092006-01-12 23:22:24 +00006683 su_sfmaxscore to the total score. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006684{
Bram Moolenaar4770d092006-01-12 23:22:24 +00006685 int goodlen; /* len of goodword changed */
6686 int badlen; /* len of bad word changed */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006687 suggest_T *stp;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006688 suggest_T new_sug;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006689 int i;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006690 char_u *pgood, *pbad;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006691
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006692 /* Minimize "badlen" for consistency. Avoids that changing "the the" to
6693 * "thee the" is added next to changing the first "the" the "thee". */
6694 pgood = goodword + STRLEN(goodword);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006695 pbad = su->su_badptr + badlenarg;
6696 for (;;)
Bram Moolenaar0c405862005-06-22 22:26:26 +00006697 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00006698 goodlen = (int)(pgood - goodword);
6699 badlen = (int)(pbad - su->su_badptr);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006700 if (goodlen <= 0 || badlen <= 0)
6701 break;
Bram Moolenaar91acfff2017-03-12 19:22:36 +01006702 MB_PTR_BACK(goodword, pgood);
6703 MB_PTR_BACK(su->su_badptr, pbad);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006704#ifdef FEAT_MBYTE
6705 if (has_mbyte)
Bram Moolenaar0c405862005-06-22 22:26:26 +00006706 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006707 if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
6708 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006709 }
6710 else
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006711#endif
6712 if (*pgood != *pbad)
6713 break;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006714 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00006715
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006716 if (badlen == 0 && goodlen == 0)
6717 /* goodword doesn't change anything; may happen for "the the" changing
6718 * the first "the" to itself. */
6719 return;
Bram Moolenaar0c405862005-06-22 22:26:26 +00006720
Bram Moolenaar89d40322006-08-29 15:30:07 +00006721 if (gap->ga_len == 0)
6722 i = -1;
6723 else
6724 {
6725 /* Check if the word is already there. Also check the length that is
6726 * being replaced "thes," -> "these" is a different suggestion from
6727 * "thes" -> "these". */
6728 stp = &SUG(*gap, 0);
6729 for (i = gap->ga_len; --i >= 0; ++stp)
6730 if (stp->st_wordlen == goodlen
6731 && stp->st_orglen == badlen
6732 && STRNCMP(stp->st_word, goodword, goodlen) == 0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006733 {
Bram Moolenaar89d40322006-08-29 15:30:07 +00006734 /*
6735 * Found it. Remember the word with the lowest score.
6736 */
6737 if (stp->st_slang == NULL)
6738 stp->st_slang = slang;
6739
6740 new_sug.st_score = score;
6741 new_sug.st_altscore = altscore;
6742 new_sug.st_had_bonus = had_bonus;
6743
6744 if (stp->st_had_bonus != had_bonus)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006745 {
Bram Moolenaar89d40322006-08-29 15:30:07 +00006746 /* Only one of the two had the soundalike score computed.
6747 * Need to do that for the other one now, otherwise the
6748 * scores can't be compared. This happens because
6749 * suggest_try_change() doesn't compute the soundalike
6750 * word to keep it fast, while some special methods set
6751 * the soundalike score to zero. */
6752 if (had_bonus)
6753 rescore_one(su, stp);
6754 else
6755 {
6756 new_sug.st_word = stp->st_word;
6757 new_sug.st_wordlen = stp->st_wordlen;
6758 new_sug.st_slang = stp->st_slang;
6759 new_sug.st_orglen = badlen;
6760 rescore_one(su, &new_sug);
6761 }
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006762 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006763
Bram Moolenaar89d40322006-08-29 15:30:07 +00006764 if (stp->st_score > new_sug.st_score)
6765 {
6766 stp->st_score = new_sug.st_score;
6767 stp->st_altscore = new_sug.st_altscore;
6768 stp->st_had_bonus = new_sug.st_had_bonus;
6769 }
6770 break;
Bram Moolenaar4770d092006-01-12 23:22:24 +00006771 }
Bram Moolenaar89d40322006-08-29 15:30:07 +00006772 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006773
Bram Moolenaar4770d092006-01-12 23:22:24 +00006774 if (i < 0 && ga_grow(gap, 1) == OK)
6775 {
6776 /* Add a suggestion. */
6777 stp = &SUG(*gap, gap->ga_len);
6778 stp->st_word = vim_strnsave(goodword, goodlen);
6779 if (stp->st_word != NULL)
6780 {
6781 stp->st_wordlen = goodlen;
6782 stp->st_score = score;
6783 stp->st_altscore = altscore;
6784 stp->st_had_bonus = had_bonus;
6785 stp->st_orglen = badlen;
6786 stp->st_slang = slang;
6787 ++gap->ga_len;
6788
6789 /* If we have too many suggestions now, sort the list and keep
6790 * the best suggestions. */
6791 if (gap->ga_len > SUG_MAX_COUNT(su))
6792 {
6793 if (maxsf)
6794 su->su_sfmaxscore = cleanup_suggestions(gap,
6795 su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
6796 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00006797 su->su_maxscore = cleanup_suggestions(gap,
6798 su->su_maxscore, SUG_CLEAN_COUNT(su));
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006799 }
6800 }
6801 }
6802}
6803
6804/*
Bram Moolenaar4770d092006-01-12 23:22:24 +00006805 * Suggestions may in fact be flagged as errors. Esp. for banned words and
6806 * for split words, such as "the the". Remove these from the list here.
6807 */
6808 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006809check_suggestions(
6810 suginfo_T *su,
6811 garray_T *gap) /* either su_ga or su_sga */
Bram Moolenaar4770d092006-01-12 23:22:24 +00006812{
6813 suggest_T *stp;
6814 int i;
6815 char_u longword[MAXWLEN + 1];
6816 int len;
6817 hlf_T attr;
6818
6819 stp = &SUG(*gap, 0);
6820 for (i = gap->ga_len - 1; i >= 0; --i)
6821 {
6822 /* Need to append what follows to check for "the the". */
Bram Moolenaaref9d6aa2011-04-11 16:56:35 +02006823 vim_strncpy(longword, stp[i].st_word, MAXWLEN);
Bram Moolenaar4770d092006-01-12 23:22:24 +00006824 len = stp[i].st_wordlen;
6825 vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
6826 MAXWLEN - len);
6827 attr = HLF_COUNT;
6828 (void)spell_check(curwin, longword, &attr, NULL, FALSE);
6829 if (attr != HLF_COUNT)
6830 {
6831 /* Remove this entry. */
6832 vim_free(stp[i].st_word);
6833 --gap->ga_len;
6834 if (i < gap->ga_len)
6835 mch_memmove(stp + i, stp + i + 1,
6836 sizeof(suggest_T) * (gap->ga_len - i));
6837 }
6838 }
6839}
6840
6841
6842/*
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006843 * Add a word to be banned.
6844 */
6845 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006846add_banned(
6847 suginfo_T *su,
6848 char_u *word)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006849{
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00006850 char_u *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006851 hash_T hash;
6852 hashitem_T *hi;
6853
Bram Moolenaar4770d092006-01-12 23:22:24 +00006854 hash = hash_hash(word);
6855 hi = hash_lookup(&su->su_banned, word, hash);
6856 if (HASHITEM_EMPTY(hi))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006857 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00006858 s = vim_strsave(word);
6859 if (s != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006860 hash_add_item(&su->su_banned, hi, s, hash);
6861 }
6862}
6863
6864/*
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006865 * Recompute the score for all suggestions if sound-folding is possible. This
6866 * is slow, thus only done for the final results.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006867 */
6868 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006869rescore_suggestions(suginfo_T *su)
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006870{
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006871 int i;
6872
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006873 if (su->su_sallang != NULL)
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006874 for (i = 0; i < su->su_ga.ga_len; ++i)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006875 rescore_one(su, &SUG(su->su_ga, i));
6876}
6877
6878/*
6879 * Recompute the score for one suggestion if sound-folding is possible.
6880 */
6881 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006882rescore_one(suginfo_T *su, suggest_T *stp)
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006883{
6884 slang_T *slang = stp->st_slang;
6885 char_u sal_badword[MAXWLEN];
Bram Moolenaar4effc802005-09-30 21:12:02 +00006886 char_u *p;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006887
6888 /* Only rescore suggestions that have no sal score yet and do have a
6889 * language. */
6890 if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
6891 {
6892 if (slang == su->su_sallang)
Bram Moolenaar4effc802005-09-30 21:12:02 +00006893 p = su->su_sal_badword;
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006894 else
Bram Moolenaar8b96d642005-09-05 22:05:30 +00006895 {
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006896 spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
Bram Moolenaar4effc802005-09-30 21:12:02 +00006897 p = sal_badword;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006898 }
Bram Moolenaar4effc802005-09-30 21:12:02 +00006899
6900 stp->st_altscore = stp_sal_score(stp, su, slang, p);
Bram Moolenaar482aaeb2005-09-29 18:26:07 +00006901 if (stp->st_altscore == SCORE_MAXMAX)
6902 stp->st_altscore = SCORE_BIG;
6903 stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
6904 stp->st_had_bonus = TRUE;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006905 }
6906}
Bram Moolenaar9f30f502005-06-14 22:01:04 +00006907
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006908static int
6909#ifdef __BORLANDC__
6910_RTLENTRYF
6911#endif
Bram Moolenaarbaaa7e92016-01-29 22:47:03 +01006912sug_compare(const void *s1, const void *s2);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006913
6914/*
6915 * Function given to qsort() to sort the suggestions on st_score.
Bram Moolenaar6b730e12005-09-16 21:47:57 +00006916 * First on "st_score", then "st_altscore" then alphabetically.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006917 */
6918 static int
6919#ifdef __BORLANDC__
6920_RTLENTRYF
6921#endif
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006922sug_compare(const void *s1, const void *s2)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006923{
6924 suggest_T *p1 = (suggest_T *)s1;
6925 suggest_T *p2 = (suggest_T *)s2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006926 int n = p1->st_score - p2->st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006927
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006928 if (n == 0)
Bram Moolenaar6b730e12005-09-16 21:47:57 +00006929 {
6930 n = p1->st_altscore - p2->st_altscore;
6931 if (n == 0)
6932 n = STRICMP(p1->st_word, p2->st_word);
6933 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006934 return n;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006935}
6936
6937/*
6938 * Cleanup the suggestions:
6939 * - Sort on score.
6940 * - Remove words that won't be displayed.
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006941 * Returns the maximum score in the list or "maxscore" unmodified.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006942 */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006943 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01006944cleanup_suggestions(
6945 garray_T *gap,
6946 int maxscore,
6947 int keep) /* nr of suggestions to keep */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006948{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006949 suggest_T *stp = &SUG(*gap, 0);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006950 int i;
6951
6952 /* Sort the list. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006953 qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006954
6955 /* Truncate the list to the number of suggestions that will be displayed. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006956 if (gap->ga_len > keep)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006957 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006958 for (i = keep; i < gap->ga_len; ++i)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006959 vim_free(stp[i].st_word);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006960 gap->ga_len = keep;
6961 return stp[keep - 1].st_score;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006962 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00006963 return maxscore;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006964}
6965
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Soundfold a string, for soundfold().
 * Uses the first language of the current window that has SAL items.  When
 * spell checking is off or no language has them, a copy of "word" is
 * returned.
 * Result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(char_u *word)
{
    char_u      sound[MAXWLEN];
    slang_T     *slang;
    int         lpi;

    /* Without spell checking there is no language to use. */
    if (!curwin->w_p_spell || *curwin->w_s->b_p_spl == NUL)
        return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
    {
        slang = LANGP_ENTRY(curwin->w_s->b_langp, lpi)->lp_slang;
        if (slang->sl_sal.ga_len > 0)
        {
            /* soundfold the word */
            spell_soundfold(slang, word, FALSE, sound);
            return vim_strsave(sound);
        }
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
6995
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00006996/*
6997 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
Bram Moolenaard12a1322005-08-21 22:08:24 +00006998 *
6999 * There are many ways to turn a word into a sound-a-like representation. The
7000 * oldest is Soundex (1918!). A nice overview can be found in "Approximate
7001 * swedish name matching - survey and test of different algorithms" by Klas
7002 * Erikson.
7003 *
7004 * We support two methods:
7005 * 1. SOFOFROM/SOFOTO do a simple character mapping.
7006 * 2. SAL items define a more advanced sound-folding (and much slower).
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007007 */
Bram Moolenaar9ccfebd2016-07-19 16:39:08 +02007008 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01007009spell_soundfold(
7010 slang_T *slang,
7011 char_u *inword,
7012 int folded, /* "inword" is already case-folded */
7013 char_u *res)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007014{
7015 char_u fword[MAXWLEN];
7016 char_u *word;
7017
7018 if (slang->sl_sofo)
7019 /* SOFOFROM and SOFOTO used */
7020 spell_soundfold_sofo(slang, inword, res);
7021 else
7022 {
7023 /* SAL items used. Requires the word to be case-folded. */
7024 if (folded)
7025 word = inword;
7026 else
7027 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007028 (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007029 word = fword;
7030 }
7031
7032#ifdef FEAT_MBYTE
7033 if (has_mbyte)
7034 spell_soundfold_wsal(slang, word, res);
7035 else
7036#endif
7037 spell_soundfold_sal(slang, word, res);
7038 }
7039}
7040
7041/*
7042 * Perform sound folding of "inword" into "res" according to SOFOFROM and
7043 * SOFOTO lines.
7044 */
7045 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01007046spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007047{
7048 char_u *s;
7049 int ri = 0;
7050 int c;
7051
7052#ifdef FEAT_MBYTE
7053 if (has_mbyte)
7054 {
7055 int prevc = 0;
7056 int *ip;
7057
7058 /* The sl_sal_first[] table contains the translation for chars up to
7059 * 255, sl_sal the rest. */
7060 for (s = inword; *s != NUL; )
7061 {
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007062 c = mb_cptr2char_adv(&s);
Bram Moolenaar1c465442017-03-12 20:10:05 +01007063 if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007064 c = ' ';
7065 else if (c < 256)
7066 c = slang->sl_sal_first[c];
7067 else
7068 {
7069 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
7070 if (ip == NULL) /* empty list, can't match */
7071 c = NUL;
7072 else
7073 for (;;) /* find "c" in the list */
7074 {
7075 if (*ip == 0) /* not found */
7076 {
7077 c = NUL;
7078 break;
7079 }
7080 if (*ip == c) /* match! */
7081 {
7082 c = ip[1];
7083 break;
7084 }
7085 ip += 2;
7086 }
7087 }
7088
7089 if (c != NUL && c != prevc)
7090 {
7091 ri += mb_char2bytes(c, res + ri);
7092 if (ri + MB_MAXBYTES > MAXWLEN)
7093 break;
7094 prevc = c;
7095 }
7096 }
7097 }
7098 else
7099#endif
7100 {
7101 /* The sl_sal_first[] table contains the translation. */
7102 for (s = inword; (c = *s) != NUL; ++s)
7103 {
Bram Moolenaar1c465442017-03-12 20:10:05 +01007104 if (VIM_ISWHITE(c))
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007105 c = ' ';
7106 else
7107 c = slang->sl_sal_first[c];
7108 if (c != NUL && (ri == 0 || res[ri - 1] != c))
7109 res[ri++] = c;
7110 }
7111 }
7112
7113 res[ri] = NUL;
7114}
7115
/*
 * Turn "inword" into its sound-a-like equivalent in "res", using the SAL
 * items of "slang".  Single-byte version: spell_soundfold() calls this when
 * "has_mbyte" is not set.
 * Works on a local copy of the word, because rules with '<' write their
 * replacement back into the word being scanned.
 * NOTE(review): "res" is presumably MAXWLEN bytes, as documented for
 * spell_soundfold(); "reslen" is only checked against MAXWLEN in some of the
 * places that write to "res" - confirm the callers' buffer sizes.
 */
7116 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01007117spell_soundfold_sal(slang_T *slang, char_u *inword, char_u *res)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007118{
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007119 salitem_T *smp;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007120 char_u word[MAXWLEN];
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007121 char_u *s = inword;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007122 char_u *t;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007123 char_u *pf;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007124 int i, j, z;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007125 int reslen;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007126 int n, k = 0;
7127 int z0;
7128 int k0;
7129 int n0;
7130 int c;
7131 int pri;
7132 int p0 = -333;
7133 int c0;
7134
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007135 /* Remove accents, if wanted. We actually remove all non-word characters.
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007136 * But keep white space. We need a copy, the word may be changed here. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007137 if (slang->sl_rem_accents)
7138 {
7139 t = word;
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007140 while (*s != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007141 {
/* A run of white space becomes one space in the copy. */
Bram Moolenaar1c465442017-03-12 20:10:05 +01007142 if (VIM_ISWHITE(*s))
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007143 {
7144 *t++ = ' ';
7145 s = skipwhite(s);
7146 }
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007147 else
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007148 {
/* Only bytes accepted by spell_iswordp_nmw() are copied. */
Bram Moolenaarcc63c642013-11-12 04:44:01 +01007149 if (spell_iswordp_nmw(s, curwin))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007150 *t++ = *s;
7151 ++s;
7152 }
7153 }
7154 *t = NUL;
7155 }
7156 else
Bram Moolenaaref9d6aa2011-04-11 16:56:35 +02007157 vim_strncpy(word, s, MAXWLEN - 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007158
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007159 smp = (salitem_T *)slang->sl_sal.ga_data;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007160
7161 /*
7162 * This comes from Aspell phonet.cpp. Converted from C++ to C.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007163 * Changed to keep spaces.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007164 */
/* "i" is the position in "word", "reslen" the number of bytes in "res".
 * "z" is set after a rule with '<' was applied and reset when another kind
 * of rule is applied or the position advances. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007165 i = reslen = z = 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007166 while ((c = word[i]) != NUL)
7167 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007168 /* Start with the first rule that has the character in the word. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007169 n = slang->sl_sal_first[c];
/* When "z0" gets set the code at the end of the loop does not store "c"
 * and does not advance "i". */
7170 z0 = 0;
7171
/* A negative "n" means there is no rule starting with "c". */
7172 if (n >= 0)
7173 {
7174 /* check all rules for the same letter */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007175 for (; (s = smp[n].sm_lead)[0] == c; ++n)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007176 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007177 /* Quickly skip entries that don't match the word. Most
7178 * entries are less then three chars, optimize for that. */
7179 k = smp[n].sm_leadlen;
7180 if (k > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007181 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007182 if (word[i + 1] != s[1])
7183 continue;
7184 if (k > 2)
7185 {
7186 for (j = 2; j < k; ++j)
7187 if (word[i + j] != s[j])
7188 break;
7189 if (j < k)
7190 continue;
7191 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007192 }
7193
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007194 if ((pf = smp[n].sm_oneof) != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007195 {
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007196 /* Check for match with one of the chars in "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007197 while (*pf != NUL && *pf != word[i + k])
7198 ++pf;
7199 if (*pf == NUL)
7200 continue;
/* The char matched by "sm_oneof" is part of the match. */
7201 ++k;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007202 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007203 s = smp[n].sm_rules;
7204 pri = 5; /* default priority */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007205
/* Remember the first rule char and the full matched length; "k0" is
 * used below to look at the char following the match. */
7206 p0 = *s;
7207 k0 = k;
/* Each leading '-' takes one char off the matched length "k", but "k"
 * does not go below one. */
7208 while (*s == '-' && k > 1)
7209 {
7210 k--;
7211 s++;
7212 }
/* '<' is skipped here, it is searched for again when replacing. */
7213 if (*s == '<')
7214 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007215 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007216 {
7217 /* determine priority */
7218 pri = *s - '0';
7219 s++;
7220 }
/* For "^^" skip the first '^', the second one is checked below. */
7221 if (*s == '^' && *(s + 1) == '^')
7222 s++;
7223
/* The rule is used when:
 * - no condition is left, or
 * - '^': the match is at the start of "word" or the char before it is
 *   not a space and not a word char; with "^$" the char after the match
 *   must not be a word char, or
 * - '$': the char before the match is a word char and the char after
 *   the match is not. */
7224 if (*s == NUL
7225 || (*s == '^'
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007226 && (i == 0 || !(word[i - 1] == ' '
Bram Moolenaar860cae12010-06-05 23:22:07 +02007227 || spell_iswordp(word + i - 1, curwin)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007228 && (*(s + 1) != '$'
Bram Moolenaar860cae12010-06-05 23:22:07 +02007229 || (!spell_iswordp(word + i + k0, curwin))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007230 || (*s == '$' && i > 0
Bram Moolenaar860cae12010-06-05 23:22:07 +02007231 && spell_iswordp(word + i - 1, curwin)
7232 && (!spell_iswordp(word + i + k0, curwin))))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007233 {
7234 /* search for followup rules, if: */
7235 /* followup and k > 1 and NO '-' in searchstring */
7236 c0 = word[i + k - 1];
7237 n0 = slang->sl_sal_first[c0];
7238
7239 if (slang->sl_followup && k > 1 && n0 >= 0
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007240 && p0 != '-' && word[i + k] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007241 {
7242 /* test follow-up rule for "word[i + k]" */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007243 for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007244 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007245 /* Quickly skip entries that don't match the word.
7246 * */
7247 k0 = smp[n0].sm_leadlen;
7248 if (k0 > 1)
7249 {
7250 if (word[i + k] != s[1])
7251 continue;
7252 if (k0 > 2)
7253 {
7254 pf = word + i + k + 1;
7255 for (j = 2; j < k0; ++j)
7256 if (*pf++ != s[j])
7257 break;
7258 if (j < k0)
7259 continue;
7260 }
7261 }
/* The follow-up rule starts at the last char of the current match, thus
 * "k0" becomes the length of both together, counted from "i". */
7262 k0 += k - 1;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007263
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007264 if ((pf = smp[n0].sm_oneof) != NULL)
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007265 {
7266 /* Check for match with one of the chars in
Bram Moolenaar42eeac32005-06-29 22:40:58 +00007267 * "sm_oneof". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007268 while (*pf != NUL && *pf != word[i + k0])
7269 ++pf;
7270 if (*pf == NUL)
7271 continue;
7272 ++k0;
7273 }
7274
/* From here on "p0" is the priority of the follow-up rule. */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007275 p0 = 5;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007276 s = smp[n0].sm_rules;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007277 while (*s == '-')
7278 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007279 /* "k0" gets NOT reduced because
7280 * "if (k0 == k)" */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007281 s++;
7282 }
7283 if (*s == '<')
7284 s++;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007285 if (VIM_ISDIGIT(*s))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007286 {
7287 p0 = *s - '0';
7288 s++;
7289 }
7290
7291 if (*s == NUL
7292 /* *s == '^' cuts */
7293 || (*s == '$'
Bram Moolenaar9c96f592005-06-30 21:52:39 +00007294 && !spell_iswordp(word + i + k0,
Bram Moolenaar860cae12010-06-05 23:22:07 +02007295 curwin)))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007296 {
7297 if (k0 == k)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007298 /* this is just a piece of the string */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007299 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007300
7301 if (p0 < pri)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007302 /* priority too low */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007303 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007304 /* rule fits; stop search */
7305 break;
7306 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007307 }
7308
/* The follow-up loop stopped at a rule that still starts with "c0" and
 * has at least the priority of the current rule: skip the current rule
 * and try the next one. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007309 if (p0 >= pri && smp[n0].sm_lead[0] == c0)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007310 continue;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007311 }
7312
7313 /* replace string */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007314 s = smp[n].sm_to;
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00007315 if (s == NULL)
7316 s = (char_u *)"";
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007317 pf = smp[n].sm_rules;
/* From here on "p0" is 1 when the rule contains '<', otherwise 0. */
7318 p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007319 if (p0 == 1 && z == 0)
7320 {
7321 /* rule with '<' is used */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007322 if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
7323 || res[reslen - 1] == *s))
7324 reslen--;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007325 z0 = 1;
7326 z = 1;
7327 k0 = 0;
/* Write the replacement over "word" at the current position; matching
 * continues at the same position with the changed text. */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00007328 while (*s != NUL && word[i + k0] != NUL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007329 {
7330 word[i + k0] = *s;
7331 k0++;
7332 s++;
7333 }
/* When fewer chars were written than were matched, remove the rest of
 * the match from "word". */
7334 if (k > k0)
Bram Moolenaara7241f52008-06-24 20:39:31 +00007335 STRMOVE(word + i + k0, word + i + k);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007336
7337 /* new "actual letter" */
7338 c = word[i];
7339 }
7340 else
7341 {
7342 /* no '<' rule used */
7343 i += k - 1;
7344 z = 0;
/* Store all but the last char of the replacement, skipping a char that
 * equals the one stored just before it.  The last char ends up in "c". */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007345 while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007346 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007347 if (reslen == 0 || res[reslen - 1] != *s)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00007348 res[reslen++] = *s;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007349 s++;
7350 }
7351 /* new "actual letter" */
7352 c = *s;
/* With "^^" in the rule: store "c" right away, drop the handled text
 * from "word" and restart at its beginning. */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007353 if (strstr((char *)pf, "^^") != NULL)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007354 {
7355 if (c != NUL)
Bram Moolenaara1ba8112005-06-28 23:23:32 +00007356 res[reslen++] = c;
Bram Moolenaara7241f52008-06-24 20:39:31 +00007357 STRMOVE(word, word + i + 1);
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007358 i = 0;
7359 z0 = 1;
7360 }
7361 }
7362 break;
7363 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007364 }
7365 }
Bram Moolenaar1c465442017-03-12 20:10:05 +01007366 else if (VIM_ISWHITE(c))
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007367 {
7368 c = ' ';
7369 k = 1;
7370 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007371
/* Unless a rule asked to stay at the current position: store the pending
 * char "c" when the conditions below allow it and go to the next char of
 * "word". */
7372 if (z0 == 0)
7373 {
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007374 if (k && !p0 && reslen < MAXWLEN && c != NUL
7375 && (!slang->sl_collapse || reslen == 0
7376 || res[reslen - 1] != c))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007377 /* condense only double letters */
Bram Moolenaara1ba8112005-06-28 23:23:32 +00007378 res[reslen++] = c;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007379
7380 i++;
7381 z = 0;
7382 k = 0;
7383 }
7384 }
7385
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007386 res[reslen] = NUL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007387}
7388
#ifdef FEAT_MBYTE
/*
 * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
 * Multi-byte version of spell_soundfold().
 *
 * "slang" supplies the rule table (sl_sal), the per-byte index of the first
 * rule (sl_sal_first) and the sl_rem_accents, sl_followup and sl_collapse
 * options.  "inword" is a NUL-terminated multi-byte string.  "res" receives
 * the NUL-terminated multi-byte result.
 *
 * The work is done in three steps:
 * 1. "inword" is converted to wide characters in "word[]", optionally
 *    dropping non-word characters and squeezing white space.
 * 2. The rules are applied to "word[]", producing wide characters in
 *    "wres[]".
 * 3. "wres[]" is converted back to multi-byte in "res".
 */
    static void
spell_soundfold_wsal(slang_T *slang, char_u *inword, char_u *res)
{
    salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
    int         word[MAXWLEN];
    int         wres[MAXWLEN];
    int         l;
    char_u      *s;
    int         *ws;
    char_u      *t;
    int         *pf;
    int         i, j, z;
    int         reslen;
    int         n, k = 0;
    int         z0;
    int         k0;
    int         n0;
    int         c;
    int         pri;
    int         p0 = -333;
    int         c0;
    int         did_white = FALSE;
    int         wordlen;

    /*
     * Convert the multi-byte string to a wide-character string.
     * Remove accents, if wanted.  We actually remove all non-word characters.
     * But keep white space.
     */
    wordlen = 0;
    for (s = inword; *s != NUL; )
    {
        t = s;
        c = mb_cptr2char_adv(&s);
        if (slang->sl_rem_accents)
        {
            if (enc_utf8 ? utf_class(c) == 0 : VIM_ISWHITE(c))
            {
                /* A run of white space is reduced to a single space. */
                if (did_white)
                    continue;
                c = ' ';
                did_white = TRUE;
            }
            else
            {
                did_white = FALSE;
                if (!spell_iswordp_nmw(t, curwin))
                    continue;
            }
        }
        word[wordlen++] = c;
    }
    word[wordlen] = NUL;

    /*
     * This algorithm comes from Aspell phonet.cpp.
     * Converted from C++ to C.  Added support for multi-byte chars.
     * Changed to keep spaces.
     */
    i = reslen = z = 0;
    while ((c = word[i]) != NUL)
    {
        /* Start with the first rule that has the character in the word. */
        n = slang->sl_sal_first[c & 0xff];
        z0 = 0;

        if (n >= 0)
        {
            /* Check all rules for the same index byte.
             * If c is 0x300 need extra check for the end of the array, as
             * (c & 0xff) is NUL. */
            for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff)
                                                     && ws[0] != NUL; ++n)
            {
                /* Quickly skip entries that don't match the word.  Most
                 * entries are less than three chars, optimize for that. */
                if (c != ws[0])
                    continue;
                k = smp[n].sm_leadlen;
                if (k > 1)
                {
                    if (word[i + 1] != ws[1])
                        continue;
                    if (k > 2)
                    {
                        for (j = 2; j < k; ++j)
                            if (word[i + j] != ws[j])
                                break;
                        if (j < k)
                            continue;
                    }
                }

                if ((pf = smp[n].sm_oneof_w) != NULL)
                {
                    /* Check for match with one of the chars in "sm_oneof". */
                    while (*pf != NUL && *pf != word[i + k])
                        ++pf;
                    if (*pf == NUL)
                        continue;
                    ++k;
                }
                s = smp[n].sm_rules;
                pri = 5;    /* default priority */

                p0 = *s;
                k0 = k;
                /* Each leading '-' shortens the part that gets consumed,
                 * "k0" keeps the full matched length. */
                while (*s == '-' && k > 1)
                {
                    k--;
                    s++;
                }
                if (*s == '<')
                    s++;
                if (VIM_ISDIGIT(*s))
                {
                    /* determine priority */
                    pri = *s - '0';
                    s++;
                }
                if (*s == '^' && *(s + 1) == '^')
                    s++;

                if (*s == NUL
                        || (*s == '^'
                            && (i == 0 || !(word[i - 1] == ' '
                                    || spell_iswordp_w(word + i - 1, curwin)))
                            && (*(s + 1) != '$'
                                || (!spell_iswordp_w(word + i + k0, curwin))))
                        || (*s == '$' && i > 0
                            && spell_iswordp_w(word + i - 1, curwin)
                            && (!spell_iswordp_w(word + i + k0, curwin))))
                {
                    /* search for followup rules, if: */
                    /* followup and k > 1 and NO '-' in searchstring */
                    c0 = word[i + k - 1];
                    n0 = slang->sl_sal_first[c0 & 0xff];

                    if (slang->sl_followup && k > 1 && n0 >= 0
                                           && p0 != '-' && word[i + k] != NUL)
                    {
                        /* Test follow-up rule for "word[i + k]"; loop over
                         * all entries with the same index byte.
                         * As in the loop above, when (c0 & 0xff) is NUL the
                         * entry with a NUL lead character must stop the
                         * scan, otherwise it would run past the rules. */
                        for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
                                                         == (c0 & 0xff)
                                                     && ws[0] != NUL; ++n0)
                        {
                            /* Quickly skip entries that don't match the word.
                             */
                            if (c0 != ws[0])
                                continue;
                            k0 = smp[n0].sm_leadlen;
                            if (k0 > 1)
                            {
                                if (word[i + k] != ws[1])
                                    continue;
                                if (k0 > 2)
                                {
                                    pf = word + i + k + 1;
                                    for (j = 2; j < k0; ++j)
                                        if (*pf++ != ws[j])
                                            break;
                                    if (j < k0)
                                        continue;
                                }
                            }
                            k0 += k - 1;

                            if ((pf = smp[n0].sm_oneof_w) != NULL)
                            {
                                /* Check for match with one of the chars in
                                 * "sm_oneof". */
                                while (*pf != NUL && *pf != word[i + k0])
                                    ++pf;
                                if (*pf == NUL)
                                    continue;
                                ++k0;
                            }

                            p0 = 5;
                            s = smp[n0].sm_rules;
                            while (*s == '-')
                            {
                                /* "k0" gets NOT reduced because
                                 * "if (k0 == k)" */
                                s++;
                            }
                            if (*s == '<')
                                s++;
                            if (VIM_ISDIGIT(*s))
                            {
                                p0 = *s - '0';
                                s++;
                            }

                            if (*s == NUL
                                    /* *s == '^' cuts */
                                    || (*s == '$'
                                        && !spell_iswordp_w(word + i + k0,
                                                                     curwin)))
                            {
                                if (k0 == k)
                                    /* this is just a piece of the string */
                                    continue;

                                if (p0 < pri)
                                    /* priority too low */
                                    continue;
                                /* rule fits; stop search */
                                break;
                            }
                        }

                        /* The loop was left with "break" on a fitting
                         * follow-up rule: skip the current rule.  The NUL
                         * test excludes the entry that ended the scan. */
                        if (p0 >= pri && smp[n0].sm_lead_w[0] != NUL
                                && (smp[n0].sm_lead_w[0] & 0xff)
                                                               == (c0 & 0xff))
                            continue;
                    }

                    /* replace string */
                    ws = smp[n].sm_to_w;
                    s = smp[n].sm_rules;
                    p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
                    if (p0 == 1 && z == 0)
                    {
                        /* rule with '<' is used */
                        if (reslen > 0 && ws != NULL && *ws != NUL
                                && (wres[reslen - 1] == c
                                                    || wres[reslen - 1] == *ws))
                            reslen--;
                        z0 = 1;
                        z = 1;
                        k0 = 0;
                        /* The replacement is written into "word" itself and
                         * scanned again from position "i". */
                        if (ws != NULL)
                            while (*ws != NUL && word[i + k0] != NUL)
                            {
                                word[i + k0] = *ws;
                                k0++;
                                ws++;
                            }
                        if (k > k0)
                            mch_memmove(word + i + k0, word + i + k,
                                    sizeof(int) * (wordlen - (i + k) + 1));

                        /* new "actual letter" */
                        c = word[i];
                    }
                    else
                    {
                        /* no '<' rule used */
                        i += k - 1;
                        z = 0;
                        /* Append all but the last replacement character,
                         * skipping one equal to the previous result char. */
                        if (ws != NULL)
                            while (*ws != NUL && ws[1] != NUL
                                                          && reslen < MAXWLEN)
                            {
                                if (reslen == 0 || wres[reslen - 1] != *ws)
                                    wres[reslen++] = *ws;
                                ws++;
                            }
                        /* new "actual letter" */
                        if (ws == NULL)
                            c = NUL;
                        else
                            c = *ws;
                        if (strstr((char *)s, "^^") != NULL)
                        {
                            /* "wres" may already be full after the loop
                             * above, do not write past its end. */
                            if (c != NUL && reslen < MAXWLEN)
                                wres[reslen++] = c;
                            mch_memmove(word, word + i + 1,
                                       sizeof(int) * (wordlen - (i + 1) + 1));
                            i = 0;
                            z0 = 1;
                        }
                    }
                    break;
                }
            }
        }
        else if (VIM_ISWHITE(c))
        {
            c = ' ';
            k = 1;
        }

        if (z0 == 0)
        {
            if (k && !p0 && reslen < MAXWLEN && c != NUL
                    && (!slang->sl_collapse || reslen == 0
                                                     || wres[reslen - 1] != c))
                /* condense only double letters */
                wres[reslen++] = c;

            i++;
            z = 0;
            k = 0;
        }
    }

    /* Convert wide characters in "wres" to a multi-byte string in "res". */
    l = 0;
    for (n = 0; n < reslen; ++n)
    {
        l += mb_char2bytes(wres[n], res + l);
        /* Stop while there is still room for one more character. */
        if (l + MB_MAXBYTES > MAXWLEN)
            break;
    }
    res[l] = NUL;
}
#endif
7703
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007704/*
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007705 * Compute a score for two sound-a-like words.
7706 * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
7707 * Instead of a generic loop we write out the code. That keeps it fast by
7708 * avoiding checks that will not be possible.
7709 */
7710 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01007711soundalike_score(
7712 char_u *goodstart, /* sound-folded good word */
7713 char_u *badstart) /* sound-folded bad word */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007714{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007715 char_u *goodsound = goodstart;
7716 char_u *badsound = badstart;
7717 int goodlen;
7718 int badlen;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007719 int n;
7720 char_u *pl, *ps;
7721 char_u *pl2, *ps2;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007722 int score = 0;
7723
Bram Moolenaar121d95f2010-08-01 15:11:43 +02007724 /* Adding/inserting "*" at the start (word starts with vowel) shouldn't be
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007725 * counted so much, vowels halfway the word aren't counted at all. */
7726 if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
7727 {
Bram Moolenaar121d95f2010-08-01 15:11:43 +02007728 if ((badsound[0] == NUL && goodsound[1] == NUL)
7729 || (goodsound[0] == NUL && badsound[1] == NUL))
7730 /* changing word with vowel to word without a sound */
7731 return SCORE_DEL;
7732 if (badsound[0] == NUL || goodsound[0] == NUL)
7733 /* more than two changes */
7734 return SCORE_MAXMAX;
7735
Bram Moolenaar4770d092006-01-12 23:22:24 +00007736 if (badsound[1] == goodsound[1]
7737 || (badsound[1] != NUL
7738 && goodsound[1] != NUL
7739 && badsound[2] == goodsound[2]))
7740 {
7741 /* handle like a substitute */
7742 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007743 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00007744 {
7745 score = 2 * SCORE_DEL / 3;
7746 if (*badsound == '*')
7747 ++badsound;
7748 else
7749 ++goodsound;
7750 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007751 }
7752
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007753 goodlen = (int)STRLEN(goodsound);
7754 badlen = (int)STRLEN(badsound);
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007755
Bram Moolenaarf711faf2007-05-10 16:48:19 +00007756 /* Return quickly if the lengths are too different to be fixed by two
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007757 * changes. */
7758 n = goodlen - badlen;
7759 if (n < -2 || n > 2)
7760 return SCORE_MAXMAX;
7761
7762 if (n > 0)
7763 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007764 pl = goodsound; /* goodsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007765 ps = badsound;
7766 }
7767 else
7768 {
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007769 pl = badsound; /* badsound is longest */
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007770 ps = goodsound;
7771 }
7772
7773 /* Skip over the identical part. */
7774 while (*pl == *ps && *pl != NUL)
7775 {
7776 ++pl;
7777 ++ps;
7778 }
7779
7780 switch (n)
7781 {
7782 case -2:
7783 case 2:
7784 /*
7785 * Must delete two characters from "pl".
7786 */
7787 ++pl; /* first delete */
7788 while (*pl == *ps)
7789 {
7790 ++pl;
7791 ++ps;
7792 }
7793 /* strings must be equal after second delete */
7794 if (STRCMP(pl + 1, ps) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007795 return score + SCORE_DEL * 2;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007796
7797 /* Failed to compare. */
7798 break;
7799
7800 case -1:
7801 case 1:
7802 /*
7803 * Minimal one delete from "pl" required.
7804 */
7805
7806 /* 1: delete */
7807 pl2 = pl + 1;
7808 ps2 = ps;
7809 while (*pl2 == *ps2)
7810 {
7811 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007812 return score + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007813 ++pl2;
7814 ++ps2;
7815 }
7816
7817 /* 2: delete then swap, then rest must be equal */
7818 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7819 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007820 return score + SCORE_DEL + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007821
7822 /* 3: delete then substitute, then the rest must be equal */
7823 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007824 return score + SCORE_DEL + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007825
7826 /* 4: first swap then delete */
7827 if (pl[0] == ps[1] && pl[1] == ps[0])
7828 {
7829 pl2 = pl + 2; /* swap, skip two chars */
7830 ps2 = ps + 2;
7831 while (*pl2 == *ps2)
7832 {
7833 ++pl2;
7834 ++ps2;
7835 }
7836 /* delete a char and then strings must be equal */
7837 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007838 return score + SCORE_SWAP + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007839 }
7840
7841 /* 5: first substitute then delete */
7842 pl2 = pl + 1; /* substitute, skip one char */
7843 ps2 = ps + 1;
7844 while (*pl2 == *ps2)
7845 {
7846 ++pl2;
7847 ++ps2;
7848 }
7849 /* delete a char and then strings must be equal */
7850 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007851 return score + SCORE_SUBST + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007852
7853 /* Failed to compare. */
7854 break;
7855
7856 case 0:
7857 /*
Bram Moolenaar6ae167a2009-02-11 16:58:49 +00007858 * Lengths are equal, thus changes must result in same length: An
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007859 * insert is only possible in combination with a delete.
7860 * 1: check if for identical strings
7861 */
7862 if (*pl == NUL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007863 return score;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007864
7865 /* 2: swap */
7866 if (pl[0] == ps[1] && pl[1] == ps[0])
7867 {
7868 pl2 = pl + 2; /* swap, skip two chars */
7869 ps2 = ps + 2;
7870 while (*pl2 == *ps2)
7871 {
7872 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007873 return score + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007874 ++pl2;
7875 ++ps2;
7876 }
7877 /* 3: swap and swap again */
7878 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7879 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007880 return score + SCORE_SWAP + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007881
7882 /* 4: swap and substitute */
7883 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007884 return score + SCORE_SWAP + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007885 }
7886
7887 /* 5: substitute */
7888 pl2 = pl + 1;
7889 ps2 = ps + 1;
7890 while (*pl2 == *ps2)
7891 {
7892 if (*pl2 == NUL) /* reached the end */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007893 return score + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007894 ++pl2;
7895 ++ps2;
7896 }
7897
7898 /* 6: substitute and swap */
7899 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
7900 && STRCMP(pl2 + 2, ps2 + 2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007901 return score + SCORE_SUBST + SCORE_SWAP;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007902
7903 /* 7: substitute and substitute */
7904 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007905 return score + SCORE_SUBST + SCORE_SUBST;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007906
7907 /* 8: insert then delete */
7908 pl2 = pl;
7909 ps2 = ps + 1;
7910 while (*pl2 == *ps2)
7911 {
7912 ++pl2;
7913 ++ps2;
7914 }
7915 if (STRCMP(pl2 + 1, ps2) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007916 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007917
7918 /* 9: delete then insert */
7919 pl2 = pl + 1;
7920 ps2 = ps;
7921 while (*pl2 == *ps2)
7922 {
7923 ++pl2;
7924 ++ps2;
7925 }
7926 if (STRCMP(pl2, ps2 + 1) == 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00007927 return score + SCORE_INS + SCORE_DEL;
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007928
7929 /* Failed to compare. */
7930 break;
7931 }
7932
7933 return SCORE_MAXMAX;
7934}
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007935
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007936/*
7937 * Compute the "edit distance" to turn "badword" into "goodword". The less
Bram Moolenaard857f0e2005-06-21 22:37:39 +00007938 * deletes/inserts/substitutes/swaps are required the lower the score.
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007939 *
Bram Moolenaard12a1322005-08-21 22:08:24 +00007940 * The algorithm is described by Du and Chang, 1992.
7941 * The implementation of the algorithm comes from Aspell editdist.cpp,
7942 * edit_distance(). It has been converted from C++ to C and modified to
7943 * support multi-byte characters.
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007944 */
7945 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01007946spell_edit_score(
7947 slang_T *slang,
7948 char_u *badword,
7949 char_u *goodword)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007950{
7951 int *cnt;
Bram Moolenaarf711faf2007-05-10 16:48:19 +00007952 int badlen, goodlen; /* lengths including NUL */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007953 int j, i;
7954 int t;
7955 int bc, gc;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007956 int pbc, pgc;
7957#ifdef FEAT_MBYTE
7958 char_u *p;
7959 int wbadword[MAXWLEN];
7960 int wgoodword[MAXWLEN];
7961
7962 if (has_mbyte)
7963 {
7964 /* Get the characters from the multi-byte strings and put them in an
7965 * int array for easy access. */
7966 for (p = badword, badlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007967 wbadword[badlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +00007968 wbadword[badlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007969 for (p = goodword, goodlen = 0; *p != NUL; )
Bram Moolenaar0fa313a2005-08-10 21:07:57 +00007970 wgoodword[goodlen++] = mb_cptr2char_adv(&p);
Bram Moolenaar97409f12005-07-08 22:17:29 +00007971 wgoodword[goodlen++] = 0;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007972 }
7973 else
7974#endif
7975 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00007976 badlen = (int)STRLEN(badword) + 1;
7977 goodlen = (int)STRLEN(goodword) + 1;
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007978 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007979
7980 /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
7981#define CNT(a, b) cnt[(a) + (b) * (badlen + 1)]
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007982 cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
7983 TRUE);
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007984 if (cnt == NULL)
7985 return 0; /* out of memory */
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007986
7987 CNT(0, 0) = 0;
7988 for (j = 1; j <= goodlen; ++j)
Bram Moolenaar4770d092006-01-12 23:22:24 +00007989 CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007990
7991 for (i = 1; i <= badlen; ++i)
7992 {
Bram Moolenaar4770d092006-01-12 23:22:24 +00007993 CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00007994 for (j = 1; j <= goodlen; ++j)
7995 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00007996#ifdef FEAT_MBYTE
7997 if (has_mbyte)
7998 {
7999 bc = wbadword[i - 1];
8000 gc = wgoodword[j - 1];
8001 }
8002 else
8003#endif
8004 {
8005 bc = badword[i - 1];
8006 gc = goodword[j - 1];
8007 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008008 if (bc == gc)
8009 CNT(i, j) = CNT(i - 1, j - 1);
8010 else
8011 {
8012 /* Use a better score when there is only a case difference. */
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008013 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008014 CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
8015 else
Bram Moolenaar4770d092006-01-12 23:22:24 +00008016 {
8017 /* For a similar character use SCORE_SIMILAR. */
8018 if (slang != NULL
8019 && slang->sl_has_map
8020 && similar_chars(slang, gc, bc))
8021 CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
8022 else
8023 CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
8024 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008025
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008026 if (i > 1 && j > 1)
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008027 {
Bram Moolenaar9f30f502005-06-14 22:01:04 +00008028#ifdef FEAT_MBYTE
8029 if (has_mbyte)
8030 {
8031 pbc = wbadword[i - 2];
8032 pgc = wgoodword[j - 2];
8033 }
8034 else
8035#endif
8036 {
8037 pbc = badword[i - 2];
8038 pgc = goodword[j - 2];
8039 }
8040 if (bc == pgc && pbc == gc)
8041 {
8042 t = SCORE_SWAP + CNT(i - 2, j - 2);
8043 if (t < CNT(i, j))
8044 CNT(i, j) = t;
8045 }
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008046 }
8047 t = SCORE_DEL + CNT(i - 1, j);
8048 if (t < CNT(i, j))
8049 CNT(i, j) = t;
8050 t = SCORE_INS + CNT(i, j - 1);
8051 if (t < CNT(i, j))
8052 CNT(i, j) = t;
8053 }
8054 }
8055 }
Bram Moolenaard857f0e2005-06-21 22:37:39 +00008056
8057 i = CNT(badlen - 1, goodlen - 1);
8058 vim_free(cnt);
8059 return i;
Bram Moolenaar9ba0eb82005-06-13 22:28:56 +00008060}
Bram Moolenaarcfc6c432005-06-06 21:50:35 +00008061
/*
 * An alternative that still has to be tried, kept on the stack used by
 * spell_edit_score_limit().
 */
typedef struct
{
    int         badi;       /* index in the bad word where to continue */
    int         goodi;      /* index in the good word where to continue */
    int         score;      /* score accumulated when getting there */
} limitscore_T;
8068
8069/*
8070 * Like spell_edit_score(), but with a limit on the score to make it faster.
8071 * May return SCORE_MAXMAX when the score is higher than "limit".
8072 *
8073 * This uses a stack for the edits still to be tried.
8074 * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support
8075 * for multi-byte characters.
8076 */
8077 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008078spell_edit_score_limit(
8079 slang_T *slang,
8080 char_u *badword,
8081 char_u *goodword,
8082 int limit)
Bram Moolenaar4770d092006-01-12 23:22:24 +00008083{
8084 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
8085 int stackidx;
8086 int bi, gi;
8087 int bi2, gi2;
8088 int bc, gc;
8089 int score;
8090 int score_off;
8091 int minscore;
8092 int round;
8093
8094#ifdef FEAT_MBYTE
8095 /* Multi-byte characters require a bit more work, use a different function
8096 * to avoid testing "has_mbyte" quite often. */
8097 if (has_mbyte)
8098 return spell_edit_score_limit_w(slang, badword, goodword, limit);
8099#endif
8100
8101 /*
8102 * The idea is to go from start to end over the words. So long as
8103 * characters are equal just continue, this always gives the lowest score.
8104 * When there is a difference try several alternatives. Each alternative
8105 * increases "score" for the edit distance. Some of the alternatives are
8106 * pushed unto a stack and tried later, some are tried right away. At the
8107 * end of the word the score for one alternative is known. The lowest
8108 * possible score is stored in "minscore".
8109 */
8110 stackidx = 0;
8111 bi = 0;
8112 gi = 0;
8113 score = 0;
8114 minscore = limit + 1;
8115
8116 for (;;)
8117 {
8118 /* Skip over an equal part, score remains the same. */
8119 for (;;)
8120 {
8121 bc = badword[bi];
8122 gc = goodword[gi];
8123 if (bc != gc) /* stop at a char that's different */
8124 break;
8125 if (bc == NUL) /* both words end */
8126 {
8127 if (score < minscore)
8128 minscore = score;
8129 goto pop; /* do next alternative */
8130 }
8131 ++bi;
8132 ++gi;
8133 }
8134
8135 if (gc == NUL) /* goodword ends, delete badword chars */
8136 {
8137 do
8138 {
8139 if ((score += SCORE_DEL) >= minscore)
8140 goto pop; /* do next alternative */
8141 } while (badword[++bi] != NUL);
8142 minscore = score;
8143 }
8144 else if (bc == NUL) /* badword ends, insert badword chars */
8145 {
8146 do
8147 {
8148 if ((score += SCORE_INS) >= minscore)
8149 goto pop; /* do next alternative */
8150 } while (goodword[++gi] != NUL);
8151 minscore = score;
8152 }
8153 else /* both words continue */
8154 {
8155 /* If not close to the limit, perform a change. Only try changes
8156 * that may lead to a lower score than "minscore".
8157 * round 0: try deleting a char from badword
8158 * round 1: try inserting a char in badword */
8159 for (round = 0; round <= 1; ++round)
8160 {
8161 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8162 if (score_off < minscore)
8163 {
8164 if (score_off + SCORE_EDIT_MIN >= minscore)
8165 {
8166 /* Near the limit, rest of the words must match. We
8167 * can check that right now, no need to push an item
8168 * onto the stack. */
8169 bi2 = bi + 1 - round;
8170 gi2 = gi + round;
8171 while (goodword[gi2] == badword[bi2])
8172 {
8173 if (goodword[gi2] == NUL)
8174 {
8175 minscore = score_off;
8176 break;
8177 }
8178 ++bi2;
8179 ++gi2;
8180 }
8181 }
8182 else
8183 {
8184 /* try deleting/inserting a character later */
8185 stack[stackidx].badi = bi + 1 - round;
8186 stack[stackidx].goodi = gi + round;
8187 stack[stackidx].score = score_off;
8188 ++stackidx;
8189 }
8190 }
8191 }
8192
8193 if (score + SCORE_SWAP < minscore)
8194 {
8195 /* If swapping two characters makes a match then the
8196 * substitution is more expensive, thus there is no need to
8197 * try both. */
8198 if (gc == badword[bi + 1] && bc == goodword[gi + 1])
8199 {
8200 /* Swap two characters, that is: skip them. */
8201 gi += 2;
8202 bi += 2;
8203 score += SCORE_SWAP;
8204 continue;
8205 }
8206 }
8207
8208 /* Substitute one character for another which is the same
8209 * thing as deleting a character from both goodword and badword.
8210 * Use a better score when there is only a case difference. */
8211 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8212 score += SCORE_ICASE;
8213 else
8214 {
8215 /* For a similar character use SCORE_SIMILAR. */
8216 if (slang != NULL
8217 && slang->sl_has_map
8218 && similar_chars(slang, gc, bc))
8219 score += SCORE_SIMILAR;
8220 else
8221 score += SCORE_SUBST;
8222 }
8223
8224 if (score < minscore)
8225 {
8226 /* Do the substitution. */
8227 ++gi;
8228 ++bi;
8229 continue;
8230 }
8231 }
8232pop:
8233 /*
8234 * Get here to try the next alternative, pop it from the stack.
8235 */
8236 if (stackidx == 0) /* stack is empty, finished */
8237 break;
8238
8239 /* pop an item from the stack */
8240 --stackidx;
8241 gi = stack[stackidx].goodi;
8242 bi = stack[stackidx].badi;
8243 score = stack[stackidx].score;
8244 }
8245
8246 /* When the score goes over "limit" it may actually be much higher.
8247 * Return a very large number to avoid going below the limit when giving a
8248 * bonus. */
8249 if (minscore > limit)
8250 return SCORE_MAXMAX;
8251 return minscore;
8252}
8253
8254#ifdef FEAT_MBYTE
8255/*
8256 * Multi-byte version of spell_edit_score_limit().
8257 * Keep it in sync with the above!
8258 */
8259 static int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008260spell_edit_score_limit_w(
8261 slang_T *slang,
8262 char_u *badword,
8263 char_u *goodword,
8264 int limit)
Bram Moolenaar4770d092006-01-12 23:22:24 +00008265{
8266 limitscore_T stack[10]; /* allow for over 3 * 2 edits */
8267 int stackidx;
8268 int bi, gi;
8269 int bi2, gi2;
8270 int bc, gc;
8271 int score;
8272 int score_off;
8273 int minscore;
8274 int round;
8275 char_u *p;
8276 int wbadword[MAXWLEN];
8277 int wgoodword[MAXWLEN];
8278
8279 /* Get the characters from the multi-byte strings and put them in an
8280 * int array for easy access. */
8281 bi = 0;
8282 for (p = badword; *p != NUL; )
8283 wbadword[bi++] = mb_cptr2char_adv(&p);
8284 wbadword[bi++] = 0;
8285 gi = 0;
8286 for (p = goodword; *p != NUL; )
8287 wgoodword[gi++] = mb_cptr2char_adv(&p);
8288 wgoodword[gi++] = 0;
8289
8290 /*
8291 * The idea is to go from start to end over the words. So long as
8292 * characters are equal just continue, this always gives the lowest score.
8293 * When there is a difference try several alternatives. Each alternative
8294 * increases "score" for the edit distance. Some of the alternatives are
8295 * pushed unto a stack and tried later, some are tried right away. At the
8296 * end of the word the score for one alternative is known. The lowest
8297 * possible score is stored in "minscore".
8298 */
8299 stackidx = 0;
8300 bi = 0;
8301 gi = 0;
8302 score = 0;
8303 minscore = limit + 1;
8304
8305 for (;;)
8306 {
8307 /* Skip over an equal part, score remains the same. */
8308 for (;;)
8309 {
8310 bc = wbadword[bi];
8311 gc = wgoodword[gi];
8312
8313 if (bc != gc) /* stop at a char that's different */
8314 break;
8315 if (bc == NUL) /* both words end */
8316 {
8317 if (score < minscore)
8318 minscore = score;
8319 goto pop; /* do next alternative */
8320 }
8321 ++bi;
8322 ++gi;
8323 }
8324
8325 if (gc == NUL) /* goodword ends, delete badword chars */
8326 {
8327 do
8328 {
8329 if ((score += SCORE_DEL) >= minscore)
8330 goto pop; /* do next alternative */
8331 } while (wbadword[++bi] != NUL);
8332 minscore = score;
8333 }
8334 else if (bc == NUL) /* badword ends, insert badword chars */
8335 {
8336 do
8337 {
8338 if ((score += SCORE_INS) >= minscore)
8339 goto pop; /* do next alternative */
8340 } while (wgoodword[++gi] != NUL);
8341 minscore = score;
8342 }
8343 else /* both words continue */
8344 {
8345 /* If not close to the limit, perform a change. Only try changes
8346 * that may lead to a lower score than "minscore".
8347 * round 0: try deleting a char from badword
8348 * round 1: try inserting a char in badword */
8349 for (round = 0; round <= 1; ++round)
8350 {
8351 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
8352 if (score_off < minscore)
8353 {
8354 if (score_off + SCORE_EDIT_MIN >= minscore)
8355 {
8356 /* Near the limit, rest of the words must match. We
8357 * can check that right now, no need to push an item
8358 * onto the stack. */
8359 bi2 = bi + 1 - round;
8360 gi2 = gi + round;
8361 while (wgoodword[gi2] == wbadword[bi2])
8362 {
8363 if (wgoodword[gi2] == NUL)
8364 {
8365 minscore = score_off;
8366 break;
8367 }
8368 ++bi2;
8369 ++gi2;
8370 }
8371 }
8372 else
8373 {
8374 /* try deleting a character from badword later */
8375 stack[stackidx].badi = bi + 1 - round;
8376 stack[stackidx].goodi = gi + round;
8377 stack[stackidx].score = score_off;
8378 ++stackidx;
8379 }
8380 }
8381 }
8382
8383 if (score + SCORE_SWAP < minscore)
8384 {
8385 /* If swapping two characters makes a match then the
8386 * substitution is more expensive, thus there is no need to
8387 * try both. */
8388 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
8389 {
8390 /* Swap two characters, that is: skip them. */
8391 gi += 2;
8392 bi += 2;
8393 score += SCORE_SWAP;
8394 continue;
8395 }
8396 }
8397
8398 /* Substitute one character for another which is the same
8399 * thing as deleting a character from both goodword and badword.
8400 * Use a better score when there is only a case difference. */
8401 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
8402 score += SCORE_ICASE;
8403 else
8404 {
8405 /* For a similar character use SCORE_SIMILAR. */
8406 if (slang != NULL
8407 && slang->sl_has_map
8408 && similar_chars(slang, gc, bc))
8409 score += SCORE_SIMILAR;
8410 else
8411 score += SCORE_SUBST;
8412 }
8413
8414 if (score < minscore)
8415 {
8416 /* Do the substitution. */
8417 ++gi;
8418 ++bi;
8419 continue;
8420 }
8421 }
8422pop:
8423 /*
8424 * Get here to try the next alternative, pop it from the stack.
8425 */
8426 if (stackidx == 0) /* stack is empty, finished */
8427 break;
8428
8429 /* pop an item from the stack */
8430 --stackidx;
8431 gi = stack[stackidx].goodi;
8432 bi = stack[stackidx].badi;
8433 score = stack[stackidx].score;
8434 }
8435
8436 /* When the score goes over "limit" it may actually be much higher.
8437 * Return a very large number to avoid going below the limit when giving a
8438 * bonus. */
8439 if (minscore > limit)
8440 return SCORE_MAXMAX;
8441 return minscore;
8442}
8443#endif
8444
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008445/*
8446 * ":spellinfo"
8447 */
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008448 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008449ex_spellinfo(exarg_T *eap UNUSED)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008450{
8451 int lpi;
8452 langp_T *lp;
8453 char_u *p;
8454
8455 if (no_spell_checking(curwin))
8456 return;
8457
8458 msg_start();
Bram Moolenaar860cae12010-06-05 23:22:07 +02008459 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len && !got_int; ++lpi)
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008460 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02008461 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008462 msg_puts((char_u *)"file: ");
8463 msg_puts(lp->lp_slang->sl_fname);
8464 msg_putchar('\n');
8465 p = lp->lp_slang->sl_info;
8466 if (p != NULL)
8467 {
8468 msg_puts(p);
8469 msg_putchar('\n');
8470 }
8471 }
8472 msg_end();
8473}
8474
/* Flags for spell_dump_compl(), dump_word() and dump_prefixes(); these are
 * bits that can be combined. */
#define DUMPFLAG_KEEPCASE   (1 << 0)	/* round 2: keep-case tree */
#define DUMPFLAG_COUNT	    (1 << 1)	/* include word count */
#define DUMPFLAG_ICASE	    (1 << 2)	/* ignore case when finding matches */
#define DUMPFLAG_ONECAP	    (1 << 3)	/* pattern starts with capital */
#define DUMPFLAG_ALLCAP	    (1 << 4)	/* pattern is all capitals */
Bram Moolenaar4770d092006-01-12 23:22:24 +00008480
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008481/*
8482 * ":spelldump"
8483 */
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008484 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008485ex_spelldump(exarg_T *eap)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008486{
Bram Moolenaar7a18fdc2013-09-29 13:38:29 +02008487 char_u *spl;
8488 long dummy;
8489
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008490 if (no_spell_checking(curwin))
8491 return;
Bram Moolenaar7a18fdc2013-09-29 13:38:29 +02008492 get_option_value((char_u*)"spl", &dummy, &spl, OPT_LOCAL);
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008493
Bram Moolenaar7a18fdc2013-09-29 13:38:29 +02008494 /* Create a new empty buffer in a new window. */
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008495 do_cmdline_cmd((char_u *)"new");
Bram Moolenaar7a18fdc2013-09-29 13:38:29 +02008496
8497 /* enable spelling locally in the new window */
8498 set_option_value((char_u*)"spell", TRUE, (char_u*)"", OPT_LOCAL);
Bram Moolenaar887c1fe2016-01-02 17:56:35 +01008499 set_option_value((char_u*)"spl", dummy, spl, OPT_LOCAL);
Bram Moolenaar7a18fdc2013-09-29 13:38:29 +02008500 vim_free(spl);
8501
Bram Moolenaarb5aedf32017-03-12 18:23:53 +01008502 if (!BUFEMPTY())
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008503 return;
8504
Bram Moolenaar860cae12010-06-05 23:22:07 +02008505 spell_dump_compl(NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008506
8507 /* Delete the empty line that we started with. */
8508 if (curbuf->b_ml.ml_line_count > 1)
8509 ml_delete(curbuf->b_ml.ml_line_count, FALSE);
8510
8511 redraw_later(NOT_VALID);
8512}
8513
8514/*
8515 * Go through all possible words and:
8516 * 1. When "pat" is NULL: dump a list of all words in the current buffer.
8517 * "ic" and "dir" are not used.
8518 * 2. When "pat" is not NULL: add matching words to insert mode completion.
8519 */
8520 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008521spell_dump_compl(
8522 char_u *pat, /* leading part of the word */
8523 int ic, /* ignore case */
8524 int *dir, /* direction for adding matches */
8525 int dumpflags_arg) /* DUMPFLAG_* */
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008526{
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008527 langp_T *lp;
8528 slang_T *slang;
8529 idx_T arridx[MAXWLEN];
8530 int curi[MAXWLEN];
8531 char_u word[MAXWLEN];
8532 int c;
8533 char_u *byts;
8534 idx_T *idxs;
8535 linenr_T lnum = 0;
8536 int round;
8537 int depth;
8538 int n;
8539 int flags;
Bram Moolenaar7887d882005-07-01 22:33:52 +00008540 char_u *region_names = NULL; /* region names being used */
8541 int do_region = TRUE; /* dump region names and numbers */
8542 char_u *p;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008543 int lpi;
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008544 int dumpflags = dumpflags_arg;
8545 int patlen;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008546
Bram Moolenaard0131a82006-03-04 21:46:13 +00008547 /* When ignoring case or when the pattern starts with capital pass this on
8548 * to dump_word(). */
8549 if (pat != NULL)
8550 {
8551 if (ic)
8552 dumpflags |= DUMPFLAG_ICASE;
8553 else
8554 {
8555 n = captype(pat, NULL);
8556 if (n == WF_ONECAP)
8557 dumpflags |= DUMPFLAG_ONECAP;
8558 else if (n == WF_ALLCAP
8559#ifdef FEAT_MBYTE
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008560 && (int)STRLEN(pat) > mb_ptr2len(pat)
Bram Moolenaard0131a82006-03-04 21:46:13 +00008561#else
Bram Moolenaar362e1a32006-03-06 23:29:24 +00008562 && (int)STRLEN(pat) > 1
Bram Moolenaard0131a82006-03-04 21:46:13 +00008563#endif
8564 )
8565 dumpflags |= DUMPFLAG_ALLCAP;
8566 }
8567 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008568
Bram Moolenaar7887d882005-07-01 22:33:52 +00008569 /* Find out if we can support regions: All languages must support the same
8570 * regions or none at all. */
Bram Moolenaar860cae12010-06-05 23:22:07 +02008571 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaar7887d882005-07-01 22:33:52 +00008572 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02008573 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaar7887d882005-07-01 22:33:52 +00008574 p = lp->lp_slang->sl_regions;
8575 if (p[0] != 0)
8576 {
8577 if (region_names == NULL) /* first language with regions */
8578 region_names = p;
8579 else if (STRCMP(region_names, p) != 0)
8580 {
8581 do_region = FALSE; /* region names are different */
8582 break;
8583 }
8584 }
8585 }
8586
8587 if (do_region && region_names != NULL)
8588 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008589 if (pat == NULL)
8590 {
8591 vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
8592 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8593 }
Bram Moolenaar7887d882005-07-01 22:33:52 +00008594 }
8595 else
8596 do_region = FALSE;
8597
8598 /*
8599 * Loop over all files loaded for the entries in 'spelllang'.
8600 */
Bram Moolenaar860cae12010-06-05 23:22:07 +02008601 for (lpi = 0; lpi < curwin->w_s->b_langp.ga_len; ++lpi)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008602 {
Bram Moolenaar860cae12010-06-05 23:22:07 +02008603 lp = LANGP_ENTRY(curwin->w_s->b_langp, lpi);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008604 slang = lp->lp_slang;
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008605 if (slang->sl_fbyts == NULL) /* reloading failed */
8606 continue;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008607
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008608 if (pat == NULL)
8609 {
8610 vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
8611 ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
8612 }
8613
8614 /* When matching with a pattern and there are no prefixes only use
8615 * parts of the tree that match "pat". */
8616 if (pat != NULL && slang->sl_pbyts == NULL)
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008617 patlen = (int)STRLEN(pat);
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008618 else
Bram Moolenaareb3593b2006-04-22 22:33:57 +00008619 patlen = -1;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008620
8621 /* round 1: case-folded tree
8622 * round 2: keep-case tree */
8623 for (round = 1; round <= 2; ++round)
8624 {
8625 if (round == 1)
8626 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008627 dumpflags &= ~DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008628 byts = slang->sl_fbyts;
8629 idxs = slang->sl_fidxs;
8630 }
8631 else
8632 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008633 dumpflags |= DUMPFLAG_KEEPCASE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008634 byts = slang->sl_kbyts;
8635 idxs = slang->sl_kidxs;
8636 }
8637 if (byts == NULL)
8638 continue; /* array is empty */
8639
8640 depth = 0;
8641 arridx[0] = 0;
8642 curi[0] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008643 while (depth >= 0 && !got_int
8644 && (pat == NULL || !compl_interrupted))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008645 {
8646 if (curi[depth] > byts[arridx[depth]])
8647 {
8648 /* Done all bytes at this node, go up one level. */
8649 --depth;
8650 line_breakcheck();
Bram Moolenaar472e8592016-10-15 17:06:47 +02008651 ins_compl_check_keys(50, FALSE);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008652 }
8653 else
8654 {
8655 /* Do one more byte at this node. */
8656 n = arridx[depth] + curi[depth];
8657 ++curi[depth];
8658 c = byts[n];
8659 if (c == 0)
8660 {
8661 /* End of word, deal with the word.
8662 * Don't use keep-case words in the fold-case tree,
8663 * they will appear in the keep-case tree.
8664 * Only use the word when the region matches. */
8665 flags = (int)idxs[n];
8666 if ((round == 2 || (flags & WF_KEEPCAP) == 0)
Bram Moolenaarac6e65f2005-08-29 22:25:38 +00008667 && (flags & WF_NEEDCOMP) == 0
Bram Moolenaar7887d882005-07-01 22:33:52 +00008668 && (do_region
8669 || (flags & WF_REGION) == 0
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008670 || (((unsigned)flags >> 16)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008671 & lp->lp_region) != 0))
8672 {
8673 word[depth] = NUL;
Bram Moolenaar7887d882005-07-01 22:33:52 +00008674 if (!do_region)
8675 flags &= ~WF_REGION;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00008676
8677 /* Dump the basic word if there is no prefix or
8678 * when it's the first one. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008679 c = (unsigned)flags >> 24;
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00008680 if (c == 0 || curi[depth] == 2)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008681 {
8682 dump_word(slang, word, pat, dir,
8683 dumpflags, flags, lnum);
8684 if (pat == NULL)
8685 ++lnum;
8686 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008687
8688 /* Apply the prefix, if there is one. */
Bram Moolenaar0a5fe212005-06-24 23:01:23 +00008689 if (c != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008690 lnum = dump_prefixes(slang, word, pat, dir,
8691 dumpflags, flags, lnum);
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008692 }
8693 }
8694 else
8695 {
8696 /* Normal char, go one level deeper. */
8697 word[depth++] = c;
8698 arridx[depth] = idxs[n];
8699 curi[depth] = 1;
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008700
8701 /* Check if this characters matches with the pattern.
8702 * If not skip the whole tree below it.
Bram Moolenaard0131a82006-03-04 21:46:13 +00008703 * Always ignore case here, dump_word() will check
8704 * proper case later. This isn't exactly right when
8705 * length changes for multi-byte characters with
8706 * ignore case... */
8707 if (depth <= patlen
8708 && MB_STRNICMP(word, pat, depth) != 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008709 --depth;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008710 }
8711 }
8712 }
8713 }
8714 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008715}
8716
8717/*
8718 * Dump one word: apply case modifications and append a line to the buffer.
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008719 * When "lnum" is zero add insert mode completion.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008720 */
8721 static void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008722dump_word(
8723 slang_T *slang,
8724 char_u *word,
8725 char_u *pat,
8726 int *dir,
8727 int dumpflags,
8728 int wordflags,
8729 linenr_T lnum)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008730{
8731 int keepcap = FALSE;
8732 char_u *p;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008733 char_u *tw;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008734 char_u cword[MAXWLEN];
Bram Moolenaar7887d882005-07-01 22:33:52 +00008735 char_u badword[MAXWLEN + 10];
8736 int i;
Bram Moolenaard0131a82006-03-04 21:46:13 +00008737 int flags = wordflags;
8738
8739 if (dumpflags & DUMPFLAG_ONECAP)
8740 flags |= WF_ONECAP;
8741 if (dumpflags & DUMPFLAG_ALLCAP)
8742 flags |= WF_ALLCAP;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008743
Bram Moolenaar4770d092006-01-12 23:22:24 +00008744 if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008745 {
8746 /* Need to fix case according to "flags". */
8747 make_case_word(word, cword, flags);
8748 p = cword;
8749 }
8750 else
8751 {
8752 p = word;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008753 if ((dumpflags & DUMPFLAG_KEEPCASE)
8754 && ((captype(word, NULL) & WF_KEEPCAP) == 0
Bram Moolenaar0dc065e2005-07-04 22:49:24 +00008755 || (flags & WF_FIXCAP) != 0))
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008756 keepcap = TRUE;
8757 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00008758 tw = p;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008759
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008760 if (pat == NULL)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008761 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008762 /* Add flags and regions after a slash. */
8763 if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
Bram Moolenaar4770d092006-01-12 23:22:24 +00008764 {
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008765 STRCPY(badword, p);
8766 STRCAT(badword, "/");
8767 if (keepcap)
8768 STRCAT(badword, "=");
8769 if (flags & WF_BANNED)
8770 STRCAT(badword, "!");
8771 else if (flags & WF_RARE)
8772 STRCAT(badword, "?");
8773 if (flags & WF_REGION)
8774 for (i = 0; i < 7; ++i)
8775 if (flags & (0x10000 << i))
8776 sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
8777 p = badword;
Bram Moolenaar4770d092006-01-12 23:22:24 +00008778 }
Bram Moolenaar4770d092006-01-12 23:22:24 +00008779
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008780 if (dumpflags & DUMPFLAG_COUNT)
8781 {
8782 hashitem_T *hi;
8783
8784 /* Include the word count for ":spelldump!". */
8785 hi = hash_find(&slang->sl_wordcount, tw);
8786 if (!HASHITEM_EMPTY(hi))
8787 {
8788 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
8789 tw, HI2WC(hi)->wc_count);
8790 p = IObuff;
8791 }
8792 }
8793
8794 ml_append(lnum, p, (colnr_T)0, FALSE);
8795 }
Bram Moolenaard0131a82006-03-04 21:46:13 +00008796 else if (((dumpflags & DUMPFLAG_ICASE)
8797 ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
8798 : STRNCMP(p, pat, STRLEN(pat)) == 0)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008799 && ins_compl_add_infercase(p, (int)STRLEN(p),
Bram Moolenaare8c3a142006-08-29 14:30:35 +00008800 p_ic, NULL, *dir, 0) == OK)
Bram Moolenaard0131a82006-03-04 21:46:13 +00008801 /* if dir was BACKWARD then honor it just once */
8802 *dir = FORWARD;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008803}
8804
8805/*
Bram Moolenaara1ba8112005-06-28 23:23:32 +00008806 * For ":spelldump": Find matching prefixes for "word". Prepend each to
8807 * "word" and append a line to the buffer.
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008808 * When "lnum" is zero add insert mode completion.
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008809 * Return the updated line number.
8810 */
8811 static linenr_T
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008812dump_prefixes(
8813 slang_T *slang,
8814 char_u *word, /* case-folded word */
8815 char_u *pat,
8816 int *dir,
8817 int dumpflags,
8818 int flags, /* flags with prefix ID */
8819 linenr_T startlnum)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008820{
8821 idx_T arridx[MAXWLEN];
8822 int curi[MAXWLEN];
8823 char_u prefix[MAXWLEN];
Bram Moolenaar53805d12005-08-01 07:08:33 +00008824 char_u word_up[MAXWLEN];
8825 int has_word_up = FALSE;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008826 int c;
8827 char_u *byts;
8828 idx_T *idxs;
8829 linenr_T lnum = startlnum;
8830 int depth;
8831 int n;
8832 int len;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008833 int i;
8834
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00008835 /* If the word starts with a lower-case letter make the word with an
Bram Moolenaar53805d12005-08-01 07:08:33 +00008836 * upper-case letter in word_up[]. */
8837 c = PTR2CHAR(word);
8838 if (SPELL_TOUPPER(c) != c)
8839 {
8840 onecap_copy(word, word_up, TRUE);
8841 has_word_up = TRUE;
8842 }
8843
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008844 byts = slang->sl_pbyts;
8845 idxs = slang->sl_pidxs;
8846 if (byts != NULL) /* array not is empty */
8847 {
8848 /*
8849 * Loop over all prefixes, building them byte-by-byte in prefix[].
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008850 * When at the end of a prefix check that it supports "flags".
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008851 */
8852 depth = 0;
8853 arridx[0] = 0;
8854 curi[0] = 1;
8855 while (depth >= 0 && !got_int)
8856 {
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008857 n = arridx[depth];
8858 len = byts[n];
8859 if (curi[depth] > len)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008860 {
8861 /* Done all bytes at this node, go up one level. */
8862 --depth;
8863 line_breakcheck();
8864 }
8865 else
8866 {
8867 /* Do one more byte at this node. */
Bram Moolenaardfb9ac02005-07-05 21:36:03 +00008868 n += curi[depth];
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008869 ++curi[depth];
8870 c = byts[n];
8871 if (c == 0)
8872 {
8873 /* End of prefix, find out how many IDs there are. */
8874 for (i = 1; i < len; ++i)
8875 if (byts[n + i] != 0)
8876 break;
8877 curi[depth] += i - 1;
8878
Bram Moolenaar53805d12005-08-01 07:08:33 +00008879 c = valid_word_prefix(i, n, flags, word, slang, FALSE);
8880 if (c != 0)
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008881 {
Bram Moolenaar9c96f592005-06-30 21:52:39 +00008882 vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008883 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +00008884 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008885 : flags, lnum);
8886 if (lnum != 0)
8887 ++lnum;
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008888 }
Bram Moolenaar53805d12005-08-01 07:08:33 +00008889
8890 /* Check for prefix that matches the word when the
8891 * first letter is upper-case, but only if the prefix has
8892 * a condition. */
8893 if (has_word_up)
8894 {
8895 c = valid_word_prefix(i, n, flags, word_up, slang,
8896 TRUE);
8897 if (c != 0)
8898 {
8899 vim_strncpy(prefix + depth, word_up,
8900 MAXWLEN - depth - 1);
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008901 dump_word(slang, prefix, pat, dir, dumpflags,
Bram Moolenaar53805d12005-08-01 07:08:33 +00008902 (c & WF_RAREPFX) ? (flags | WF_RARE)
Bram Moolenaarb475fb92006-03-02 22:40:52 +00008903 : flags, lnum);
8904 if (lnum != 0)
8905 ++lnum;
Bram Moolenaar53805d12005-08-01 07:08:33 +00008906 }
8907 }
Bram Moolenaarf417f2b2005-06-23 22:29:21 +00008908 }
8909 else
8910 {
8911 /* Normal char, go one level deeper. */
8912 prefix[depth++] = c;
8913 arridx[depth] = idxs[n];
8914 curi[depth] = 1;
8915 }
8916 }
8917 }
8918 }
8919
8920 return lnum;
8921}
8922
Bram Moolenaar95529562005-08-25 21:21:38 +00008923/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00008924 * Move "p" to the end of word "start".
8925 * Uses the spell-checking word characters.
Bram Moolenaar95529562005-08-25 21:21:38 +00008926 */
8927 char_u *
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008928spell_to_word_end(char_u *start, win_T *win)
Bram Moolenaar95529562005-08-25 21:21:38 +00008929{
8930 char_u *p = start;
8931
Bram Moolenaar860cae12010-06-05 23:22:07 +02008932 while (*p != NUL && spell_iswordp(p, win))
Bram Moolenaar91acfff2017-03-12 19:22:36 +01008933 MB_PTR_ADV(p);
Bram Moolenaar95529562005-08-25 21:21:38 +00008934 return p;
8935}
8936
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008937#if defined(FEAT_INS_EXPAND) || defined(PROTO)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008938/*
Bram Moolenaara40ceaf2006-01-13 22:35:40 +00008939 * For Insert mode completion CTRL-X s:
8940 * Find start of the word in front of column "startcol".
8941 * We don't check if it is badly spelled, with completion we can only change
8942 * the word in front of the cursor.
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008943 * Returns the column number of the word.
8944 */
8945 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008946spell_word_start(int startcol)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008947{
8948 char_u *line;
8949 char_u *p;
8950 int col = 0;
8951
Bram Moolenaar95529562005-08-25 21:21:38 +00008952 if (no_spell_checking(curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008953 return startcol;
8954
8955 /* Find a word character before "startcol". */
8956 line = ml_get_curline();
8957 for (p = line + startcol; p > line; )
8958 {
Bram Moolenaar91acfff2017-03-12 19:22:36 +01008959 MB_PTR_BACK(line, p);
Bram Moolenaarcc63c642013-11-12 04:44:01 +01008960 if (spell_iswordp_nmw(p, curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008961 break;
8962 }
8963
8964 /* Go back to start of the word. */
8965 while (p > line)
8966 {
Bram Moolenaara93fa7e2006-04-17 22:14:47 +00008967 col = (int)(p - line);
Bram Moolenaar91acfff2017-03-12 19:22:36 +01008968 MB_PTR_BACK(line, p);
Bram Moolenaar860cae12010-06-05 23:22:07 +02008969 if (!spell_iswordp(p, curwin))
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008970 break;
8971 col = 0;
8972 }
8973
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008974 return col;
8975}
8976
8977/*
Bram Moolenaar4effc802005-09-30 21:12:02 +00008978 * Need to check for 'spellcapcheck' now, the word is removed before
8979 * expand_spelling() is called. Therefore the ugly global variable.
8980 */
8981static int spell_expand_need_cap;
8982
8983 void
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008984spell_expand_check_cap(colnr_T col)
Bram Moolenaar4effc802005-09-30 21:12:02 +00008985{
8986 spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
8987}
8988
8989/*
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008990 * Get list of spelling suggestions.
8991 * Used for Insert mode completion CTRL-X ?.
8992 * Returns the number of matches. The matches are in "matchp[]", array of
8993 * allocated strings.
8994 */
Bram Moolenaar8b59de92005-08-11 19:59:29 +00008995 int
Bram Moolenaar764b23c2016-01-30 21:10:09 +01008996expand_spelling(
8997 linenr_T lnum UNUSED,
8998 char_u *pat,
8999 char_u ***matchp)
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009000{
9001 garray_T ga;
9002
Bram Moolenaar4770d092006-01-12 23:22:24 +00009003 spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
Bram Moolenaar8b59de92005-08-11 19:59:29 +00009004 *matchp = ga.ga_data;
9005 return ga.ga_len;
9006}
9007#endif
9008
Bram Moolenaarf71a3db2006-03-12 21:50:18 +00009009#endif /* FEAT_SPELL */