Bram Moolenaar | e19defe | 2005-03-21 08:23:33 +0000 | [diff] [blame] | 1 | /* vi:set ts=8 sts=4 sw=4: |
2 | * | ||||
3 | * VIM - Vi IMproved by Bram Moolenaar | ||||
4 | * | ||||
5 | * Do ":help uganda" in Vim to read copying and usage conditions. | ||||
6 | * Do ":help credits" in Vim to see a list of people who contributed. | ||||
7 | * See README.txt for an overview of the Vim source code. | ||||
8 | */ | ||||
9 | |||||
10 | /* | ||||
11 | * spell.c: code for spell checking | ||||
Bram Moolenaar | fc73515 | 2005-03-22 22:54:12 +0000 | [diff] [blame] | 12 | * |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 13 | * The spell checking mechanism uses a tree (aka trie). Each node in the tree |
14 | * has a list of bytes that can appear (siblings). For each byte there is a | ||||
15 | * pointer to the node with the byte that follows in the word (child). | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 16 | * |
17 | * A NUL byte is used where the word may end. The bytes are sorted, so that | ||||
18 | * binary searching can be used and the NUL bytes are at the start. The | ||||
19 | * number of possible bytes is stored before the list of bytes. | ||||
20 | * | ||||
21 | * The tree uses two arrays: "byts" stores the characters, "idxs" stores | ||||
22 | * either the next index or flags. The tree starts at index 0. For example, | ||||
23 | * to look up "vi" this sequence is followed: | ||||
24 | * i = 0 | ||||
25 | * len = byts[i] | ||||
26 | * n = where "v" appears in byts[i + 1] to byts[i + len] | ||||
27 | * i = idxs[n] | ||||
28 | * len = byts[i] | ||||
29 | * n = where "i" appears in byts[i + 1] to byts[i + len] | ||||
30 | * i = idxs[n] | ||||
31 | * len = byts[i] | ||||
32 | * find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi". | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 33 | * |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 34 | * There are two word trees: one with case-folded words and one with words in |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 35 | * original case. The second one is only used for keep-case words and is |
36 | * usually small. | ||||
37 | * | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 38 | * There is one additional tree for when not all prefixes are applied when |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 39 | * generating the .spl file. This tree stores all the possible prefixes, as |
40 | * if they were words. At each word (prefix) end the prefix nr is stored, the | ||||
41 | * following word must support this prefix nr. And the condition nr is | ||||
42 | * stored, used to lookup the condition that the word must match with. | ||||
43 | * | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 44 | * Thanks to Olaf Seibert for providing an example implementation of this tree |
45 | * and the compression mechanism. | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 46 | * LZ trie ideas: |
47 | * http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf | ||||
48 | * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html | ||||
Bram Moolenaar | 63d5a1e | 2005-04-19 21:30:25 +0000 | [diff] [blame] | 49 | * |
50 | * Matching involves checking the caps type: Onecap ALLCAP KeepCap. | ||||
Bram Moolenaar | 63d5a1e | 2005-04-19 21:30:25 +0000 | [diff] [blame] | 51 | * |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 52 | * Why doesn't Vim use aspell/ispell/myspell/etc.? |
53 | * See ":help develop-spell". | ||||
54 | */ | ||||
55 | |||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 56 | /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word. |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 57 | * Only use it for small word lists! Disabled by default: change the "#if 0" below to "#if 1" to define it. */ |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 58 | #if 0 |
59 | # define SPELL_PRINTTREE | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 60 | #endif |
61 | |||||
Bram Moolenaar | 2d3f489 | 2006-01-20 23:02:51 +0000 | [diff] [blame] | 62 | /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a |
63 | * specific word. Disabled by default: change the "#if 0" below to "#if 1" to define it. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 64 | #if 0 |
65 | # define DEBUG_TRIEWALK | ||||
66 | #endif | ||||
67 | |||||
/*
 * Use this to adjust the score after finding suggestions, based on the
 * suggested word sounding like the bad word.  This is much faster than doing
 * it for every possible suggestion.
 * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
 * vs "ht") and goes down in the list.
 * Used when 'spellsuggest' is set to "best".
 *
 * The result is the weighted average (3 * word_score + sound_score) / 4,
 * truncated when the arguments are integers.
 * Both parameters are parenthesized in the expansion, so that arguments such
 * as "a + b" or "c ? x : y" are evaluated as a unit.  Each argument is
 * evaluated exactly once.
 */
#define RESCORE(word_score, sound_score) ((3 * (word_score) + (sound_score)) / 4)
77 | |||||
/*
 * Do the opposite of RESCORE(): based on a maximum end score and a known
 * sound score, compute the maximum word score that can be used.
 *
 * Solving "end = (3 * word + sound) / 4" for the word score gives
 * (4 * end - sound) / 3.  The first argument, although named "word_score",
 * is therefore the maximum end score.
 * Both parameters are parenthesized in the expansion, so that arguments such
 * as "a + b" are evaluated as a unit.  Each argument is evaluated exactly
 * once.
 */
#define MAXSCORE(word_score, sound_score) ((4 * (word_score) - (sound_score)) / 3)
83 | |||||
84 | /* | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 85 | * Vim spell file format: <HEADER> |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 86 | * <SECTIONS> |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 87 | * <LWORDTREE> |
88 | * <KWORDTREE> | ||||
89 | * <PREFIXTREE> | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 90 | * |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 91 | * <HEADER>: <fileID> <versionnr> |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 92 | * |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 93 | * <fileID> 8 bytes "VIMspell" |
94 | * <versionnr> 1 byte VIMSPELLVERSION | ||||
95 | * | ||||
96 | * | ||||
97 | * Sections make it possible to add information to the .spl file without | ||||
98 | * making it incompatible with previous versions. There are two kinds of | ||||
99 | * sections: | ||||
100 | * 1. Not essential for correct spell checking. E.g. for making suggestions. | ||||
101 | * These are skipped when not supported. | ||||
102 | * 2. Optional information, but essential for spell checking when present. | ||||
103 | * E.g. conditions for affixes. When this section is present but not | ||||
104 | * supported an error message is given. | ||||
105 | * | ||||
106 | * <SECTIONS>: <section> ... <sectionend> | ||||
107 | * | ||||
108 | * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | ||||
109 | * | ||||
110 | * <sectionID> 1 byte number from 0 to 254 identifying the section | ||||
111 | * | ||||
112 | * <sectionflags> 1 byte SNF_REQUIRED: this section is required for correct | ||||
113 | * spell checking | ||||
114 | * | ||||
115 | * <sectionlen> 4 bytes length of section contents, MSB first | ||||
116 | * | ||||
117 | * <sectionend> 1 byte SN_END | ||||
118 | * | ||||
119 | * | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 120 | * sectionID == SN_INFO: <infotext> |
121 | * <infotext> N bytes free format text with spell file info (version, | ||||
122 | * website, etc) | ||||
123 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 124 | * sectionID == SN_REGION: <regionname> ... |
125 | * <regionname> 2 bytes Up to 8 region names: ca, au, etc. Lower case. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 126 | * First <regionname> is region 1. |
127 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 128 | * sectionID == SN_CHARFLAGS: <charflagslen> <charflags> |
129 | * <folcharslen> <folchars> | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 130 | * <charflagslen> 1 byte Number of bytes in <charflags> (should be 128). |
131 | * <charflags> N bytes List of flags (first one is for character 128): | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 132 | * 0x01 word character CF_WORD |
133 | * 0x02 upper-case character CF_UPPER | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 134 | * <folcharslen> 2 bytes Number of bytes in <folchars>. |
135 | * <folchars> N bytes Folded characters, first one is for character 128. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 136 | * |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 137 | * sectionID == SN_MIDWORD: <midword> |
138 | * <midword> N bytes Characters that are word characters only when used | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 139 | * in the middle of a word. |
140 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 141 | * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ... |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 142 | * <prefcondcnt> 2 bytes Number of <prefcond> items following. |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 143 | * <prefcond> : <condlen> <condstr> |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 144 | * <condlen> 1 byte Length of <condstr>. |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 145 | * <condstr> N bytes Condition for the prefix. |
146 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 147 | * sectionID == SN_REP: <repcount> <rep> ... |
148 | * <repcount> 2 bytes number of <rep> items, MSB first. | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 149 | * <rep> : <repfromlen> <repfrom> <reptolen> <repto> |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 150 | * <repfromlen> 1 byte length of <repfrom> |
151 | * <repfrom> N bytes "from" part of replacement | ||||
152 | * <reptolen> 1 byte length of <repto> | ||||
153 | * <repto> N bytes "to" part of replacement | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 154 | * |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 155 | * sectionID == SN_REPSAL: <repcount> <rep> ... |
156 | * just like SN_REP but for soundfolded words | ||||
157 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 158 | * sectionID == SN_SAL: <salflags> <salcount> <sal> ... |
159 | * <salflags> 1 byte flags for soundsalike conversion: | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 160 | * SAL_F0LLOWUP |
161 | * SAL_COLLAPSE | ||||
162 | * SAL_REM_ACCENTS | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 163 | * <salcount> 2 bytes number of <sal> items following |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 164 | * <sal> : <salfromlen> <salfrom> <saltolen> <salto> |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 165 | * <salfromlen> 1 byte length of <salfrom> |
166 | * <salfrom> N bytes "from" part of soundsalike | ||||
167 | * <saltolen> 1 byte length of <salto> | ||||
168 | * <salto> N bytes "to" part of soundsalike | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 169 | * |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 170 | * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> |
171 | * <sofofromlen> 2 bytes length of <sofofrom> | ||||
172 | * <sofofrom> N bytes "from" part of soundfold | ||||
173 | * <sofotolen> 2 bytes length of <sofoto> | ||||
174 | * <sofoto> N bytes "to" part of soundfold | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 175 | * |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 176 | * sectionID == SN_SUGFILE: <timestamp> |
177 | * <timestamp> 8 bytes time in seconds that must match with .sug file | ||||
178 | * | ||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 179 | * sectionID == SN_NOSPLITSUGS: nothing |
180 | * | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 181 | * sectionID == SN_WORDS: <word> ... |
182 | * <word> N bytes NUL terminated common word | ||||
183 | * | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 184 | * sectionID == SN_MAP: <mapstr> |
185 | * <mapstr> N bytes String with sequences of similar characters, | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 186 | * separated by slashes. |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 187 | * |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 188 | * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions> |
189 | * <comppatcount> <comppattern> ... <compflags> | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 190 | * <compmax> 1 byte Maximum nr of words in compound word. |
191 | * <compminlen> 1 byte Minimal word length for compounding. | ||||
192 | * <compsylmax> 1 byte Maximum nr of syllables in compound word. | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 193 | * <compoptions> 2 bytes COMP_ flags. |
194 | * <comppatcount> 2 bytes number of <comppattern> following | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 195 | * <compflags> N bytes Flags from COMPOUNDRULE items, separated by |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 196 | * slashes. |
197 | * | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 198 | * <comppattern>: <comppatlen> <comppattext> |
199 | * <comppatlen> 1 byte length of <comppattext> | ||||
200 | * <comppattext> N bytes end or begin chars from CHECKCOMPOUNDPATTERN | ||||
201 | * | ||||
202 | * sectionID == SN_NOBREAK: (empty, its presence is what matters) | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 203 | * |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 204 | * sectionID == SN_SYLLABLE: <syllable> |
205 | * <syllable> N bytes String from SYLLABLE item. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 206 | * |
207 | * <LWORDTREE>: <wordtree> | ||||
208 | * | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 209 | * <KWORDTREE>: <wordtree> |
210 | * | ||||
211 | * <PREFIXTREE>: <wordtree> | ||||
212 | * | ||||
213 | * | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 214 | * <wordtree>: <nodecount> <nodedata> ... |
215 | * | ||||
216 | * <nodecount> 4 bytes Number of nodes following. MSB first. | ||||
217 | * | ||||
218 | * <nodedata>: <siblingcount> <sibling> ... | ||||
219 | * | ||||
220 | * <siblingcount> 1 byte Number of siblings in this node. The siblings | ||||
221 | * follow in sorted order. | ||||
222 | * | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 223 | * <sibling>: <byte> [ <nodeidx> <xbyte> |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 224 | * | <flags> [<flags2>] [<region>] [<affixID>] |
225 | * | [<pflags>] <affixID> <prefcondnr> ] | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 226 | * |
227 | * <byte> 1 byte Byte value of the sibling. Special cases: | ||||
228 | * BY_NOFLAGS: End of word without flags and for all | ||||
229 | * regions. | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 230 | * For PREFIXTREE <affixID> and |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 231 | * <prefcondnr> follow. |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 232 | * BY_FLAGS: End of word, <flags> follow. |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 233 | * For PREFIXTREE <pflags>, <affixID> |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 234 | * and <prefcondnr> follow. |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 235 | * BY_FLAGS2: End of word, <flags> and <flags2> |
236 | * follow. Not used in PREFIXTREE. | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 237 | * BY_INDEX: Child of sibling is shared, <nodeidx> |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 238 | * and <xbyte> follow. |
239 | * | ||||
240 | * <nodeidx> 3 bytes Index of child for this sibling, MSB first. | ||||
241 | * | ||||
242 | * <xbyte> 1 byte byte value of the sibling. | ||||
243 | * | ||||
244 | * <flags> 1 byte bitmask of: | ||||
245 | * WF_ALLCAP word must have only capitals | ||||
246 | * WF_ONECAP first char of word must be capital | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 247 | * WF_KEEPCAP keep-case word |
248 | * WF_FIXCAP keep-case word, all caps not allowed | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 249 | * WF_RARE rare word |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 250 | * WF_BANNED bad word |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 251 | * WF_REGION <region> follows |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 252 | * WF_AFX <affixID> follows |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 253 | * |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 254 | * <flags2> 1 byte Bitmask of: |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 255 | * WF_HAS_AFF >> 8 word includes affix |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 256 | * WF_NEEDCOMP >> 8 word only valid in compound |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 257 | * WF_NOSUGGEST >> 8 word not used for suggestions |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 258 | * WF_COMPROOT >> 8 word already a compound |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 259 | * |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 260 | * <pflags> 1 byte bitmask of: |
261 | * WFP_RARE rare prefix | ||||
262 | * WFP_NC non-combining prefix | ||||
263 | * WFP_UP letter after prefix made upper case | ||||
264 | * | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 265 | * <region> 1 byte Bitmask for regions in which word is valid. When |
266 | * omitted it's valid in all regions. | ||||
267 | * Lowest bit is for region 1. | ||||
268 | * | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 269 | * <affixID> 1 byte ID of affix that can be used with this word. In |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 270 | * PREFIXTREE used for the required prefix ID. |
271 | * | ||||
272 | * <prefcondnr> 2 bytes Prefix condition number, index in <prefcond> list | ||||
273 | * from the SN_PREFCOND section. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 274 | * |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 275 | * All text characters are in 'encoding', but stored as single bytes. |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 276 | */ |
277 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 278 | /* |
279 | * Vim .sug file format: <SUGHEADER> | ||||
280 | * <SUGWORDTREE> | ||||
281 | * <SUGTABLE> | ||||
282 | * | ||||
283 | * <SUGHEADER>: <fileID> <versionnr> <timestamp> | ||||
284 | * | ||||
285 | * <fileID> 6 bytes "VIMsug" | ||||
286 | * <versionnr> 1 byte VIMSUGVERSION | ||||
287 | * <timestamp> 8 bytes timestamp that must match with .spl file | ||||
288 | * | ||||
289 | * | ||||
290 | * <SUGWORDTREE>: <wordtree> (see above, no flags or region used) | ||||
291 | * | ||||
292 | * | ||||
293 | * <SUGTABLE>: <sugwcount> <sugline> ... | ||||
294 | * | ||||
295 | * <sugwcount> 4 bytes number of <sugline> following | ||||
296 | * | ||||
297 | * <sugline>: <sugnr> ... NUL | ||||
298 | * | ||||
299 | * <sugnr>: X bytes word number that results in this soundfolded word, | ||||
300 | * stored as an offset to the previous number in as | ||||
301 | * few bytes as possible, see offset2bytes() | ||||
302 | */ | ||||
303 | |||||
Bram Moolenaar | e19defe | 2005-03-21 08:23:33 +0000 | [diff] [blame] | 304 | #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64) |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 305 | # include "vimio.h" /* for lseek(), must be before vim.h */ |
Bram Moolenaar | e19defe | 2005-03-21 08:23:33 +0000 | [diff] [blame] | 306 | #endif |
307 | |||||
308 | #include "vim.h" | ||||
309 | |||||
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 310 | #if defined(FEAT_SPELL) || defined(PROTO) |
Bram Moolenaar | e19defe | 2005-03-21 08:23:33 +0000 | [diff] [blame] | 311 | |
312 | #ifdef HAVE_FCNTL_H | ||||
313 | # include <fcntl.h> | ||||
314 | #endif | ||||
315 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 316 | #ifndef UNIX /* it's in os_unix.h for Unix */ |
317 | # include <time.h> /* for time_t */ | ||||
318 | #endif | ||||
319 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 320 | #define MAXWLEN 250 /* Assumed maximum length of a word, in bytes. |
321 | Some places assume a word length fits in a | ||||
322 | byte, thus this value can't be above 255. */ | ||||
Bram Moolenaar | fc73515 | 2005-03-22 22:54:12 +0000 | [diff] [blame] | 323 | |
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 324 | /* The type used for indexes in the word tree needs to be at least 4 bytes: int is used when SIZEOF_INT is above 3, otherwise long. If int |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 325 | * is 8 bytes we could use something smaller, but what? */ |
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 326 | #if SIZEOF_INT > 3 |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 327 | typedef int idx_T; |
328 | #else | ||||
329 | typedef long idx_T; | ||||
330 | #endif | ||||
331 | |||||
332 | /* Bit flags used for a word, stored in the <flags> byte of the word tree. Only the lowest byte can be used (all eight bits are taken), the region byte | ||||
333 | * comes above it. */ | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 334 | #define WF_REGION 0x01 /* region byte follows */ |
335 | #define WF_ONECAP 0x02 /* word with one capital (or all capitals) */ | ||||
336 | #define WF_ALLCAP 0x04 /* word must be all capitals */ | ||||
337 | #define WF_RARE 0x08 /* rare word */ | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 338 | #define WF_BANNED 0x10 /* bad word */ |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 339 | #define WF_AFX 0x20 /* affix ID follows */ |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 340 | #define WF_FIXCAP 0x40 /* keep-case word, allcap not allowed */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 341 | #define WF_KEEPCAP 0x80 /* keep-case word */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 342 | |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 343 | /* Bit flags for <flags2>, shifted up one byte to be used in wn_flags together with the <flags> bits; the spell file format describes the <flags2> byte as these values >> 8. */ |
344 | #define WF_HAS_AFF 0x0100 /* word includes affix */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 345 | #define WF_NEEDCOMP 0x0200 /* word only valid in compound */ |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 346 | #define WF_NOSUGGEST 0x0400 /* word not to be suggested */ |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 347 | #define WF_COMPROOT 0x0800 /* already compounded word, COMPOUNDROOT */ |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 348 | |
Bram Moolenaar | 2d3f489 | 2006-01-20 23:02:51 +0000 | [diff] [blame] | 349 | /* Only used for su_badflags. Note that it has the same value (0x20) as WF_AFX. */ |
350 | #define WF_MIXCAP 0x20 /* mix of upper and lower case: macaRONI */ | ||||
351 | |||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 352 | #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) /* mask of the four capitalisation flags; WF_MIXCAP is not included */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 353 | |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 354 | /* Bit flags for <pflags>, the flags byte that can follow a word end in the prefix tree. */ |
355 | #define WFP_RARE 0x01 /* rare prefix */ | ||||
356 | #define WFP_NC 0x02 /* prefix is not combining */ | ||||
357 | #define WFP_UP 0x04 /* to-upper prefix: letter after prefix made upper case */ | ||||
358 | |||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 359 | /* Flags for postponed prefixes. Must be above affixID (one byte) |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 360 | * and prefcondnr (two bytes), which is why each WFP_ flag is shifted up by 24 bits here. */ |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 361 | #define WF_RAREPFX (WFP_RARE << 24) /* in sl_pidxs: flag for rare |
362 | * postponed prefix */ | ||||
363 | #define WF_PFX_NC (WFP_NC << 24) /* in sl_pidxs: flag for non-combining | ||||
364 | * postponed prefix */ | ||||
365 | #define WF_PFX_UP (WFP_UP << 24) /* in sl_pidxs: flag for to-upper | ||||
366 | * postponed prefix */ | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 367 | |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 368 | /* Bit flags for <compoptions>, the two-byte field of the SN_COMPOUND section. */ |
369 | #define COMP_CHECKDUP 1 /* CHECKCOMPOUNDDUP */ | ||||
370 | #define COMP_CHECKREP 2 /* CHECKCOMPOUNDREP */ | ||||
371 | #define COMP_CHECKCASE 4 /* CHECKCOMPOUNDCASE */ | ||||
372 | #define COMP_CHECKTRIPLE 8 /* CHECKCOMPOUNDTRIPLE */ | ||||
373 | |||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 374 | /* Special byte values for <byte> in a <sibling> of the spell file word tree. Some are only used in the tree for |
375 | * postponed prefixes, some only in the other trees. This is a bit messy... */ | ||||
376 | #define BY_NOFLAGS 0 /* end of word without flags or region; for | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 377 | * postponed prefix: no <pflags> */ |
378 | #define BY_INDEX 1 /* child is shared, index (<nodeidx>) and <xbyte> follow */ | ||||
379 | #define BY_FLAGS 2 /* end of word, <flags> byte follows; for | ||||
380 | * postponed prefix: <pflags> follows */ | ||||
381 | #define BY_FLAGS2 3 /* end of word, <flags> and <flags2> bytes | ||||
382 | * follow; never used in prefix tree */ | ||||
383 | #define BY_SPECIAL BY_FLAGS2 /* highest special byte value (currently BY_FLAGS2) */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 384 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 385 | /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep, |
386 | * si_repsal, sl_rep, and si_sal. Not for sl_sal, which uses salitem_T! | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 387 | * Each item is one replacement: from "ft_from" to "ft_to". */ |
388 | typedef struct fromto_S | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 389 | { |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 390 | char_u *ft_from; /* "from" part of the replacement */ |
391 | char_u *ft_to; /* "to" part of the replacement */ | ||||
392 | } fromto_T; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 393 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 394 | /* Info from "SAL" entries in ".aff" file used in sl_sal. |
395 | * The info is split for quick processing by spell_soundfold(). | ||||
396 | * Note that "sm_oneof" and "sm_rules" point into sm_lead (NOTE(review): so presumably they must not be freed separately -- confirm). The wide character copies only exist when FEAT_MBYTE is defined. */ | ||||
397 | typedef struct salitem_S | ||||
398 | { | ||||
399 | char_u *sm_lead; /* leading letters */ | ||||
400 | int sm_leadlen; /* length of "sm_lead" */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 401 | char_u *sm_oneof; /* letters from () or NULL */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 402 | char_u *sm_rules; /* rules like ^, $, priority */ |
403 | char_u *sm_to; /* replacement. */ | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 404 | #ifdef FEAT_MBYTE |
405 | int *sm_lead_w; /* wide character copy of "sm_lead" */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 406 | int *sm_oneof_w; /* wide character copy of "sm_oneof" */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 407 | int *sm_to_w; /* wide character copy of "sm_to" */ |
408 | #endif | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 409 | } salitem_T; |
410 | |||||
/* salfirst_T is int when FEAT_MBYTE is defined and short otherwise.
 * NOTE(review): its use is not visible in this part of the file; presumably
 * it is the element type of a lookup table for soundfolding -- confirm. */
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 411 | #ifdef FEAT_MBYTE |
412 | typedef int salfirst_T; | ||||
413 | #else | ||||
414 | typedef short salfirst_T; | ||||
415 | #endif | ||||
416 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 417 | /* Error codes for reading a spell file. Values for SP_*ERROR are all negative, positive values are used by |
418 | * read_cnt_string(). */ | ||||
419 | #define SP_TRUNCERROR -1 /* spell file truncated error */ | ||||
420 | #define SP_FORMERROR -2 /* format error in spell file */ | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 421 | #define SP_OTHERERROR -3 /* other error while reading spell file */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 422 | |
/*
 * Structure used to store words and other info for one language, loaded from
 * a .spl file.
 * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
 * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
 *
 * The "byts" array stores the possible bytes in each tree node, preceded by
 * the number of possible bytes, sorted on byte value:
 *	<len> <byte1> <byte2> ...
 * The "idxs" array stores the index of the child node corresponding to the
 * byte in "byts".
 * Exception: when the byte is zero, the word may end here and "idxs" holds
 * the flags, region mask and affixID for the word.  There may be several
 * zeros in sequence for alternative flag/region/affixID combinations.
 *
 * Loaded languages are chained through "sl_next".
 */
typedef struct slang_S slang_T;
struct slang_S
{
    slang_T	*sl_next;	/* next language */
    char_u	*sl_name;	/* language name "en", "en.rare", "nl", etc. */
    char_u	*sl_fname;	/* name of .spl file */
    int		sl_add;		/* TRUE if it's a .add file. */

    /* The three word trees; each is a "byts"/"idxs" array pair. */
    char_u	*sl_fbyts;	/* case-folded word bytes */
    idx_T	*sl_fidxs;	/* case-folded word indexes */
    char_u	*sl_kbyts;	/* keep-case word bytes */
    idx_T	*sl_kidxs;	/* keep-case word indexes */
    char_u	*sl_pbyts;	/* prefix tree word bytes */
    idx_T	*sl_pidxs;	/* prefix tree word indexes */

    char_u	*sl_info;	/* infotext string or NULL */

    char_u	sl_regions[17];	/* table with up to 8 region names plus NUL
				 * (17 bytes; presumably two bytes per region
				 * name -- confirm where it is filled) */

    char_u	*sl_midword;	/* MIDWORD string or NULL */

    hashtab_T	sl_wordcount;	/* hashtable with word count, wordcount_T */

    /* Compounding settings. */
    int		sl_compmax;	/* COMPOUNDWORDMAX (default: MAXWLEN) */
    int		sl_compminlen;	/* COMPOUNDMIN (default: 0) */
    int		sl_compsylmax;	/* COMPOUNDSYLMAX (default: MAXWLEN) */
    int		sl_compoptions;	/* COMP_* flags */
    garray_T	sl_comppat;	/* CHECKCOMPOUNDPATTERN items */
    regprog_T	*sl_compprog;	/* COMPOUNDRULE turned into a regexp program
				 * (NULL when no compounding) */
    char_u	*sl_compstartflags; /* flags for first compound word */
    char_u	*sl_compallflags; /* all flags for compound words */
    char_u	sl_nobreak;	/* When TRUE: no spaces between words */
    char_u	*sl_syllable;	/* SYLLABLE repeatable chars or NULL */
    garray_T	sl_syl_items;	/* syllable items */

    int		sl_prefixcnt;	/* number of items in "sl_prefprog" */
    regprog_T	**sl_prefprog;	/* table with regprogs for prefixes */

    /* Replacement and sound-folding tables used for suggestions. */
    garray_T	sl_rep;		/* list of fromto_T entries from REP lines */
    short	sl_rep_first[256];  /* indexes where byte first appears, -1 if
				       there is none */
    garray_T	sl_sal;		/* list of salitem_T entries from SAL lines */
    salfirst_T	sl_sal_first[256];  /* indexes where byte first appears, -1 if
				       there is none */
    int		sl_followup;	/* SAL followup */
    int		sl_collapse;	/* SAL collapse_result */
    int		sl_rem_accents;	/* SAL remove_accents */
    int		sl_sofo;	/* SOFOFROM and SOFOTO instead of SAL items:
				 * "sl_sal_first" maps chars, when has_mbyte
				 * "sl_sal" is a list of wide char lists. */
    garray_T	sl_repsal;	/* list of fromto_T entries from REPSAL lines */
    short	sl_repsal_first[256];  /* sl_rep_first for REPSAL lines */
    int		sl_nosplitsugs;	/* don't suggest splitting a word */

    /* Info from the .sug file.  Loaded on demand. */
    time_t	sl_sugtime;	/* timestamp for .sug file */
    char_u	*sl_sbyts;	/* soundfolded word bytes */
    idx_T	*sl_sidxs;	/* soundfolded word indexes */
    buf_T	*sl_sugbuf;	/* buffer with word number table */
    int		sl_sugloaded;	/* TRUE when .sug file was loaded or failed to
				   load */

    int		sl_has_map;	/* TRUE if there is a MAP line */
    /* The element type of "sl_map_array" depends on FEAT_MBYTE. */
#ifdef FEAT_MBYTE
    hashtab_T	sl_map_hash;	/* MAP for multi-byte chars */
    int		sl_map_array[256];  /* MAP for first 256 chars */
#else
    char_u	sl_map_array[256];  /* MAP for first 256 chars */
#endif
    hashtab_T	sl_sounddone;	/* table with soundfolded words that have
				   been handled, see add_sound_suggest() */
};
511 | |||||
/* First language that is loaded, start of the linked list of loaded
 * languages (linked through "sl_next").  NULL when the list is empty. */
static slang_T *first_lang = NULL;

/* Flags used in .spl file for soundsalike flags.  The values are distinct
 * bits, so they can be combined.
 * NOTE(review): "SAL_F0LLOWUP" is spelled with a digit zero instead of the
 * letter "O"; renaming it requires updating every user of the macro. */
#define SAL_F0LLOWUP		1
#define SAL_COLLAPSE		2
#define SAL_REM_ACCENTS		4
520 | |||||
/*
 * Structure used in "b_langp", filled from 'spelllang'.
 * Each entry refers to one loaded language plus the languages to use for
 * sound folding and for REP items.
 */
typedef struct langp_S
{
    slang_T	*lp_slang;	/* info for this language */
    slang_T	*lp_sallang;	/* language used for sound folding or NULL */
    slang_T	*lp_replang;	/* language used for REP items or NULL */
    int		lp_region;	/* bitmask for region or REGION_ALL */
} langp_T;

/* Pointer to entry "i" of growarray "ga", whose data is an array of
 * langp_T. */
#define LANGP_ENTRY(ga, i)	(((langp_T *)(ga).ga_data) + (i))
533 | |||||
#define REGION_ALL 0xff		/* word valid in all regions */

/* Magic string, its length in bytes and the format version of a .spl file. */
#define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
#define VIMSPELLMAGICL 8
#define VIMSPELLVERSION 50

/* Magic string, its length in bytes and the format version of a .sug file. */
#define VIMSUGMAGIC "VIMsug"	/* string at start of Vim .sug file */
#define VIMSUGMAGICL 6
#define VIMSUGVERSION 1

/* Section IDs.  Only renumber them when VIMSPELLVERSION changes! */
#define SN_REGION	0	/* <regionname> section */
#define SN_CHARFLAGS	1	/* charflags section */
#define SN_MIDWORD	2	/* <midword> section */
#define SN_PREFCOND	3	/* <prefcond> section */
#define SN_REP		4	/* REP items section */
#define SN_SAL		5	/* SAL items section */
#define SN_SOFO		6	/* soundfolding section */
#define SN_MAP		7	/* MAP items section */
#define SN_COMPOUND	8	/* compound words section */
#define SN_SYLLABLE	9	/* syllable section */
#define SN_NOBREAK	10	/* NOBREAK section */
#define SN_SUGFILE	11	/* timestamp for .sug file */
#define SN_REPSAL	12	/* REPSAL items section */
#define SN_WORDS	13	/* common words */
#define SN_NOSPLITSUGS	14	/* don't split word for suggestions */
#define SN_INFO		15	/* info section */
#define SN_END		255	/* end of sections */

#define SNF_REQUIRED	1	/* <sectionflags>: required section */
564 | |||||
/* Result values.  Lower number is accepted over higher one.
 * SP_BANNED is parenthesized so that the negative value expands safely
 * inside any expression. */
#define SP_BANNED	(-1)
#define SP_OK		0
#define SP_RARE		1
#define SP_LOCAL	2
#define SP_BAD		3
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 571 | |
/* file used for "zG" and "zW"; NULL until it has been set
 * (presumably holds the file name -- confirm where it is assigned) */
static char_u	*int_wordlist = NULL;
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 574 | |
/*
 * Item stored in the "sl_wordcount" hashtable: a counter followed by the
 * word itself.  "wc_word" is declared with one element but is actually
 * longer, thus the item is a variable-length record.
 */
typedef struct wordcount_S
{
    short_u	wc_count;	    /* nr of times word was seen */
    char_u	wc_word[1];	    /* word, actually longer */
} wordcount_T;

/* Dummy item, only used to compute the offset of "wc_word" in the struct. */
static wordcount_T dumwc;
/* Byte offset of "wc_word" from the start of a wordcount_T. */
#define WC_KEY_OFF  (dumwc.wc_word - (char_u *)&dumwc)
/* Get the wordcount_T from a hash item; "hi_key" points to "wc_word". */
#define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF))
/* Largest count value used for "wc_count" (presumably the maximum that fits
 * in a short_u -- confirm against the definition of short_u). */
#define MAXWORDCOUNT 0xffff
585 | |||||
/*
 * Information used when looking for suggestions.
 * Holds the bad word in several forms, the score limits and the arrays with
 * the suggestions found so far.
 */
typedef struct suginfo_S
{
    garray_T	su_ga;		    /* suggestions, contains "suggest_T" */
    int		su_maxcount;	    /* max. number of suggestions displayed */
    int		su_maxscore;	    /* maximum score for adding to su_ga */
    int		su_sfmaxscore;	    /* idem, for when doing soundfold words */
    garray_T	su_sga;		    /* like su_ga, sound-folded scoring */
    char_u	*su_badptr;	    /* start of bad word in line */
    int		su_badlen;	    /* length of detected bad word in line */
    int		su_badflags;	    /* caps flags for bad word */
    char_u	su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
    char_u	su_fbadword[MAXWLEN]; /* su_badword case-folded */
    char_u	su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
    hashtab_T	su_banned;	    /* table with banned words */
    slang_T	*su_sallang;	    /* default language for sound folding */
} suginfo_T;
605 | |||||
/* One word suggestion.  Used in "su_ga" (and "su_sga", which is described as
 * being like "su_ga"). */
typedef struct suggest_S
{
    char_u	*st_word;	/* suggested word, allocated string */
    int		st_wordlen;	/* STRLEN(st_word) */
    int		st_orglen;	/* length of replaced text */
    int		st_score;	/* lower is better */
    int		st_altscore;	/* used when st_score compares equal */
    int		st_salscore;	/* st_score is for soundalike */
    int		st_had_bonus;	/* bonus already included in score */
    slang_T	*st_slang;	/* language used for sound folding */
} suggest_T;

/* Entry "i" of growarray "ga", whose data is an array of suggest_T. */
#define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 620 | |
/* TRUE if a word appears in the list of banned words.
 * "su" is a pointer to a suginfo_T; it is parenthesized so that any pointer
 * expression can be passed. */
#define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, word)))

/* Number of suggestions kept when cleaning up.  We need to keep more than
 * what is displayed, because when rescore_suggestions() is called the score
 * may change and wrong suggestions may be removed later.
 * The result is 150 when "su_maxcount" is below 130, otherwise
 * "su_maxcount" plus 20. */
#define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)

/* Threshold for sorting and cleaning up suggestions.  Don't want to keep lots
 * of suggestions that are not going to be displayed. */
#define SUG_MAX_COUNT(su)	(SUG_CLEAN_COUNT(su) + 50)
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 632 | |
/* score for various changes */
#define SCORE_SPLIT	149	/* split bad word */
#define SCORE_SPLIT_NO	249	/* split bad word with NOSPLITSUGS */
#define SCORE_ICASE	52	/* slightly different case */
#define SCORE_REGION	200	/* word is for different region */
#define SCORE_RARE	180	/* rare word */
#define SCORE_SWAP	75	/* swap two characters */
#define SCORE_SWAP3	110	/* swap two characters in three */
#define SCORE_REP	65	/* REP replacement */
#define SCORE_SUBST	93	/* substitute a character */
#define SCORE_SIMILAR	33	/* substitute a similar character */
#define SCORE_SUBCOMP	33	/* substitute a composing character */
#define SCORE_DEL	94	/* delete a character */
#define SCORE_DELDUP	66	/* delete a duplicated character */
#define SCORE_DELCOMP	28	/* delete a composing character */
#define SCORE_INS	96	/* insert a character */
#define SCORE_INSDUP	67	/* insert a duplicate character */
#define SCORE_INSCOMP	30	/* insert a composing character */
#define SCORE_NONWORD	103	/* change non-word to word char */

#define SCORE_FILE	30	/* suggestion from a file */
#define SCORE_MAXINIT	350	/* Initial maximum score: higher == slower.
				 * 350 allows for about three changes. */

#define SCORE_COMMON1	30	/* subtracted for words seen before */
#define SCORE_COMMON2	40	/* subtracted for words often seen */
#define SCORE_COMMON3	50	/* subtracted for words very often seen */
#define SCORE_THRES2	10	/* word count threshold for COMMON2 */
#define SCORE_THRES3	100	/* word count threshold for COMMON3 */

/* When trying changed soundfold words it becomes slow when trying more than
 * two changes.  With less than two changes it's slightly faster but we miss a
 * few good suggestions.  In rare cases we need to try three or four changes.
 */
#define SCORE_SFMAX1	200	/* maximum score for first try */
#define SCORE_SFMAX2	300	/* maximum score for second try */
#define SCORE_SFMAX3	400	/* maximum score for third try */

/* The product is parenthesized so that SCORE_BIG is a single value in any
 * expression, e.g. "x / SCORE_BIG" divides by 288 instead of computing
 * "(x / SCORE_INS) * 3". */
#define SCORE_BIG	(SCORE_INS * 3)	/* big difference */
#define SCORE_MAXMAX	999999	/* accept any score */
#define SCORE_LIMITMAX	350	/* for spell_edit_score_limit() */

/* for spell_edit_score_limit() we need to know the minimum value of
 * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
#define SCORE_EDIT_MIN	SCORE_SIMILAR
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 678 | |
/*
 * Structure to store info for word matching.
 * Keeps the pointers into the text being checked, the case-folded copy of
 * the word and the result found so far.
 */
typedef struct matchinf_S
{
    langp_T	*mi_lp;			/* info for language and region */

    /* pointers to original text to be checked */
    char_u	*mi_word;		/* start of word being checked */
    char_u	*mi_end;		/* end of matching word so far */
    char_u	*mi_fend;		/* next char to be added to mi_fword */
    char_u	*mi_cend;		/* char after what was used for
					   mi_capflags */

    /* case-folded text */
    char_u	mi_fword[MAXWLEN + 1];	/* mi_word case-folded */
    int		mi_fwordlen;		/* nr of valid bytes in mi_fword */

    /* for when checking word after a prefix */
    int		mi_prefarridx;		/* index in sl_pidxs with list of
					   affixID/condition */
    int		mi_prefcnt;		/* number of entries at mi_prefarridx */
    int		mi_prefixlen;		/* byte length of prefix */
    /* Without FEAT_MBYTE "mi_cprefixlen" is not a separate field but an
     * alias for "mi_prefixlen". */
#ifdef FEAT_MBYTE
    int		mi_cprefixlen;		/* byte length of prefix in original
					   case */
#else
# define mi_cprefixlen mi_prefixlen	/* it's the same value */
#endif

    /* for when checking a compound word */
    int		mi_compoff;		/* start of following word offset */
    char_u	mi_compflags[MAXWLEN];	/* flags for compound words used */
    int		mi_complen;		/* nr of compound words used */
    int		mi_compextra;		/* nr of COMPOUNDROOT words */

    /* others */
    int		mi_result;		/* result so far: SP_BAD, SP_OK, etc. */
    int		mi_capflags;		/* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
    buf_T	*mi_buf;		/* buffer being checked */

    /* for NOBREAK */
    int		mi_result2;		/* "mi_result" without following word */
    char_u	*mi_end2;		/* "mi_end" without following word */
} matchinf_T;
724 | |||||
/*
 * The tables used for recognizing word characters according to spelling.
 * These are only used for the first 256 characters of 'encoding'.
 * Each table has one entry per byte value.
 */
typedef struct spelltab_S
{
    char_u  st_isw[256];	/* flags: is word char */
    char_u  st_isu[256];	/* flags: is uppercase char */
    char_u  st_fold[256];	/* chars: folded case */
    char_u  st_upper[256];	/* chars: upper case */
} spelltab_T;

/* The tables in use. */
static spelltab_T   spelltab;
/* NOTE(review): presumably non-zero once "spelltab" has been filled; confirm
 * where it is set. */
static int	    did_set_spelltab;

/* Character flag bits (presumably for the charflags section of a spell file
 * -- confirm at the point of use). */
#define CF_WORD		0x01
#define CF_UPPER	0x02
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 742 | |
/* Forward declarations of file-local functions.  The parameter lists are
 * wrapped in the __ARGS() macro, which is defined outside this section.
 * spell_iswordp_w() is only declared when FEAT_MBYTE is defined. */
static void clear_spell_chartab __ARGS((spelltab_T *sp));
static int set_spell_finish __ARGS((spelltab_T	*new_st));
static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
static int spell_iswordp_nmw __ARGS((char_u *p));
#ifdef FEAT_MBYTE
static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
#endif
static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 751 | |
/*
 * For finding suggestions: At each node in the tree these states are tried:
 * NOTE(review): the enumerators get consecutive values starting at zero in
 * the order listed; confirm that no code depends on that order before
 * reordering or inserting states.
 */
typedef enum
{
    STATE_START = 0,	/* At start of node check for NUL bytes (goodword
			 * ends); if badword ends there is a match, otherwise
			 * try splitting word. */
    STATE_NOPREFIX,	/* try without prefix */
    STATE_SPLITUNDO,	/* Undo splitting. */
    STATE_ENDNUL,	/* Past NUL bytes at start of the node. */
    STATE_PLAIN,	/* Use each byte of the node. */
    STATE_DEL,		/* Delete a byte from the bad word. */
    STATE_INS_PREP,	/* Prepare for inserting bytes. */
    STATE_INS,		/* Insert a byte in the bad word. */
    STATE_SWAP,		/* Swap two bytes. */
    STATE_UNSWAP,	/* Undo swap two characters. */
    STATE_SWAP3,	/* Swap two characters over three. */
    STATE_UNSWAP3,	/* Undo swap two characters over three. */
    STATE_UNROT3L,	/* Undo rotate three characters left */
    STATE_UNROT3R,	/* Undo rotate three characters right */
    STATE_REP_INI,	/* Prepare for using REP items. */
    STATE_REP,		/* Use matching REP items from the .aff file. */
    STATE_REP_UNDO,	/* Undo a REP item replacement. */
    STATE_FINAL		/* End of this node. */
} state_T;
778 | |||||
/*
 * Struct to keep the state at each level in suggest_try_change().
 * Most index and length fields are stored as char_u to keep the struct
 * small.
 */
typedef struct trystate_S
{
    state_T	ts_state;	/* state at this level, STATE_ */
    int		ts_score;	/* score */
    idx_T	ts_arridx;	/* index in tree array, start of node */
    short	ts_curi;	/* index in list of child nodes */
    char_u	ts_fidx;	/* index in fword[], case-folded bad word */
    char_u	ts_fidxtry;	/* ts_fidx at which bytes may be changed */
    char_u	ts_twordlen;	/* valid length of tword[] */
    char_u	ts_prefixdepth;	/* stack depth for end of prefix or
				 * PFD_PREFIXTREE or PFD_NOPREFIX */
    char_u	ts_flags;	/* TSF_ flags */
#ifdef FEAT_MBYTE
    char_u	ts_tcharlen;	/* number of bytes in tword character */
    char_u	ts_tcharidx;	/* current byte index in tword character */
    char_u	ts_isdiff;	/* DIFF_ values */
    char_u	ts_fcharstart;	/* index in fword where badword char started */
#endif
    char_u	ts_prewordlen;	/* length of word in "preword[]" */
    char_u	ts_splitoff;	/* index in "tword" after last split */
    char_u	ts_splitfidx;	/* "ts_fidx" at word split */
    char_u	ts_complen;	/* nr of compound words used */
    char_u	ts_compsplit;	/* index for "compflags" where word was split */
    char_u	ts_save_badflags;   /* su_badflags saved here */
    char_u	ts_delidx;	/* index in fword for char that was deleted,
				   valid when "ts_flags" has TSF_DIDDEL */
} trystate_T;
809 | |||||
/* values for ts_isdiff */
#define DIFF_NONE	0	/* no different byte (yet) */
#define DIFF_YES	1	/* different byte found */
#define DIFF_INSERT	2	/* inserting character */

/* values for ts_flags; distinct bits, so they can be combined */
#define TSF_PREFIXOK	1	/* already checked that prefix is OK */
#define TSF_DIDSPLIT	2	/* tried split at this point */
#define TSF_DIDDEL	4	/* did a delete, "ts_delidx" has index */

/* special values ts_prefixdepth */
#define PFD_NOPREFIX	0xff	/* not using prefixes */
#define PFD_PREFIXTREE	0xfe	/* walking through the prefix tree */
#define PFD_NOTSPECIAL	0xfd	/* highest value that's not special */

/* mode values for find_word */
#define FIND_FOLDWORD	    0	/* find word case-folded */
#define FIND_KEEPWORD	    1	/* find keep-case word */
#define FIND_PREFIX	    2	/* find word after prefix */
#define FIND_COMPOUND	    3	/* find case-folded compound word */
#define FIND_KEEPCOMPOUND   4	/* find keep-case compound word */
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 831 | |
/* Forward declarations of file-local functions.  The parameter lists are
 * wrapped in the __ARGS() macro, which is defined outside this section. */
static slang_T *slang_alloc __ARGS((char_u *lang));
static void slang_free __ARGS((slang_T *lp));
static void slang_clear __ARGS((slang_T *lp));
static void slang_clear_sug __ARGS((slang_T *lp));
static void find_word __ARGS((matchinf_T *mip, int mode));
static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
static void find_prefix __ARGS((matchinf_T *mip, int mode));
static int fold_more __ARGS((matchinf_T *mip));
static int spell_valid_case __ARGS((int wordflags, int treeflags));
static int no_spell_checking __ARGS((win_T *wp));
static void spell_load_lang __ARGS((char_u *lang));
static char_u *spell_enc __ARGS((void));
static void int_wordlist_spl __ARGS((char_u *fname));
static void spell_load_cb __ARGS((char_u *fname, void *cookie));
static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
static int get2c __ARGS((FILE *fd));
849 | static int get3c __ARGS((FILE *fd)); | ||||
850 | static int get4c __ARGS((FILE *fd)); | ||||
851 | static time_t get8c __ARGS((FILE *fd)); | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 852 | static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 853 | static char_u *read_string __ARGS((FILE *fd, int cnt)); |
854 | static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len)); | ||||
855 | static int read_charflags_section __ARGS((FILE *fd)); | ||||
856 | static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp)); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 857 | static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 858 | static int read_sal_section __ARGS((FILE *fd, slang_T *slang)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 859 | static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len)); |
860 | static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count)); | ||||
861 | static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split)); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 862 | static int read_sofo_section __ARGS((FILE *fd, slang_T *slang)); |
863 | static int read_compound __ARGS((FILE *fd, slang_T *slang, int len)); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 864 | static int byte_in_str __ARGS((char_u *str, int byte)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 865 | static int init_syl_tab __ARGS((slang_T *slang)); |
866 | static int count_syllables __ARGS((slang_T *slang, char_u *word)); | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 867 | static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to)); |
868 | static void set_sal_first __ARGS((slang_T *lp)); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 869 | #ifdef FEAT_MBYTE |
870 | static int *mb_str2wide __ARGS((char_u *s)); | ||||
871 | #endif | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 872 | static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt)); |
873 | static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr)); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 874 | static void clear_midword __ARGS((buf_T *buf)); |
875 | static void use_midword __ARGS((slang_T *lp, buf_T *buf)); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 876 | static int find_region __ARGS((char_u *rp, char_u *region)); |
877 | static int captype __ARGS((char_u *word, char_u *end)); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 878 | static int badword_captype __ARGS((char_u *word, char_u *end)); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 879 | static void spell_reload_one __ARGS((char_u *fname, int added_word)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 880 | static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp)); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 881 | static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp)); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 882 | static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen)); |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 883 | static int check_need_cap __ARGS((linenr_T lnum, colnr_T col)); |
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 884 | static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive)); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 885 | #ifdef FEAT_EVAL |
886 | static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr)); | ||||
887 | #endif | ||||
888 | static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname)); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 889 | static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive)); |
890 | static void suggest_load_files __ARGS((void)); | ||||
891 | static void tree_count_words __ARGS((char_u *byts, idx_T *idxs)); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 892 | static void spell_find_cleanup __ARGS((suginfo_T *su)); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 893 | static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper)); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 894 | static void allcap_copy __ARGS((char_u *word, char_u *wcopy)); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 895 | static void suggest_try_special __ARGS((suginfo_T *su)); |
896 | static void suggest_try_change __ARGS((suginfo_T *su)); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 897 | static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold)); |
898 | static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add)); | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 899 | #ifdef FEAT_MBYTE |
900 | static int nofold_len __ARGS((char_u *fword, int flen, char_u *word)); | ||||
901 | #endif | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 902 | static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword)); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 903 | static void score_comp_sal __ARGS((suginfo_T *su)); |
904 | static void score_combine __ARGS((suginfo_T *su)); | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 905 | static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 906 | static void suggest_try_soundalike_prep __ARGS((void)); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 907 | static void suggest_try_soundalike __ARGS((suginfo_T *su)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 908 | static void suggest_try_soundalike_finish __ARGS((void)); |
909 | static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp)); | ||||
910 | static int soundfold_find __ARGS((slang_T *slang, char_u *word)); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 911 | static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags)); |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 912 | static void set_map_str __ARGS((slang_T *lp, char_u *map)); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 913 | static int similar_chars __ARGS((slang_T *slang, int c1, int c2)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 914 | static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf)); |
915 | static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap)); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 916 | static void add_banned __ARGS((suginfo_T *su, char_u *word)); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 917 | static void rescore_suggestions __ARGS((suginfo_T *su)); |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 918 | static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp)); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 919 | static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep)); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 920 | static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res)); |
921 | static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res)); | ||||
922 | static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res)); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 923 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 924 | static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res)); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 925 | #endif |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 926 | static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 927 | static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword)); |
928 | static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); | ||||
929 | #ifdef FEAT_MBYTE | ||||
930 | static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit)); | ||||
931 | #endif | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 932 | static void dump_word __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum)); |
933 | static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum)); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 934 | static buf_T *open_spellbuf __ARGS((void)); |
935 | static void close_spellbuf __ARGS((buf_T *buf)); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 936 | |
/*
 * Character-case handling with our own definitions, because the current
 * locale may differ from what the .spl file uses.
 * These must not be called with a negative number!
 * Note that each macro evaluates its argument more than once.
 */
#ifdef FEAT_MBYTE
# if defined(HAVE_WCHAR_H)
#  include <wchar.h>	    /* for towupper() and towlower() */
# endif

/*
 * What to do with characters above 255 when not using utf-8: use the "w"
 * library function if it is available, otherwise leave the character alone
 * (and never report it as upper case).
 */
# ifdef HAVE_TOWLOWER
#  define SPELL_WIDE_FOLD(c)	towlower(c)
# else
#  define SPELL_WIDE_FOLD(c)	(c)
# endif

# ifdef HAVE_TOWUPPER
#  define SPELL_WIDE_UPPER(c)	towupper(c)
# else
#  define SPELL_WIDE_UPPER(c)	(c)
# endif

# ifdef HAVE_ISWUPPER
#  define SPELL_WIDE_ISUPPER(c)	iswupper(c)
# else
#  define SPELL_WIDE_ISUPPER(c)	(FALSE)
# endif

/*
 * Multi-byte implementation.  For Unicode the utf_*() functions are called,
 * but not for ASCII, because 'casemap' must not be used here.  Characters
 * below 256 go through the spell tables, anything else uses the fallback
 * defined above.
 */
# define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
	    : (c) < 256 ? spelltab.st_fold[c] : SPELL_WIDE_FOLD(c))

# define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
	    : (c) < 256 ? spelltab.st_upper[c] : SPELL_WIDE_UPPER(c))

# define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
	    : (c) < 256 ? spelltab.st_isu[c] : SPELL_WIDE_ISUPPER(c))
#else
/* Non-multi-byte implementation: only the spell tables are used. */
# define SPELL_TOFOLD(c)  ((c) >= 256 ? (c) : spelltab.st_fold[c])
# define SPELL_TOUPPER(c) ((c) >= 256 ? (c) : spelltab.st_upper[c])
# define SPELL_ISUPPER(c) ((c) >= 256 ? FALSE : spelltab.st_isu[c])
#endif
978 | |||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 979 | |
980 | static char *e_format = N_("E759: Format error in spell file"); | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 981 | static char *e_spell_trunc = N_("E758: Truncated spell file"); |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 982 | static char *e_afftrailing = N_("Trailing text in %s line %d: %s"); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 983 | static char *e_affname = N_("Affix name too long in %s line %d: %s"); |
984 | static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP"); | ||||
985 | static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range"); | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 986 | static char *msg_compressing = N_("Compressing word tree..."); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 987 | |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 988 | /* Remember what "z?" replaced. */ |
989 | static char_u *repl_from = NULL; | ||||
990 | static char_u *repl_to = NULL; | ||||
991 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 992 | /* |
993 | * Main spell-checking function. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 994 | * "ptr" points to a character that could be the start of a word. |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 995 | * "*attrp" is set to the highlight index for a badly spelled word. For a |
996 | * non-word or when it's OK it remains unchanged. | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 997 | * This must only be called when 'spelllang' is not empty. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 998 | * |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 999 | * "capcol" is used to check for a Capitalised word after the end of a |
1000 | * sentence. If it's zero then perform the check. Return the column where to | ||||
1001 | * check next, or -1 when no sentence end was found. If it's NULL then don't | ||||
1002 | * worry. | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1003 | * |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1004 | * Returns the length of the word in bytes, also when it's OK, so that the |
1005 | * caller can skip over the word. | ||||
1006 | */ | ||||
1007 | int | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 1008 | spell_check(wp, ptr, attrp, capcol, docount) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1009 | win_T *wp; /* current window */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1010 | char_u *ptr; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 1011 | hlf_T *attrp; |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1012 | int *capcol; /* column to check for Capital */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 1013 | int docount; /* count good words */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1014 | { |
1015 | matchinf_T mi; /* Most things are put in "mi" so that it can | ||||
1016 | be passed to functions quickly. */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1017 | int nrlen = 0; /* found a number first */ |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1018 | int c; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1019 | int wrongcaplen = 0; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1020 | int lpi; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 1021 | int count_word = docount; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1022 | |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 1023 | /* A word never starts at a space or a control character. Return quickly |
1024 | * then, skipping over the character. */ | ||||
1025 | if (*ptr <= ' ') | ||||
1026 | return 1; | ||||
Bram Moolenaar | a226a6d | 2006-02-26 23:59:20 +0000 | [diff] [blame] | 1027 | |
1028 | /* Return here when loading language files failed. */ | ||||
1029 | if (wp->w_buffer->b_langp.ga_len == 0) | ||||
1030 | return 1; | ||||
1031 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1032 | vim_memset(&mi, 0, sizeof(matchinf_T)); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1033 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1034 | /* A number is always OK. Also skip hexadecimal numbers 0xFF99 and |
Bram Moolenaar | 43abc52 | 2005-12-10 20:15:02 +0000 | [diff] [blame] | 1035 | * 0X99FF. But always do check spelling to find "3GPP" and "11 |
1036 | * julifeest". */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1037 | if (*ptr >= '0' && *ptr <= '9') |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1038 | { |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 1039 | if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X')) |
1040 | mi.mi_end = skiphex(ptr + 2); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1041 | else |
1042 | mi.mi_end = skipdigits(ptr); | ||||
Bram Moolenaar | 43abc52 | 2005-12-10 20:15:02 +0000 | [diff] [blame] | 1043 | nrlen = mi.mi_end - ptr; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1044 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1045 | |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 1046 | /* Find the normal end of the word (until the next non-word character). */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1047 | mi.mi_word = ptr; |
Bram Moolenaar | 43abc52 | 2005-12-10 20:15:02 +0000 | [diff] [blame] | 1048 | mi.mi_fend = ptr; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 1049 | if (spell_iswordp(mi.mi_fend, wp->w_buffer)) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1050 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1051 | do |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1052 | { |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 1053 | mb_ptr_adv(mi.mi_fend); |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 1054 | } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer)); |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1055 | |
1056 | if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL) | ||||
1057 | { | ||||
1058 | /* Check word starting with capital letter. */ | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1059 | c = PTR2CHAR(ptr); |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1060 | if (!SPELL_ISUPPER(c)) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1061 | wrongcaplen = (int)(mi.mi_fend - ptr); |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1062 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1063 | } |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1064 | if (capcol != NULL) |
1065 | *capcol = -1; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 1066 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1067 | /* We always use the characters up to the next non-word character, |
1068 | * also for bad words. */ | ||||
1069 | mi.mi_end = mi.mi_fend; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 1070 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1071 | /* Check caps type later. */ |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 1072 | mi.mi_buf = wp->w_buffer; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1073 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1074 | /* case-fold the word with one non-word character, so that we can check |
1075 | * for the word end. */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1076 | if (*mi.mi_fend != NUL) |
1077 | mb_ptr_adv(mi.mi_fend); | ||||
1078 | |||||
1079 | (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword, | ||||
1080 | MAXWLEN + 1); | ||||
1081 | mi.mi_fwordlen = STRLEN(mi.mi_fword); | ||||
1082 | |||||
1083 | /* The word is bad unless we recognize it. */ | ||||
1084 | mi.mi_result = SP_BAD; | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1085 | mi.mi_result2 = SP_BAD; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1086 | |
1087 | /* | ||||
1088 | * Loop over the languages specified in 'spelllang'. | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 1089 | * We check them all, because a word may be matched longer in another |
1090 | * language. | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1091 | */ |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1092 | for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1093 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1094 | mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi); |
1095 | |||||
1096 | /* If reloading fails the language is still in the list but everything | ||||
1097 | * has been cleared. */ | ||||
1098 | if (mi.mi_lp->lp_slang->sl_fidxs == NULL) | ||||
1099 | continue; | ||||
1100 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1101 | /* Check for a matching word in case-folded words. */ |
1102 | find_word(&mi, FIND_FOLDWORD); | ||||
1103 | |||||
1104 | /* Check for a matching word in keep-case words. */ | ||||
1105 | find_word(&mi, FIND_KEEPWORD); | ||||
1106 | |||||
1107 | /* Check for matching prefixes. */ | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1108 | find_prefix(&mi, FIND_FOLDWORD); |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1109 | |
1110 | /* For a NOBREAK language, may want to use a word without a following | ||||
1111 | * word as a backup. */ | ||||
1112 | if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD | ||||
1113 | && mi.mi_result2 != SP_BAD) | ||||
1114 | { | ||||
1115 | mi.mi_result = mi.mi_result2; | ||||
1116 | mi.mi_end = mi.mi_end2; | ||||
1117 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 1118 | |
1119 | /* Count the word in the first language where it's found to be OK. */ | ||||
1120 | if (count_word && mi.mi_result == SP_OK) | ||||
1121 | { | ||||
1122 | count_common_word(mi.mi_lp->lp_slang, ptr, | ||||
1123 | (int)(mi.mi_end - ptr), 1); | ||||
1124 | count_word = FALSE; | ||||
1125 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1126 | } |
1127 | |||||
1128 | if (mi.mi_result != SP_OK) | ||||
1129 | { | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 1130 | /* If we found a number skip over it. Allows for "42nd". Do flag |
1131 | * rare and local words, e.g., "3GPP". */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1132 | if (nrlen > 0) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 1133 | { |
1134 | if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) | ||||
1135 | return nrlen; | ||||
1136 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1137 | |
1138 | /* When we are at a non-word character there is no error, just | ||||
1139 | * skip over the character (try looking for a word after it). */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1140 | else if (!spell_iswordp_nmw(ptr)) |
Bram Moolenaar | 63d5a1e | 2005-04-19 21:30:25 +0000 | [diff] [blame] | 1141 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 1142 | if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL) |
1143 | { | ||||
1144 | regmatch_T regmatch; | ||||
1145 | |||||
1146 | /* Check for end of sentence. */ | ||||
1147 | regmatch.regprog = wp->w_buffer->b_cap_prog; | ||||
1148 | regmatch.rm_ic = FALSE; | ||||
1149 | if (vim_regexec(®match, ptr, 0)) | ||||
1150 | *capcol = (int)(regmatch.endp[0] - ptr); | ||||
1151 | } | ||||
1152 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1153 | #ifdef FEAT_MBYTE |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1154 | if (has_mbyte) |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 1155 | return (*mb_ptr2len)(ptr); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1156 | #endif |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1157 | return 1; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1158 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1159 | else if (mi.mi_end == ptr) |
1160 | /* Always include at least one character. Required for when there | ||||
1161 | * is a mixup in "midword". */ | ||||
1162 | mb_ptr_adv(mi.mi_end); | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1163 | else if (mi.mi_result == SP_BAD |
1164 | && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak) | ||||
1165 | { | ||||
1166 | char_u *p, *fp; | ||||
1167 | int save_result = mi.mi_result; | ||||
1168 | |||||
1169 | /* First language in 'spelllang' is NOBREAK. Find first position | ||||
1170 | * at which any word would be valid. */ | ||||
1171 | mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0); | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1172 | if (mi.mi_lp->lp_slang->sl_fidxs != NULL) |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1173 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1174 | p = mi.mi_word; |
1175 | fp = mi.mi_fword; | ||||
1176 | for (;;) | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1177 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1178 | mb_ptr_adv(p); |
1179 | mb_ptr_adv(fp); | ||||
1180 | if (p >= mi.mi_end) | ||||
1181 | break; | ||||
1182 | mi.mi_compoff = fp - mi.mi_fword; | ||||
1183 | find_word(&mi, FIND_COMPOUND); | ||||
1184 | if (mi.mi_result != SP_BAD) | ||||
1185 | { | ||||
1186 | mi.mi_end = p; | ||||
1187 | break; | ||||
1188 | } | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1189 | } |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 1190 | mi.mi_result = save_result; |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1191 | } |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 1192 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1193 | |
1194 | if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED) | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 1195 | *attrp = HLF_SPB; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1196 | else if (mi.mi_result == SP_RARE) |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 1197 | *attrp = HLF_SPR; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 1198 | else |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 1199 | *attrp = HLF_SPL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1200 | } |
1201 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1202 | if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE)) |
1203 | { | ||||
1204 | /* Report SpellCap only when the word isn't badly spelled. */ | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 1205 | *attrp = HLF_SPC; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1206 | return wrongcaplen; |
1207 | } | ||||
1208 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1209 | return (int)(mi.mi_end - ptr); |
1210 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 1211 | |
/*
 * Check if the word at "mip->mi_word" is in the tree.
 * When "mode" is FIND_FOLDWORD check in fold-case word tree.
 * When "mode" is FIND_KEEPWORD check in keep-case word tree.
 * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
 * tree.
 * When "mode" is FIND_COMPOUND check for a following compound word in the
 * fold-case word tree, starting at offset "mip->mi_compoff".
 * When "mode" is FIND_KEEPCOMPOUND check for a following compound word in the
 * keep-case word tree, starting at offset "mip->mi_compoff".
 *
 * The tree is followed for as long as the text matches, remembering every
 * node where a word may end.  These possible endings are then tried, the
 * longest one first.  When a word character follows an ending and the
 * language has compound rules or NOBREAK, this function calls itself (and
 * find_prefix()) to check the word that follows.
 *
 * For a match mip->mi_result is updated and mip->mi_end is set to the end of
 * the match.  With NOBREAK a match that is not followed by a good word is
 * kept separately in mip->mi_result2 and mip->mi_end2.
 * Returns without doing anything when the tree to be searched is empty.
 * Gives the "e_format" error and returns when more than MAXWLEN possible
 * word endings are encountered.
 */
    static void
find_word(mip, mode)
    matchinf_T  *mip;
    int         mode;
{
    idx_T       arridx = 0;         /* index in byts[]/idxs[], tree starts at
                                     * index zero */
    int         endlen[MAXWLEN];    /* length at possible word endings */
    idx_T       endidx[MAXWLEN];    /* possible word endings */
    int         endidxcnt = 0;      /* number of entries used in endlen[] and
                                     * endidx[] */
    int         len;
    int         wlen = 0;           /* number of bytes of "ptr" matched */
    int         flen;               /* number of bytes available in "ptr" */
    int         c;
    char_u      *ptr;               /* text that is compared with the tree */
    idx_T       lo, hi, m;
#ifdef FEAT_MBYTE
    char_u      *s;
#endif
    char_u      *p;
    int         res = SP_BAD;
    slang_T     *slang = mip->mi_lp->lp_slang;
    unsigned    flags;
    char_u      *byts;
    idx_T       *idxs;
    int         word_ends;          /* FALSE when a word character follows the
                                     * possible word ending */
    int         prefix_found;
    int         nobreak_result;     /* for NOBREAK: result of looking for a
                                     * following word */

    if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
    {
        /* Check for word with matching case in keep-case tree. */
        ptr = mip->mi_word;
        flen = 9999;                /* no case folding, always enough bytes */
        byts = slang->sl_kbyts;
        idxs = slang->sl_kidxs;

        if (mode == FIND_KEEPCOMPOUND)
            /* Skip over the previously found word(s). */
            wlen += mip->mi_compoff;
    }
    else
    {
        /* Check for case-folded in case-folded tree. */
        ptr = mip->mi_fword;
        flen = mip->mi_fwordlen;    /* available case-folded bytes */
        byts = slang->sl_fbyts;
        idxs = slang->sl_fidxs;

        if (mode == FIND_PREFIX)
        {
            /* Skip over the prefix. */
            wlen = mip->mi_prefixlen;
            flen -= mip->mi_prefixlen;
        }
        else if (mode == FIND_COMPOUND)
        {
            /* Skip over the previously found word(s). */
            wlen = mip->mi_compoff;
            flen -= mip->mi_compoff;
        }

    }

    if (byts == NULL)
        return;                 /* array is empty */

    /*
     * Repeat advancing in the tree until:
     * - there is a byte that doesn't match,
     * - we reach the end of the tree,
     * - or we reach the end of the line.
     */
    for (;;)
    {
        /* No bytes left in "ptr" while "mi_fend" is not at a NUL: ask
         * fold_more() for more.  NOTE(review): presumably it case-folds the
         * next part of the text into mi_fword - confirm in fold_more(). */
        if (flen <= 0 && *mip->mi_fend != NUL)
            flen = fold_more(mip);

        /* A node starts with the number of sibling bytes, after this
         * "arridx" is the index of the first sibling. */
        len = byts[arridx++];

        /* If the first possible byte is a zero the word could end here.
         * Remember this index, we first check for the longest word. */
        if (byts[arridx] == 0)
        {
            if (endidxcnt == MAXWLEN)
            {
                /* Must be a corrupted spell file. */
                EMSG(_(e_format));
                return;
            }
            /* Push the matched length and the index of the first NUL
             * sibling.  Entries are added with increasing length, the second
             * loop below takes them from the end: longest first. */
            endlen[endidxcnt] = wlen;
            endidx[endidxcnt++] = arridx++;
            --len;

            /* Skip over the zeros, there can be several flag/region
             * combinations. */
            while (len > 0 && byts[arridx] == 0)
            {
                ++arridx;
                --len;
            }
            if (len == 0)
                break;      /* no children, word must end here */
        }

        /* Stop looking at end of the line. */
        if (ptr[wlen] == NUL)
            break;

        /* Perform a binary search in the list of accepted bytes. */
        c = ptr[wlen];
        if (c == TAB)       /* <Tab> is handled like <Space> */
            c = ' ';
        lo = arridx;
        hi = arridx + len - 1;
        while (lo < hi)
        {
            m = (lo + hi) / 2;
            if (byts[m] > c)
                hi = m - 1;
            else if (byts[m] < c)
                lo = m + 1;
            else
            {
                lo = hi = m;
                break;
            }
        }

        /* Stop if there is no matching byte.  The search ends with either
         * "hi < lo" (nothing left) or "lo == hi" (one candidate, which still
         * needs to be compared unless it was an exact hit). */
        if (hi < lo || byts[lo] != c)
            break;

        /* Continue at the child (if there is one). */
        arridx = idxs[lo];
        ++wlen;
        --flen;

        /* One space in the good word may stand for several spaces in the
         * checked word. */
        if (c == ' ')
        {
            for (;;)
            {
                if (flen <= 0 && *mip->mi_fend != NUL)
                    flen = fold_more(mip);
                if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
                    break;
                ++wlen;
                --flen;
            }
        }
    }

    /*
     * Verify that one of the possible endings is valid.  Try the longest
     * first.
     */
    while (endidxcnt > 0)
    {
        --endidxcnt;
        arridx = endidx[endidxcnt];
        wlen = endlen[endidxcnt];

#ifdef FEAT_MBYTE
        if ((*mb_head_off)(ptr, ptr + wlen) > 0)
            continue;       /* not at first byte of character */
#endif
        if (spell_iswordp(ptr + wlen, mip->mi_buf))
        {
            /* A word character follows.  That is only acceptable when
             * another word may follow: compounding or NOBREAK. */
            if (slang->sl_compprog == NULL && !slang->sl_nobreak)
                continue;           /* next char is a word character */
            word_ends = FALSE;
        }
        else
            word_ends = TRUE;
        /* The prefix flag is before compound flags.  Once a valid prefix flag
         * has been found we try compound flags. */
        prefix_found = FALSE;

#ifdef FEAT_MBYTE
        if (mode != FIND_KEEPWORD && has_mbyte)
        {
            /* Compute byte length in original word, length may change
             * when folding case.  This can be slow, take a shortcut when the
             * case-folded word is equal to the keep-case word. */
            p = mip->mi_word;
            if (STRNCMP(ptr, p, wlen) != 0)
            {
                for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
                    mb_ptr_adv(p);
                /* From here on "wlen" is a length in mi_word, while
                 * endlen[endidxcnt] still is the length in "ptr". */
                wlen = p - mip->mi_word;
            }
        }
#endif

        /* Check flags and region.  For FIND_PREFIX check the condition and
         * prefix ID.
         * Repeat this if there are more flags/region alternatives until there
         * is a match.
         * "arridx" is the index of the first NUL sibling, thus the byte
         * before it is the number of siblings in this node. */
        res = SP_BAD;
        for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
                                                              --len, ++arridx)
        {
            /* For a NUL byte idxs[] holds the flags instead of an index. */
            flags = idxs[arridx];

            /* For the fold-case tree check that the case of the checked word
             * matches with what the word in the tree requires.
             * For keep-case tree the case is always right.  For prefixes we
             * don't bother to check. */
            if (mode == FIND_FOLDWORD)
            {
                if (mip->mi_cend != mip->mi_word + wlen)
                {
                    /* mi_capflags was set for a different word length, need
                     * to do it again. */
                    mip->mi_cend = mip->mi_word + wlen;
                    mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
                }

                if (mip->mi_capflags == WF_KEEPCAP
                                || !spell_valid_case(mip->mi_capflags, flags))
                    continue;
            }

            /* When mode is FIND_PREFIX the word must support the prefix:
             * check the prefix ID and the condition.  Do that for the list at
             * mip->mi_prefarridx that find_prefix() filled. */
            else if (mode == FIND_PREFIX && !prefix_found)
            {
                c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
                                    flags,
                                    mip->mi_word + mip->mi_cprefixlen, slang,
                                    FALSE);
                if (c == 0)
                    continue;

                /* Use the WF_RARE flag for a rare prefix. */
                if (c & WF_RAREPFX)
                    flags |= WF_RARE;
                prefix_found = TRUE;
            }

            if (slang->sl_nobreak)
            {
                if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
                        && (flags & WF_BANNED) == 0)
                {
                    /* NOBREAK: found a valid following word.  That's all we
                     * need to know, so return. */
                    mip->mi_result = SP_OK;
                    break;
                }
            }

            else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
                                                                || !word_ends))
            {
                /* If there is no flag or the word is shorter than
                 * COMPOUNDMIN reject it quickly.
                 * Makes you wonder why someone puts a compound flag on a word
                 * that's too short...  Myspell compatibility requires this
                 * anyway.
                 * The compound flag is in the top byte of "flags". */
                if (((unsigned)flags >> 24) == 0
                        || wlen - mip->mi_compoff < slang->sl_compminlen)
                    continue;
#ifdef FEAT_MBYTE
                /* For multi-byte chars check character length against
                 * COMPOUNDMIN. */
                if (has_mbyte
                        && slang->sl_compminlen > 0
                        && mb_charlen_len(mip->mi_word + mip->mi_compoff,
                                wlen - mip->mi_compoff) < slang->sl_compminlen)
                        continue;
#endif

                /* Limit the number of compound words to COMPOUNDWORDMAX if no
                 * maximum for syllables is specified. */
                if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
                                                           > slang->sl_compmax
                        && slang->sl_compsylmax == MAXWLEN)
                    continue;

                /* Quickly check if compounding is possible with this flag.
                 * The first word is checked against the start flags, later
                 * words against all flags. */
                if (!byte_in_str(mip->mi_complen == 0
                            ? slang->sl_compstartflags
                            : slang->sl_compallflags,
                                            ((unsigned)flags >> 24)))
                    continue;

                if (mode == FIND_COMPOUND)
                {
                    int     capflags;

                    /* Need to check the caps type of the appended compound
                     * word. */
#ifdef FEAT_MBYTE
                    if (has_mbyte && STRNCMP(ptr, mip->mi_word,
                                                        mip->mi_compoff) != 0)
                    {
                        /* case folding may have changed the length */
                        p = mip->mi_word;
                        for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
                            mb_ptr_adv(p);
                    }
                    else
#endif
                        p = mip->mi_word + mip->mi_compoff;
                    capflags = captype(p, mip->mi_word + wlen);
                    if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
                                                 && (flags & WF_FIXCAP) != 0))
                        continue;

                    if (capflags != WF_ALLCAP)
                    {
                        /* When the character before the word is a word
                         * character we do not accept a Onecap word.  We do
                         * accept a no-caps word, even when the dictionary
                         * word specifies ONECAP.
                         * Note that "&&" binds tighter than "?:", the last
                         * two conditions together form the "else" part. */
                        mb_ptr_back(mip->mi_word, p);
                        if (spell_iswordp_nmw(p)
                                ? capflags == WF_ONECAP
                                : (flags & WF_ONECAP) != 0
                                                     && capflags != WF_ONECAP)
                            continue;
                    }
                }

                /* If the word ends the sequence of compound flags of the
                 * words must match with one of the COMPOUNDRULE items and
                 * the number of syllables must not be too large. */
                mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
                mip->mi_compflags[mip->mi_complen + 1] = NUL;
                if (word_ends)
                {
                    char_u  fword[MAXWLEN];

                    /* When sl_compsylmax is not below MAXWLEN "fword" is
                     * left uninitialized; can_compound() only looks at the
                     * word when sl_compsylmax is below MAXWLEN. */
                    if (slang->sl_compsylmax < MAXWLEN)
                    {
                        /* "fword" is only needed for checking syllables. */
                        if (ptr == mip->mi_word)
                            (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
                        else
                            vim_strncpy(fword, ptr, endlen[endidxcnt]);
                    }
                    if (!can_compound(slang, fword, mip->mi_compflags))
                        continue;
                }
            }

            /* Check NEEDCOMPOUND: can't use word without compounding. */
            else if (flags & WF_NEEDCOMP)
                continue;

            nobreak_result = SP_OK;

            if (!word_ends)
            {
                int     save_result = mip->mi_result;
                char_u  *save_end = mip->mi_end;
                langp_T *save_lp = mip->mi_lp;
                int     lpi;

                /* Check that a valid word follows.  If there is one and we
                 * are compounding, it will set "mi_result", thus we are
                 * always finished here.  For NOBREAK we only check that a
                 * valid word follows.
                 * Recursive! */
                if (slang->sl_nobreak)
                    mip->mi_result = SP_BAD;

                /* Find following word in case-folded tree. */
                mip->mi_compoff = endlen[endidxcnt];
#ifdef FEAT_MBYTE
                if (has_mbyte && mode == FIND_KEEPWORD)
                {
                    /* Compute byte length in case-folded word from "wlen":
                     * byte length in keep-case word.  Length may change when
                     * folding case.  This can be slow, take a shortcut when
                     * the case-folded word is equal to the keep-case word. */
                    p = mip->mi_fword;
                    if (STRNCMP(ptr, p, wlen) != 0)
                    {
                        for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
                            mb_ptr_adv(p);
                        mip->mi_compoff = p - mip->mi_fword;
                    }
                }
#endif
                /* Remember the offset used for the fold-case tree,
                 * "mi_compoff" is overwritten for the keep-case tree below
                 * and then restored from "c" for find_prefix(). */
                c = mip->mi_compoff;
                ++mip->mi_complen;
                if (flags & WF_COMPROOT)
                    ++mip->mi_compextra;

                /* For NOBREAK we need to try all NOBREAK languages, at least
                 * to find the ".add" file(s).
                 * Without NOBREAK the body is executed only once, for the
                 * current language, see the "break" at the end. */
                for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
                {
                    if (slang->sl_nobreak)
                    {
                        mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
                        if (mip->mi_lp->lp_slang->sl_fidxs == NULL
                                         || !mip->mi_lp->lp_slang->sl_nobreak)
                            continue;
                    }

                    find_word(mip, FIND_COMPOUND);

                    /* When NOBREAK any word that matches is OK.  Otherwise we
                     * need to find the longest match, thus try with keep-case
                     * and prefix too. */
                    if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
                    {
                        /* Find following word in keep-case tree. */
                        mip->mi_compoff = wlen;
                        find_word(mip, FIND_KEEPCOMPOUND);

                        if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
                        {
                            /* Check for following word with prefix. */
                            mip->mi_compoff = c;
                            find_prefix(mip, FIND_COMPOUND);
                        }
                    }

                    if (!slang->sl_nobreak)
                        break;
                }
                /* Undo the changes made for the recursive calls. */
                --mip->mi_complen;
                if (flags & WF_COMPROOT)
                    --mip->mi_compextra;
                mip->mi_lp = save_lp;

                if (slang->sl_nobreak)
                {
                    /* Only remember whether a good word follows, the result
                     * for this word itself is decided below. */
                    nobreak_result = mip->mi_result;
                    mip->mi_result = save_result;
                    mip->mi_end = save_end;
                }
                else
                {
                    /* Compounding: the recursive calls have set "mi_result"
                     * if a valid word follows, nothing else to do for this
                     * flag. */
                    if (mip->mi_result == SP_OK)
                        break;
                    continue;
                }
            }

            if (flags & WF_BANNED)
                res = SP_BANNED;
            else if (flags & WF_REGION)
            {
                /* Check region. */
                if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
                    res = SP_OK;
                else
                    res = SP_LOCAL;
            }
            else if (flags & WF_RARE)
                res = SP_RARE;
            else
                res = SP_OK;

            /* Always use the longest match and the best result.  For NOBREAK
             * we separately keep the longest match without a following good
             * word as a fall-back.
             * A lower value is a better result: a stored result is only
             * replaced when "res" is lower, the end is only moved forward
             * when "res" is equal. */
            if (nobreak_result == SP_BAD)
            {
                if (mip->mi_result2 > res)
                {
                    mip->mi_result2 = res;
                    mip->mi_end2 = mip->mi_word + wlen;
                }
                else if (mip->mi_result2 == res
                                        && mip->mi_end2 < mip->mi_word + wlen)
                    mip->mi_end2 = mip->mi_word + wlen;
            }
            else if (mip->mi_result > res)
            {
                mip->mi_result = res;
                mip->mi_end = mip->mi_word + wlen;
            }
            else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
                mip->mi_end = mip->mi_word + wlen;

            /* With SP_OK there is no need to try other flags. */
            if (mip->mi_result == SP_OK)
                break;
        }

        /* With SP_OK there is no need to try shorter endings. */
        if (mip->mi_result == SP_OK)
            break;
    }
}
1712 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1713 | /* |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 1714 | * Return TRUE if "flags" is a valid sequence of compound flags and "word" |
1715 | * does not have too many syllables. | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 1716 | */ |
1717 | static int | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1718 | can_compound(slang, word, flags) |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 1719 | slang_T *slang; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1720 | char_u *word; |
1721 | char_u *flags; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 1722 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1723 | regmatch_T regmatch; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 1724 | #ifdef FEAT_MBYTE |
1725 | char_u uflags[MAXWLEN * 2]; | ||||
1726 | int i; | ||||
1727 | #endif | ||||
1728 | char_u *p; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1729 | |
1730 | if (slang->sl_compprog == NULL) | ||||
1731 | return FALSE; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 1732 | #ifdef FEAT_MBYTE |
1733 | if (enc_utf8) | ||||
1734 | { | ||||
1735 | /* Need to convert the single byte flags to utf8 characters. */ | ||||
1736 | p = uflags; | ||||
1737 | for (i = 0; flags[i] != NUL; ++i) | ||||
1738 | p += mb_char2bytes(flags[i], p); | ||||
1739 | *p = NUL; | ||||
1740 | p = uflags; | ||||
1741 | } | ||||
1742 | else | ||||
1743 | #endif | ||||
1744 | p = flags; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1745 | regmatch.regprog = slang->sl_compprog; |
1746 | regmatch.rm_ic = FALSE; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 1747 | if (!vim_regexec(®match, p, 0)) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1748 | return FALSE; |
1749 | |||||
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 1750 | /* Count the number of syllables. This may be slow, do it last. If there |
1751 | * are too many syllables AND the number of compound words is above | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 1752 | * COMPOUNDWORDMAX then compounding is not allowed. */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1753 | if (slang->sl_compsylmax < MAXWLEN |
1754 | && count_syllables(slang, word) > slang->sl_compsylmax) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 1755 | return (int)STRLEN(flags) < slang->sl_compmax; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 1756 | return TRUE; |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 1757 | } |
1758 | |||||
1759 | /* | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1760 | * Return non-zero if the prefix indicated by "arridx" matches with the prefix |
1761 | * ID in "flags" for the word "word". | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 1762 | * The WF_RAREPFX flag is included in the return value for a rare prefix. |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1763 | */ |
1764 | static int | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1765 | valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1766 | int totprefcnt; /* nr of prefix IDs */ |
1767 | int arridx; /* idx in sl_pidxs[] */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1768 | int flags; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1769 | char_u *word; |
1770 | slang_T *slang; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1771 | int cond_req; /* only use prefixes with a condition */ |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1772 | { |
1773 | int prefcnt; | ||||
1774 | int pidx; | ||||
1775 | regprog_T *rp; | ||||
1776 | regmatch_T regmatch; | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1777 | int prefid; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1778 | |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1779 | prefid = (unsigned)flags >> 24; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1780 | for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt) |
1781 | { | ||||
1782 | pidx = slang->sl_pidxs[arridx + prefcnt]; | ||||
1783 | |||||
1784 | /* Check the prefix ID. */ | ||||
1785 | if (prefid != (pidx & 0xff)) | ||||
1786 | continue; | ||||
1787 | |||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 1788 | /* Check if the prefix doesn't combine and the word already has a |
1789 | * suffix. */ | ||||
1790 | if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC)) | ||||
1791 | continue; | ||||
1792 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1793 | /* Check the condition, if there is one. The condition index is |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 1794 | * stored in the two bytes above the prefix ID byte. */ |
1795 | rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff]; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1796 | if (rp != NULL) |
1797 | { | ||||
1798 | regmatch.regprog = rp; | ||||
1799 | regmatch.rm_ic = FALSE; | ||||
1800 | if (!vim_regexec(®match, word, 0)) | ||||
1801 | continue; | ||||
1802 | } | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1803 | else if (cond_req) |
1804 | continue; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1805 | |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1806 | /* It's a match! Return the WF_ flags. */ |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 1807 | return pidx; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1808 | } |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 1809 | return 0; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1810 | } |
1811 | |||||
1812 | /* | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1813 | * Check if the word at "mip->mi_word" has a matching prefix. |
1814 | * If it does, then check the following word. | ||||
1815 | * | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1816 | * If "mode" is "FIND_COMPOUND" then do the same after another word, find a |
1817 | * prefix in a compound word. | ||||
1818 | * | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1819 | * For a match mip->mi_result is updated. |
1820 | */ | ||||
1821 | static void | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1822 | find_prefix(mip, mode) |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1823 | matchinf_T *mip; |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1824 | int mode; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1825 | { |
1826 | idx_T arridx = 0; | ||||
1827 | int len; | ||||
1828 | int wlen = 0; | ||||
1829 | int flen; | ||||
1830 | int c; | ||||
1831 | char_u *ptr; | ||||
1832 | idx_T lo, hi, m; | ||||
1833 | slang_T *slang = mip->mi_lp->lp_slang; | ||||
1834 | char_u *byts; | ||||
1835 | idx_T *idxs; | ||||
1836 | |||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 1837 | byts = slang->sl_pbyts; |
1838 | if (byts == NULL) | ||||
1839 | return; /* array is empty */ | ||||
1840 | |||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1841 | /* We use the case-folded word here, since prefixes are always |
1842 | * case-folded. */ | ||||
1843 | ptr = mip->mi_fword; | ||||
1844 | flen = mip->mi_fwordlen; /* available case-folded bytes */ | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1845 | if (mode == FIND_COMPOUND) |
1846 | { | ||||
1847 | /* Skip over the previously found word(s). */ | ||||
1848 | ptr += mip->mi_compoff; | ||||
1849 | flen -= mip->mi_compoff; | ||||
1850 | } | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1851 | idxs = slang->sl_pidxs; |
1852 | |||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1853 | /* |
1854 | * Repeat advancing in the tree until: | ||||
1855 | * - there is a byte that doesn't match, | ||||
1856 | * - we reach the end of the tree, | ||||
1857 | * - or we reach the end of the line. | ||||
1858 | */ | ||||
1859 | for (;;) | ||||
1860 | { | ||||
1861 | if (flen == 0 && *mip->mi_fend != NUL) | ||||
1862 | flen = fold_more(mip); | ||||
1863 | |||||
1864 | len = byts[arridx++]; | ||||
1865 | |||||
1866 | /* If the first possible byte is a zero the prefix could end here. | ||||
1867 | * Check if the following word matches and supports the prefix. */ | ||||
1868 | if (byts[arridx] == 0) | ||||
1869 | { | ||||
1870 | /* There can be several prefixes with different conditions. We | ||||
1871 | * try them all, since we don't know which one will give the | ||||
1872 | * longest match. The word is the same each time, pass the list | ||||
1873 | * of possible prefixes to find_word(). */ | ||||
1874 | mip->mi_prefarridx = arridx; | ||||
1875 | mip->mi_prefcnt = len; | ||||
1876 | while (len > 0 && byts[arridx] == 0) | ||||
1877 | { | ||||
1878 | ++arridx; | ||||
1879 | --len; | ||||
1880 | } | ||||
1881 | mip->mi_prefcnt -= len; | ||||
1882 | |||||
1883 | /* Find the word that comes after the prefix. */ | ||||
1884 | mip->mi_prefixlen = wlen; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1885 | if (mode == FIND_COMPOUND) |
1886 | /* Skip over the previously found word(s). */ | ||||
1887 | mip->mi_prefixlen += mip->mi_compoff; | ||||
1888 | |||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1889 | #ifdef FEAT_MBYTE |
1890 | if (has_mbyte) | ||||
1891 | { | ||||
1892 | /* Case-folded length may differ from original length. */ | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1893 | mip->mi_cprefixlen = nofold_len(mip->mi_fword, |
1894 | mip->mi_prefixlen, mip->mi_word); | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1895 | } |
1896 | else | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 1897 | mip->mi_cprefixlen = mip->mi_prefixlen; |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 1898 | #endif |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1899 | find_word(mip, FIND_PREFIX); |
1900 | |||||
1901 | |||||
1902 | if (len == 0) | ||||
1903 | break; /* no children, word must end here */ | ||||
1904 | } | ||||
1905 | |||||
1906 | /* Stop looking at end of the line. */ | ||||
1907 | if (ptr[wlen] == NUL) | ||||
1908 | break; | ||||
1909 | |||||
1910 | /* Perform a binary search in the list of accepted bytes. */ | ||||
1911 | c = ptr[wlen]; | ||||
1912 | lo = arridx; | ||||
1913 | hi = arridx + len - 1; | ||||
1914 | while (lo < hi) | ||||
1915 | { | ||||
1916 | m = (lo + hi) / 2; | ||||
1917 | if (byts[m] > c) | ||||
1918 | hi = m - 1; | ||||
1919 | else if (byts[m] < c) | ||||
1920 | lo = m + 1; | ||||
1921 | else | ||||
1922 | { | ||||
1923 | lo = hi = m; | ||||
1924 | break; | ||||
1925 | } | ||||
1926 | } | ||||
1927 | |||||
1928 | /* Stop if there is no matching byte. */ | ||||
1929 | if (hi < lo || byts[lo] != c) | ||||
1930 | break; | ||||
1931 | |||||
1932 | /* Continue at the child (if there is one). */ | ||||
1933 | arridx = idxs[lo]; | ||||
1934 | ++wlen; | ||||
1935 | --flen; | ||||
1936 | } | ||||
1937 | } | ||||
1938 | |||||
1939 | /* | ||||
1940 | * Need to fold at least one more character. Do until next non-word character | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 1941 | * for efficiency. Include the non-word character too. |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1942 | * Return the length of the folded chars in bytes. |
1943 | */ | ||||
1944 | static int | ||||
1945 | fold_more(mip) | ||||
1946 | matchinf_T *mip; | ||||
1947 | { | ||||
1948 | int flen; | ||||
1949 | char_u *p; | ||||
1950 | |||||
1951 | p = mip->mi_fend; | ||||
1952 | do | ||||
1953 | { | ||||
1954 | mb_ptr_adv(mip->mi_fend); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 1955 | } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf)); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1956 | |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 1957 | /* Include the non-word character so that we can check for the word end. */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 1958 | if (*mip->mi_fend != NUL) |
1959 | mb_ptr_adv(mip->mi_fend); | ||||
1960 | |||||
1961 | (void)spell_casefold(p, (int)(mip->mi_fend - p), | ||||
1962 | mip->mi_fword + mip->mi_fwordlen, | ||||
1963 | MAXWLEN - mip->mi_fwordlen); | ||||
1964 | flen = STRLEN(mip->mi_fword + mip->mi_fwordlen); | ||||
1965 | mip->mi_fwordlen += flen; | ||||
1966 | return flen; | ||||
1967 | } | ||||
1968 | |||||
1969 | /* | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1970 | * Check case flags for a word. Return TRUE if the word has the requested |
1971 | * case. | ||||
1972 | */ | ||||
1973 | static int | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 1974 | spell_valid_case(wordflags, treeflags) |
1975 | int wordflags; /* flags for the checked word. */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1976 | int treeflags; /* flags for the word in the spell tree */ |
1977 | { | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 1978 | return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1979 | || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0 |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 1980 | && ((treeflags & WF_ONECAP) == 0 |
1981 | || (wordflags & WF_ONECAP) != 0))); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 1982 | } |
1983 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1984 | /* |
1985 | * Return TRUE if spell checking is not enabled. | ||||
1986 | */ | ||||
1987 | static int | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 1988 | no_spell_checking(wp) |
1989 | win_T *wp; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1990 | { |
Bram Moolenaar | a226a6d | 2006-02-26 23:59:20 +0000 | [diff] [blame] | 1991 | if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL |
1992 | || wp->w_buffer->b_langp.ga_len == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 1993 | { |
1994 | EMSG(_("E756: Spell checking is not enabled")); | ||||
1995 | return TRUE; | ||||
1996 | } | ||||
1997 | return FALSE; | ||||
1998 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 1999 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2000 | /* |
2001 | * Move to next spell error. | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2002 | * "curline" is FALSE for "[s", "]s", "[S" and "]S". |
2003 | * "curline" is TRUE to find word under/after cursor in the same line. | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2004 | * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move |
2005 | * to after badly spelled word before the cursor. | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2006 | * Return 0 if not found, length of the badly spelled word otherwise. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2007 | */ |
2008 | int | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2009 | spell_move_to(wp, dir, allwords, curline, attrp) |
2010 | win_T *wp; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2011 | int dir; /* FORWARD or BACKWARD */ |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2012 | int allwords; /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2013 | int curline; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2014 | hlf_T *attrp; /* return: attributes of bad word or NULL |
2015 | (only when "dir" is FORWARD) */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2016 | { |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2017 | linenr_T lnum; |
2018 | pos_T found_pos; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2019 | int found_len = 0; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2020 | char_u *line; |
2021 | char_u *p; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2022 | char_u *endp; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2023 | hlf_T attr; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2024 | int len; |
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 2025 | # ifdef FEAT_SYN_HL |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2026 | int has_syntax = syntax_present(wp->w_buffer); |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2027 | int col; |
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 2028 | # endif |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2029 | int can_spell; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2030 | char_u *buf = NULL; |
2031 | int buflen = 0; | ||||
2032 | int skip = 0; | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2033 | int capcol = -1; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2034 | int found_one = FALSE; |
2035 | int wrapped = FALSE; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2036 | |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2037 | if (no_spell_checking(wp)) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2038 | return 0; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2039 | |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2040 | /* |
2041 | * Start looking for bad word at the start of the line, because we can't | ||||
Bram Moolenaar | 86ca6e3 | 2006-03-29 21:06:37 +0000 | [diff] [blame] | 2042 | * start halfway a word, we don't know where it starts or ends. |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2043 | * |
2044 | * When searching backwards, we continue in the line to find the last | ||||
2045 | * bad word (in the cursor line: before the cursor). | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2046 | * |
2047 | * We concatenate the start of the next line, so that wrapped words work | ||||
2048 | * (e.g. "et<line-break>cetera"). Doesn't work when searching backwards | ||||
2049 | * though... | ||||
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2050 | */ |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2051 | lnum = wp->w_cursor.lnum; |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 2052 | clearpos(&found_pos); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2053 | |
2054 | while (!got_int) | ||||
2055 | { | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2056 | line = ml_get_buf(wp->w_buffer, lnum, FALSE); |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2057 | |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2058 | len = STRLEN(line); |
2059 | if (buflen < len + MAXWLEN + 2) | ||||
2060 | { | ||||
2061 | vim_free(buf); | ||||
2062 | buflen = len + MAXWLEN + 2; | ||||
2063 | buf = alloc(buflen); | ||||
2064 | if (buf == NULL) | ||||
2065 | break; | ||||
2066 | } | ||||
2067 | |||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2068 | /* In first line check first word for Capital. */ |
2069 | if (lnum == 1) | ||||
2070 | capcol = 0; | ||||
2071 | |||||
2072 | /* For checking first word with a capital skip white space. */ | ||||
2073 | if (capcol == 0) | ||||
2074 | capcol = skipwhite(line) - line; | ||||
2075 | |||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2076 | /* Copy the line into "buf" and append the start of the next line if |
2077 | * possible. */ | ||||
2078 | STRCPY(buf, line); | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2079 | if (lnum < wp->w_buffer->b_ml.ml_line_count) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2080 | spell_cat_line(buf + STRLEN(buf), ml_get(lnum + 1), MAXWLEN); |
2081 | |||||
2082 | p = buf + skip; | ||||
2083 | endp = buf + len; | ||||
2084 | while (p < endp) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2085 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2086 | /* When searching backward don't search after the cursor. Unless |
2087 | * we wrapped around the end of the buffer. */ | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2088 | if (dir == BACKWARD |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2089 | && lnum == wp->w_cursor.lnum |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2090 | && !wrapped |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2091 | && (colnr_T)(p - buf) >= wp->w_cursor.col) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2092 | break; |
2093 | |||||
2094 | /* start of word */ | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2095 | attr = HLF_COUNT; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2096 | len = spell_check(wp, p, &attr, &capcol, FALSE); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2097 | |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2098 | if (attr != HLF_COUNT) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2099 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2100 | /* We found a bad word. Check the attribute. */ |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2101 | if (allwords || attr == HLF_SPB) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2102 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2103 | found_one = TRUE; |
2104 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2105 | /* When searching forward only accept a bad word after |
2106 | * the cursor. */ | ||||
2107 | if (dir == BACKWARD | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2108 | || lnum != wp->w_cursor.lnum |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2109 | || (lnum == wp->w_cursor.lnum |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2110 | && (wrapped |
2111 | || (colnr_T)(curline ? p - buf + len | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2112 | : p - buf) |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2113 | > wp->w_cursor.col))) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2114 | { |
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 2115 | # ifdef FEAT_SYN_HL |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2116 | if (has_syntax) |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2117 | { |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2118 | col = p - buf; |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2119 | (void)syn_get_id(wp, lnum, (colnr_T)col, |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2120 | FALSE, &can_spell); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2121 | } |
2122 | else | ||||
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 2123 | #endif |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2124 | can_spell = TRUE; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2125 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2126 | if (can_spell) |
2127 | { | ||||
2128 | found_pos.lnum = lnum; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2129 | found_pos.col = p - buf; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2130 | #ifdef FEAT_VIRTUALEDIT |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2131 | found_pos.coladd = 0; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2132 | #endif |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2133 | if (dir == FORWARD) |
2134 | { | ||||
2135 | /* No need to search further. */ | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2136 | wp->w_cursor = found_pos; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2137 | vim_free(buf); |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2138 | if (attrp != NULL) |
2139 | *attrp = attr; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2140 | return len; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2141 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2142 | else if (curline) |
2143 | /* Insert mode completion: put cursor after | ||||
2144 | * the bad word. */ | ||||
2145 | found_pos.col += len; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2146 | found_len = len; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2147 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2148 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2149 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2150 | } |
2151 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2152 | /* advance to character after the word */ |
2153 | p += len; | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2154 | capcol -= len; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2155 | } |
2156 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2157 | if (dir == BACKWARD && found_pos.lnum != 0) |
2158 | { | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2159 | /* Use the last match in the line (before the cursor). */ |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 2160 | wp->w_cursor = found_pos; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2161 | vim_free(buf); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2162 | return found_len; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2163 | } |
2164 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2165 | if (curline) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2166 | break; /* only check cursor line */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2167 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2168 | /* Advance to next line. */ |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2169 | if (dir == BACKWARD) |
2170 | { | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2171 | /* If we are back at the starting line and searched it again there |
2172 | * is no match, give up. */ | ||||
2173 | if (lnum == wp->w_cursor.lnum && wrapped) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2174 | break; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2175 | |
2176 | if (lnum > 1) | ||||
2177 | --lnum; | ||||
2178 | else if (!p_ws) | ||||
2179 | break; /* at first line and 'nowrapscan' */ | ||||
2180 | else | ||||
2181 | { | ||||
2182 | /* Wrap around to the end of the buffer. May search the | ||||
2183 | * starting line again and accept the last match. */ | ||||
2184 | lnum = wp->w_buffer->b_ml.ml_line_count; | ||||
2185 | wrapped = TRUE; | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 2186 | if (!shortmess(SHM_SEARCH)) |
2187 | give_warning((char_u *)_(top_bot_msg), TRUE); | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2188 | } |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2189 | capcol = -1; |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2190 | } |
2191 | else | ||||
2192 | { | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2193 | if (lnum < wp->w_buffer->b_ml.ml_line_count) |
2194 | ++lnum; | ||||
2195 | else if (!p_ws) | ||||
2196 | break; /* at first line and 'nowrapscan' */ | ||||
2197 | else | ||||
2198 | { | ||||
2199 | /* Wrap around to the start of the buffer. May search the | ||||
2200 | * starting line again and accept the first match. */ | ||||
2201 | lnum = 1; | ||||
2202 | wrapped = TRUE; | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 2203 | if (!shortmess(SHM_SEARCH)) |
2204 | give_warning((char_u *)_(bot_top_msg), TRUE); | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2205 | } |
2206 | |||||
2207 | /* If we are back at the starting line and there is no match then | ||||
2208 | * give up. */ | ||||
2209 | if (lnum == wp->w_cursor.lnum && !found_one) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2210 | break; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2211 | |
2212 | /* Skip the characters at the start of the next line that were | ||||
2213 | * included in a match crossing line boundaries. */ | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 2214 | if (attr == HLF_COUNT) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2215 | skip = p - endp; |
2216 | else | ||||
2217 | skip = 0; | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2218 | |
2219 | /* Capscol skips over the inserted space. */ | ||||
2220 | --capcol; | ||||
2221 | |||||
2222 | /* But after empty line check first word in next line */ | ||||
2223 | if (*skipwhite(line) == NUL) | ||||
2224 | capcol = 0; | ||||
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 2225 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2226 | |
2227 | line_breakcheck(); | ||||
2228 | } | ||||
2229 | |||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2230 | vim_free(buf); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2231 | return 0; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2232 | } |
2233 | |||||
2234 | /* | ||||
2235 | * For spell checking: concatenate the start of the following line "line" into | ||||
2236 | * "buf", blanking-out special characters. Copy less then "maxlen" bytes. | ||||
2237 | */ | ||||
2238 | void | ||||
2239 | spell_cat_line(buf, line, maxlen) | ||||
2240 | char_u *buf; | ||||
2241 | char_u *line; | ||||
2242 | int maxlen; | ||||
2243 | { | ||||
2244 | char_u *p; | ||||
2245 | int n; | ||||
2246 | |||||
2247 | p = skipwhite(line); | ||||
2248 | while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL) | ||||
2249 | p = skipwhite(p + 1); | ||||
2250 | |||||
2251 | if (*p != NUL) | ||||
2252 | { | ||||
2253 | *buf = ' '; | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2254 | vim_strncpy(buf + 1, line, maxlen - 2); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 2255 | n = p - line; |
2256 | if (n >= maxlen) | ||||
2257 | n = maxlen - 1; | ||||
2258 | vim_memset(buf + 1, ' ', n); | ||||
2259 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2260 | } |
2261 | |||||
/*
 * Structure used for the cookie argument of do_in_runtimepath().
 * spell_load_lang() fills it in and passes its address along with the
 * spell_load_cb() callback.
 */
typedef struct spelload_S
{
    char_u  sl_lang[MAXWLEN + 1];       /* language name */
    slang_T *sl_slang;                  /* resulting slang_T struct */
    int     sl_nobreak;                 /* NOBREAK language found */
} spelload_T;
2271 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2272 | /* |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2273 | * Load word list(s) for "lang" from Vim spell file(s). |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2274 | * "lang" must be the language without the region: e.g., "en". |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2275 | */ |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2276 | static void |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2277 | spell_load_lang(lang) |
2278 | char_u *lang; | ||||
2279 | { | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2280 | char_u fname_enc[85]; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2281 | int r; |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2282 | spelload_T sl; |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2283 | #ifdef FEAT_AUTOCMD |
2284 | int round; | ||||
2285 | #endif | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2286 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2287 | /* Copy the language name to pass it to spell_load_cb() as a cookie. |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2288 | * It's truncated when an error is detected. */ |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2289 | STRCPY(sl.sl_lang, lang); |
2290 | sl.sl_slang = NULL; | ||||
2291 | sl.sl_nobreak = FALSE; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2292 | |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2293 | #ifdef FEAT_AUTOCMD |
2294 | /* We may retry when no spell file is found for the language, an | ||||
2295 | * autocommand may load it then. */ | ||||
2296 | for (round = 1; round <= 2; ++round) | ||||
2297 | #endif | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2298 | { |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2299 | /* |
2300 | * Find the first spell file for "lang" in 'runtimepath' and load it. | ||||
2301 | */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2302 | vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2303 | "spell/%s.%s.spl", lang, spell_enc()); |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2304 | r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2305 | |
2306 | if (r == FAIL && *sl.sl_lang != NUL) | ||||
2307 | { | ||||
2308 | /* Try loading the ASCII version. */ | ||||
2309 | vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5, | ||||
2310 | "spell/%s.ascii.spl", lang); | ||||
2311 | r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl); | ||||
2312 | |||||
2313 | #ifdef FEAT_AUTOCMD | ||||
2314 | if (r == FAIL && *sl.sl_lang != NUL && round == 1 | ||||
2315 | && apply_autocmds(EVENT_SPELLFILEMISSING, lang, | ||||
2316 | curbuf->b_fname, FALSE, curbuf)) | ||||
2317 | continue; | ||||
2318 | break; | ||||
2319 | #endif | ||||
2320 | } | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 2321 | #ifdef FEAT_AUTOCMD |
2322 | break; | ||||
2323 | #endif | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2324 | } |
2325 | |||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2326 | if (r == FAIL) |
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2327 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2328 | smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""), |
2329 | lang, spell_enc(), lang); | ||||
Bram Moolenaar | b8a7b56 | 2006-02-01 21:47:16 +0000 | [diff] [blame] | 2330 | } |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2331 | else if (sl.sl_slang != NULL) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2332 | { |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 2333 | /* At least one file was loaded, now load ALL the additions. */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2334 | STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl"); |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2335 | do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2336 | } |
2337 | } | ||||
2338 | |||||
2339 | /* | ||||
2340 | * Return the encoding used for spell checking: Use 'encoding', except that we | ||||
2341 | * use "latin1" for "latin9". And limit to 60 characters (just in case). | ||||
2342 | */ | ||||
2343 | static char_u * | ||||
2344 | spell_enc() | ||||
2345 | { | ||||
2346 | |||||
2347 | #ifdef FEAT_MBYTE | ||||
2348 | if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0) | ||||
2349 | return p_enc; | ||||
2350 | #endif | ||||
2351 | return (char_u *)"latin1"; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2352 | } |
2353 | |||||
2354 | /* | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 2355 | * Get the name of the .spl file for the internal wordlist into |
2356 | * "fname[MAXPATHL]". | ||||
2357 | */ | ||||
2358 | static void | ||||
2359 | int_wordlist_spl(fname) | ||||
2360 | char_u *fname; | ||||
2361 | { | ||||
2362 | vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl", | ||||
2363 | int_wordlist, spell_enc()); | ||||
2364 | } | ||||
2365 | |||||
2366 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2367 | * Allocate a new slang_T for language "lang". "lang" can be NULL. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2368 | * Caller must fill "sl_next". |
2369 | */ | ||||
2370 | static slang_T * | ||||
2371 | slang_alloc(lang) | ||||
2372 | char_u *lang; | ||||
2373 | { | ||||
2374 | slang_T *lp; | ||||
2375 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2376 | lp = (slang_T *)alloc_clear(sizeof(slang_T)); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2377 | if (lp != NULL) |
2378 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2379 | if (lang != NULL) |
2380 | lp->sl_name = vim_strsave(lang); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2381 | ga_init2(&lp->sl_rep, sizeof(fromto_T), 10); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2382 | ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2383 | lp->sl_compmax = MAXWLEN; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2384 | lp->sl_compsylmax = MAXWLEN; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2385 | hash_init(&lp->sl_wordcount); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2386 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2387 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2388 | return lp; |
2389 | } | ||||
2390 | |||||
2391 | /* | ||||
2392 | * Free the contents of an slang_T and the structure itself. | ||||
2393 | */ | ||||
2394 | static void | ||||
2395 | slang_free(lp) | ||||
2396 | slang_T *lp; | ||||
2397 | { | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2398 | vim_free(lp->sl_name); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2399 | vim_free(lp->sl_fname); |
2400 | slang_clear(lp); | ||||
2401 | vim_free(lp); | ||||
2402 | } | ||||
2403 | |||||
2404 | /* | ||||
2405 | * Clear an slang_T so that the file can be reloaded. | ||||
2406 | */ | ||||
2407 | static void | ||||
2408 | slang_clear(lp) | ||||
2409 | slang_T *lp; | ||||
2410 | { | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2411 | garray_T *gap; |
2412 | fromto_T *ftp; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2413 | salitem_T *smp; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2414 | int i; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2415 | int round; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2416 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2417 | vim_free(lp->sl_fbyts); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2418 | lp->sl_fbyts = NULL; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2419 | vim_free(lp->sl_kbyts); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2420 | lp->sl_kbyts = NULL; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2421 | vim_free(lp->sl_pbyts); |
2422 | lp->sl_pbyts = NULL; | ||||
2423 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2424 | vim_free(lp->sl_fidxs); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2425 | lp->sl_fidxs = NULL; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2426 | vim_free(lp->sl_kidxs); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2427 | lp->sl_kidxs = NULL; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2428 | vim_free(lp->sl_pidxs); |
2429 | lp->sl_pidxs = NULL; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2430 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2431 | for (round = 1; round <= 2; ++round) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2432 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2433 | gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal; |
2434 | while (gap->ga_len > 0) | ||||
2435 | { | ||||
2436 | ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len]; | ||||
2437 | vim_free(ftp->ft_from); | ||||
2438 | vim_free(ftp->ft_to); | ||||
2439 | } | ||||
2440 | ga_clear(gap); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2441 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2442 | |
2443 | gap = &lp->sl_sal; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 2444 | if (lp->sl_sofo) |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2445 | { |
2446 | /* "ga_len" is set to 1 without adding an item for latin1 */ | ||||
2447 | if (gap->ga_data != NULL) | ||||
2448 | /* SOFOFROM and SOFOTO items: free lists of wide characters. */ | ||||
2449 | for (i = 0; i < gap->ga_len; ++i) | ||||
2450 | vim_free(((int **)gap->ga_data)[i]); | ||||
2451 | } | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 2452 | else |
2453 | /* SAL items: free salitem_T items */ | ||||
2454 | while (gap->ga_len > 0) | ||||
2455 | { | ||||
2456 | smp = &((salitem_T *)gap->ga_data)[--gap->ga_len]; | ||||
2457 | vim_free(smp->sm_lead); | ||||
2458 | /* Don't free sm_oneof and sm_rules, they point into sm_lead. */ | ||||
2459 | vim_free(smp->sm_to); | ||||
2460 | #ifdef FEAT_MBYTE | ||||
2461 | vim_free(smp->sm_lead_w); | ||||
2462 | vim_free(smp->sm_oneof_w); | ||||
2463 | vim_free(smp->sm_to_w); | ||||
2464 | #endif | ||||
2465 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2466 | ga_clear(gap); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2467 | |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2468 | for (i = 0; i < lp->sl_prefixcnt; ++i) |
2469 | vim_free(lp->sl_prefprog[i]); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2470 | lp->sl_prefixcnt = 0; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2471 | vim_free(lp->sl_prefprog); |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2472 | lp->sl_prefprog = NULL; |
2473 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 2474 | vim_free(lp->sl_info); |
2475 | lp->sl_info = NULL; | ||||
2476 | |||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2477 | vim_free(lp->sl_midword); |
2478 | lp->sl_midword = NULL; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2479 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2480 | vim_free(lp->sl_compprog); |
2481 | vim_free(lp->sl_compstartflags); | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 2482 | vim_free(lp->sl_compallflags); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2483 | lp->sl_compprog = NULL; |
2484 | lp->sl_compstartflags = NULL; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 2485 | lp->sl_compallflags = NULL; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2486 | |
2487 | vim_free(lp->sl_syllable); | ||||
2488 | lp->sl_syllable = NULL; | ||||
2489 | ga_clear(&lp->sl_syl_items); | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 2490 | |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 2491 | ga_clear_strings(&lp->sl_comppat); |
2492 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2493 | hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF); |
2494 | hash_init(&lp->sl_wordcount); | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 2495 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2496 | #ifdef FEAT_MBYTE |
2497 | hash_clear_all(&lp->sl_map_hash, 0); | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 2498 | #endif |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2499 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2500 | /* Clear info from .sug file. */ |
2501 | slang_clear_sug(lp); | ||||
2502 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2503 | lp->sl_compmax = MAXWLEN; |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2504 | lp->sl_compminlen = 0; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2505 | lp->sl_compsylmax = MAXWLEN; |
2506 | lp->sl_regions[0] = NUL; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2507 | } |
2508 | |||||
2509 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2510 | * Clear the info from the .sug file in "lp". |
2511 | */ | ||||
2512 | static void | ||||
2513 | slang_clear_sug(lp) | ||||
2514 | slang_T *lp; | ||||
2515 | { | ||||
2516 | vim_free(lp->sl_sbyts); | ||||
2517 | lp->sl_sbyts = NULL; | ||||
2518 | vim_free(lp->sl_sidxs); | ||||
2519 | lp->sl_sidxs = NULL; | ||||
2520 | close_spellbuf(lp->sl_sugbuf); | ||||
2521 | lp->sl_sugbuf = NULL; | ||||
2522 | lp->sl_sugloaded = FALSE; | ||||
2523 | lp->sl_sugtime = 0; | ||||
2524 | } | ||||
2525 | |||||
2526 | /* | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2527 | * Load one spell file and store the info into a slang_T. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2528 | * Invoked through do_in_runtimepath(). |
2529 | */ | ||||
2530 | static void | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2531 | spell_load_cb(fname, cookie) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2532 | char_u *fname; |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2533 | void *cookie; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2534 | { |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 2535 | spelload_T *slp = (spelload_T *)cookie; |
2536 | slang_T *slang; | ||||
2537 | |||||
2538 | slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE); | ||||
2539 | if (slang != NULL) | ||||
2540 | { | ||||
2541 | /* When a previously loaded file has NOBREAK also use it for the | ||||
2542 | * ".add" files. */ | ||||
2543 | if (slp->sl_nobreak && slang->sl_add) | ||||
2544 | slang->sl_nobreak = TRUE; | ||||
2545 | else if (slang->sl_nobreak) | ||||
2546 | slp->sl_nobreak = TRUE; | ||||
2547 | |||||
2548 | slp->sl_slang = slang; | ||||
2549 | } | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2550 | } |
2551 | |||||
2552 | /* | ||||
2553 | * Load one spell file and store the info into a slang_T. | ||||
2554 | * | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2555 | * This is invoked in three ways: |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2556 | * - From spell_load_cb() to load a spell file for the first time. "lang" is |
2557 | * the language name, "old_lp" is NULL. Will allocate an slang_T. | ||||
2558 | * - To reload a spell file that was changed. "lang" is NULL and "old_lp" | ||||
2559 | * points to the existing slang_T. | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2560 | * - Just after writing a .spl file; it's read back to produce the .sug file. |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 2561 | * "old_lp" is NULL and "lang" is NULL. Will allocate an slang_T. |
2562 | * | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2563 | * Returns the slang_T the spell file was loaded into. NULL for error. |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2564 | */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2565 | static slang_T * |
2566 | spell_load_file(fname, lang, old_lp, silent) | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2567 | char_u *fname; |
2568 | char_u *lang; | ||||
2569 | slang_T *old_lp; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2570 | int silent; /* no error if file doesn't exist */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2571 | { |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2572 | FILE *fd; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2573 | char_u buf[VIMSPELLMAGICL]; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2574 | char_u *p; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2575 | int i; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2576 | int n; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2577 | int len; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2578 | char_u *save_sourcing_name = sourcing_name; |
2579 | linenr_T save_sourcing_lnum = sourcing_lnum; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2580 | slang_T *lp = NULL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2581 | int c = 0; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2582 | int res; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2583 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2584 | fd = mch_fopen((char *)fname, "r"); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2585 | if (fd == NULL) |
2586 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2587 | if (!silent) |
2588 | EMSG2(_(e_notopen), fname); | ||||
2589 | else if (p_verbose > 2) | ||||
2590 | { | ||||
2591 | verbose_enter(); | ||||
2592 | smsg((char_u *)e_notopen, fname); | ||||
2593 | verbose_leave(); | ||||
2594 | } | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 2595 | goto endFAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2596 | } |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2597 | if (p_verbose > 2) |
2598 | { | ||||
2599 | verbose_enter(); | ||||
2600 | smsg((char_u *)_("Reading spell file \"%s\""), fname); | ||||
2601 | verbose_leave(); | ||||
2602 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2603 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2604 | if (old_lp == NULL) |
2605 | { | ||||
2606 | lp = slang_alloc(lang); | ||||
2607 | if (lp == NULL) | ||||
2608 | goto endFAIL; | ||||
2609 | |||||
2610 | /* Remember the file name, used to reload the file when it's updated. */ | ||||
2611 | lp->sl_fname = vim_strsave(fname); | ||||
2612 | if (lp->sl_fname == NULL) | ||||
2613 | goto endFAIL; | ||||
2614 | |||||
2615 | /* Check for .add.spl. */ | ||||
2616 | lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL; | ||||
2617 | } | ||||
2618 | else | ||||
2619 | lp = old_lp; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2620 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2621 | /* Set sourcing_name, so that error messages mention the file name. */ |
2622 | sourcing_name = fname; | ||||
2623 | sourcing_lnum = 0; | ||||
2624 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2625 | /* |
2626 | * <HEADER>: <fileID> | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2627 | */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2628 | for (i = 0; i < VIMSPELLMAGICL; ++i) |
2629 | buf[i] = getc(fd); /* <fileID> */ | ||||
2630 | if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) | ||||
2631 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2632 | EMSG(_("E757: This does not look like a spell file")); |
2633 | goto endFAIL; | ||||
2634 | } | ||||
2635 | c = getc(fd); /* <versionnr> */ | ||||
2636 | if (c < VIMSPELLVERSION) | ||||
2637 | { | ||||
2638 | EMSG(_("E771: Old spell file, needs to be updated")); | ||||
2639 | goto endFAIL; | ||||
2640 | } | ||||
2641 | else if (c > VIMSPELLVERSION) | ||||
2642 | { | ||||
2643 | EMSG(_("E772: Spell file is for newer version of Vim")); | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 2644 | goto endFAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2645 | } |
2646 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2647 | |
2648 | /* | ||||
2649 | * <SECTIONS>: <section> ... <sectionend> | ||||
2650 | * <section>: <sectionID> <sectionflags> <sectionlen> (section contents) | ||||
2651 | */ | ||||
2652 | for (;;) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2653 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2654 | n = getc(fd); /* <sectionID> or <sectionend> */ |
2655 | if (n == SN_END) | ||||
2656 | break; | ||||
2657 | c = getc(fd); /* <sectionflags> */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 2658 | len = get4c(fd); /* <sectionlen> */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2659 | if (len < 0) |
2660 | goto truncerr; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2661 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2662 | res = 0; |
2663 | switch (n) | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 2664 | { |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 2665 | case SN_INFO: |
2666 | lp->sl_info = read_string(fd, len); /* <infotext> */ | ||||
2667 | if (lp->sl_info == NULL) | ||||
2668 | goto endFAIL; | ||||
2669 | break; | ||||
2670 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2671 | case SN_REGION: |
2672 | res = read_region_section(fd, lp, len); | ||||
2673 | break; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 2674 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2675 | case SN_CHARFLAGS: |
2676 | res = read_charflags_section(fd); | ||||
2677 | break; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 2678 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2679 | case SN_MIDWORD: |
2680 | lp->sl_midword = read_string(fd, len); /* <midword> */ | ||||
2681 | if (lp->sl_midword == NULL) | ||||
2682 | goto endFAIL; | ||||
2683 | break; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 2684 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2685 | case SN_PREFCOND: |
2686 | res = read_prefcond_section(fd, lp); | ||||
2687 | break; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2688 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2689 | case SN_REP: |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2690 | res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first); |
2691 | break; | ||||
2692 | |||||
2693 | case SN_REPSAL: | ||||
2694 | res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2695 | break; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2696 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2697 | case SN_SAL: |
2698 | res = read_sal_section(fd, lp); | ||||
2699 | break; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2700 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2701 | case SN_SOFO: |
2702 | res = read_sofo_section(fd, lp); | ||||
2703 | break; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 2704 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2705 | case SN_MAP: |
2706 | p = read_string(fd, len); /* <mapstr> */ | ||||
2707 | if (p == NULL) | ||||
2708 | goto endFAIL; | ||||
2709 | set_map_str(lp, p); | ||||
2710 | vim_free(p); | ||||
2711 | break; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2712 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2713 | case SN_WORDS: |
2714 | res = read_words_section(fd, lp, len); | ||||
2715 | break; | ||||
2716 | |||||
2717 | case SN_SUGFILE: | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 2718 | lp->sl_sugtime = get8c(fd); /* <timestamp> */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2719 | break; |
2720 | |||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 2721 | case SN_NOSPLITSUGS: |
2722 | lp->sl_nosplitsugs = TRUE; /* <timestamp> */ | ||||
2723 | break; | ||||
2724 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2725 | case SN_COMPOUND: |
2726 | res = read_compound(fd, lp, len); | ||||
2727 | break; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2728 | |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 2729 | case SN_NOBREAK: |
2730 | lp->sl_nobreak = TRUE; | ||||
2731 | break; | ||||
2732 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2733 | case SN_SYLLABLE: |
2734 | lp->sl_syllable = read_string(fd, len); /* <syllable> */ | ||||
2735 | if (lp->sl_syllable == NULL) | ||||
2736 | goto endFAIL; | ||||
2737 | if (init_syl_tab(lp) == FAIL) | ||||
2738 | goto endFAIL; | ||||
2739 | break; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 2740 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2741 | default: |
2742 | /* Unsupported section. When it's required give an error | ||||
2743 | * message. When it's not required skip the contents. */ | ||||
2744 | if (c & SNF_REQUIRED) | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 2745 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2746 | EMSG(_("E770: Unsupported section in spell file")); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 2747 | goto endFAIL; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 2748 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2749 | while (--len >= 0) |
2750 | if (getc(fd) < 0) | ||||
2751 | goto truncerr; | ||||
2752 | break; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 2753 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2754 | someerror: |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2755 | if (res == SP_FORMERROR) |
2756 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2757 | EMSG(_(e_format)); |
2758 | goto endFAIL; | ||||
2759 | } | ||||
2760 | if (res == SP_TRUNCERROR) | ||||
2761 | { | ||||
2762 | truncerr: | ||||
2763 | EMSG(_(e_spell_trunc)); | ||||
2764 | goto endFAIL; | ||||
2765 | } | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2766 | if (res == SP_OTHERERROR) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2767 | goto endFAIL; |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 2768 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2769 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2770 | /* <LWORDTREE> */ |
2771 | res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0); | ||||
2772 | if (res != 0) | ||||
2773 | goto someerror; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2774 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2775 | /* <KWORDTREE> */ |
2776 | res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0); | ||||
2777 | if (res != 0) | ||||
2778 | goto someerror; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2779 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 2780 | /* <PREFIXTREE> */ |
2781 | res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE, | ||||
2782 | lp->sl_prefixcnt); | ||||
2783 | if (res != 0) | ||||
2784 | goto someerror; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 2785 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2786 | /* For a new file link it in the list of spell files. */ |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 2787 | if (old_lp == NULL && lang != NULL) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2788 | { |
2789 | lp->sl_next = first_lang; | ||||
2790 | first_lang = lp; | ||||
2791 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2792 | |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 2793 | goto endOK; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2794 | |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 2795 | endFAIL: |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 2796 | if (lang != NULL) |
2797 | /* truncating the name signals the error to spell_load_lang() */ | ||||
2798 | *lang = NUL; | ||||
2799 | if (lp != NULL && old_lp == NULL) | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 2800 | slang_free(lp); |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 2801 | lp = NULL; |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 2802 | |
2803 | endOK: | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2804 | if (fd != NULL) |
2805 | fclose(fd); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2806 | sourcing_name = save_sourcing_name; |
2807 | sourcing_lnum = save_sourcing_lnum; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 2808 | |
2809 | return lp; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 2810 | } |
2811 | |||||
/*
 * Read 2 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before both bytes were read, so that a truncated file is not mistaken for
 * a valid number.
 */
static int
get2c(fd)
    FILE        *fd;
{
    int         hi;
    int         lo;

    hi = getc(fd);
    if (hi == EOF)
        return -1;
    lo = getc(fd);
    if (lo == EOF)
        return -1;
    return (hi << 8) + lo;
}
2825 | |||||
/*
 * Read 3 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before all three bytes were read, so that a truncated file is not mistaken
 * for a valid number.
 */
static int
get3c(fd)
    FILE        *fd;
{
    int         n = 0;
    int         c;
    int         i;

    for (i = 0; i < 3; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + c;
    }
    return n;
}
2840 | |||||
/*
 * Read 4 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered
 * before all four bytes were read, so that a truncated file is not mistaken
 * for a valid number.
 */
static int
get4c(fd)
    FILE        *fd;
{
    /* Use unsigned rather than a signed type, otherwise the result of the
     * left-shift is undefined when it sets the most significant bit. */
    unsigned    n = 0;
    int         c;
    int         i;

    for (i = 0; i < 4; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + (unsigned)c;
    }
    return (int)n;
}
2856 | |||||
/*
 * Read 8 bytes from "fd" and turn them into a time_t, MSB first.
 * Returns -1 when the end of the file (or a read error) is reached before
 * all eight bytes were read.
 */
static time_t
get8c(fd)
    FILE        *fd;
{
    time_t      n = 0;
    int         c;
    int         i;

    for (i = 0; i < 8; ++i)
    {
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + c;
    }
    return n;
}
2871 | |||||
2872 | /* | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2873 | * Read a length field from "fd" in "cnt_bytes" bytes. |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 2874 | * Allocate memory, read the string into it and add a NUL at the end. |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2875 | * Returns NULL when the count is zero. |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2876 | * Sets "*cntp" to SP_*ERROR when there is an error, length of the result |
2877 | * otherwise. | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2878 | */ |
2879 | static char_u * | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 2880 | read_cnt_string(fd, cnt_bytes, cntp) |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2881 | FILE *fd; |
2882 | int cnt_bytes; | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 2883 | int *cntp; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2884 | { |
2885 | int cnt = 0; | ||||
2886 | int i; | ||||
2887 | char_u *str; | ||||
2888 | |||||
2889 | /* read the length bytes, MSB first */ | ||||
2890 | for (i = 0; i < cnt_bytes; ++i) | ||||
2891 | cnt = (cnt << 8) + getc(fd); | ||||
2892 | if (cnt < 0) | ||||
2893 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2894 | *cntp = SP_TRUNCERROR; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2895 | return NULL; |
2896 | } | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 2897 | *cntp = cnt; |
2898 | if (cnt == 0) | ||||
2899 | return NULL; /* nothing to read, return NULL */ | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2900 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2901 | str = read_string(fd, cnt); |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2902 | if (str == NULL) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 2903 | *cntp = SP_OTHERERROR; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 2904 | return str; |
2905 | } | ||||
2906 | |||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 2907 | /* |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 2908 | * Read a string of length "cnt" from "fd" into allocated memory. |
2909 | * Returns NULL when out of memory. | ||||
2910 | */ | ||||
2911 | static char_u * | ||||
2912 | read_string(fd, cnt) | ||||
2913 | FILE *fd; | ||||
2914 | int cnt; | ||||
2915 | { | ||||
2916 | char_u *str; | ||||
2917 | int i; | ||||
2918 | |||||
2919 | /* allocate memory */ | ||||
2920 | str = alloc((unsigned)cnt + 1); | ||||
2921 | if (str != NULL) | ||||
2922 | { | ||||
2923 | /* Read the string. Doesn't check for truncated file. */ | ||||
2924 | for (i = 0; i < cnt; ++i) | ||||
2925 | str[i] = getc(fd); | ||||
2926 | str[i] = NUL; | ||||
2927 | } | ||||
2928 | return str; | ||||
2929 | } | ||||
2930 | |||||
2931 | /* | ||||
2932 | * Read SN_REGION: <regionname> ... | ||||
2933 | * Return SP_*ERROR flags. | ||||
2934 | */ | ||||
2935 | static int | ||||
2936 | read_region_section(fd, lp, len) | ||||
2937 | FILE *fd; | ||||
2938 | slang_T *lp; | ||||
2939 | int len; | ||||
2940 | { | ||||
2941 | int i; | ||||
2942 | |||||
2943 | if (len > 16) | ||||
2944 | return SP_FORMERROR; | ||||
2945 | for (i = 0; i < len; ++i) | ||||
2946 | lp->sl_regions[i] = getc(fd); /* <regionname> */ | ||||
2947 | lp->sl_regions[len] = NUL; | ||||
2948 | return 0; | ||||
2949 | } | ||||
2950 | |||||
2951 | /* | ||||
2952 | * Read SN_CHARFLAGS section: <charflagslen> <charflags> | ||||
2953 | * <folcharslen> <folchars> | ||||
2954 | * Return SP_*ERROR flags. | ||||
2955 | */ | ||||
2956 | static int | ||||
2957 | read_charflags_section(fd) | ||||
2958 | FILE *fd; | ||||
2959 | { | ||||
2960 | char_u *flags; | ||||
2961 | char_u *fol; | ||||
2962 | int flagslen, follen; | ||||
2963 | |||||
2964 | /* <charflagslen> <charflags> */ | ||||
2965 | flags = read_cnt_string(fd, 1, &flagslen); | ||||
2966 | if (flagslen < 0) | ||||
2967 | return flagslen; | ||||
2968 | |||||
2969 | /* <folcharslen> <folchars> */ | ||||
2970 | fol = read_cnt_string(fd, 2, &follen); | ||||
2971 | if (follen < 0) | ||||
2972 | { | ||||
2973 | vim_free(flags); | ||||
2974 | return follen; | ||||
2975 | } | ||||
2976 | |||||
2977 | /* Set the word-char flags and fill SPELL_ISUPPER() table. */ | ||||
2978 | if (flags != NULL && fol != NULL) | ||||
2979 | set_spell_charflags(flags, flagslen, fol); | ||||
2980 | |||||
2981 | vim_free(flags); | ||||
2982 | vim_free(fol); | ||||
2983 | |||||
2984 | /* When <charflagslen> is zero then <fcharlen> must also be zero. */ | ||||
2985 | if ((flags == NULL) != (fol == NULL)) | ||||
2986 | return SP_FORMERROR; | ||||
2987 | return 0; | ||||
2988 | } | ||||
2989 | |||||
2990 | /* | ||||
2991 | * Read SN_PREFCOND section. | ||||
2992 | * Return SP_*ERROR flags. | ||||
2993 | */ | ||||
2994 | static int | ||||
2995 | read_prefcond_section(fd, lp) | ||||
2996 | FILE *fd; | ||||
2997 | slang_T *lp; | ||||
2998 | { | ||||
2999 | int cnt; | ||||
3000 | int i; | ||||
3001 | int n; | ||||
3002 | char_u *p; | ||||
3003 | char_u buf[MAXWLEN + 1]; | ||||
3004 | |||||
3005 | /* <prefcondcnt> <prefcond> ... */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 3006 | cnt = get2c(fd); /* <prefcondcnt> */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3007 | if (cnt <= 0) |
3008 | return SP_FORMERROR; | ||||
3009 | |||||
3010 | lp->sl_prefprog = (regprog_T **)alloc_clear( | ||||
3011 | (unsigned)sizeof(regprog_T *) * cnt); | ||||
3012 | if (lp->sl_prefprog == NULL) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3013 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3014 | lp->sl_prefixcnt = cnt; |
3015 | |||||
3016 | for (i = 0; i < cnt; ++i) | ||||
3017 | { | ||||
3018 | /* <prefcond> : <condlen> <condstr> */ | ||||
3019 | n = getc(fd); /* <condlen> */ | ||||
3020 | if (n < 0 || n >= MAXWLEN) | ||||
3021 | return SP_FORMERROR; | ||||
3022 | |||||
3023 | /* When <condlen> is zero we have an empty condition. Otherwise | ||||
3024 | * compile the regexp program used to check for the condition. */ | ||||
3025 | if (n > 0) | ||||
3026 | { | ||||
3027 | buf[0] = '^'; /* always match at one position only */ | ||||
3028 | p = buf + 1; | ||||
3029 | while (n-- > 0) | ||||
3030 | *p++ = getc(fd); /* <condstr> */ | ||||
3031 | *p = NUL; | ||||
3032 | lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); | ||||
3033 | } | ||||
3034 | } | ||||
3035 | return 0; | ||||
3036 | } | ||||
3037 | |||||
3038 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3039 | * Read REP or REPSAL items section from "fd": <repcount> <rep> ... |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3040 | * Return SP_*ERROR flags. |
3041 | */ | ||||
3042 | static int | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3043 | read_rep_section(fd, gap, first) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3044 | FILE *fd; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3045 | garray_T *gap; |
3046 | short *first; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3047 | { |
3048 | int cnt; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3049 | fromto_T *ftp; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3050 | int i; |
3051 | |||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 3052 | cnt = get2c(fd); /* <repcount> */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3053 | if (cnt < 0) |
3054 | return SP_TRUNCERROR; | ||||
3055 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3056 | if (ga_grow(gap, cnt) == FAIL) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3057 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3058 | |
3059 | /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | ||||
3060 | for (; gap->ga_len < cnt; ++gap->ga_len) | ||||
3061 | { | ||||
3062 | ftp = &((fromto_T *)gap->ga_data)[gap->ga_len]; | ||||
3063 | ftp->ft_from = read_cnt_string(fd, 1, &i); | ||||
3064 | if (i < 0) | ||||
3065 | return i; | ||||
3066 | if (i == 0) | ||||
3067 | return SP_FORMERROR; | ||||
3068 | ftp->ft_to = read_cnt_string(fd, 1, &i); | ||||
3069 | if (i <= 0) | ||||
3070 | { | ||||
3071 | vim_free(ftp->ft_from); | ||||
3072 | if (i < 0) | ||||
3073 | return i; | ||||
3074 | return SP_FORMERROR; | ||||
3075 | } | ||||
3076 | } | ||||
3077 | |||||
3078 | /* Fill the first-index table. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3079 | for (i = 0; i < 256; ++i) |
3080 | first[i] = -1; | ||||
3081 | for (i = 0; i < gap->ga_len; ++i) | ||||
3082 | { | ||||
3083 | ftp = &((fromto_T *)gap->ga_data)[i]; | ||||
3084 | if (first[*ftp->ft_from] == -1) | ||||
3085 | first[*ftp->ft_from] = i; | ||||
3086 | } | ||||
3087 | return 0; | ||||
3088 | } | ||||
3089 | |||||
3090 | /* | ||||
3091 | * Read SN_SAL section: <salflags> <salcount> <sal> ... | ||||
3092 | * Return SP_*ERROR flags. | ||||
3093 | */ | ||||
3094 | static int | ||||
3095 | read_sal_section(fd, slang) | ||||
3096 | FILE *fd; | ||||
3097 | slang_T *slang; | ||||
3098 | { | ||||
3099 | int i; | ||||
3100 | int cnt; | ||||
3101 | garray_T *gap; | ||||
3102 | salitem_T *smp; | ||||
3103 | int ccnt; | ||||
3104 | char_u *p; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3105 | int c = NUL; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3106 | |
3107 | slang->sl_sofo = FALSE; | ||||
3108 | |||||
3109 | i = getc(fd); /* <salflags> */ | ||||
3110 | if (i & SAL_F0LLOWUP) | ||||
3111 | slang->sl_followup = TRUE; | ||||
3112 | if (i & SAL_COLLAPSE) | ||||
3113 | slang->sl_collapse = TRUE; | ||||
3114 | if (i & SAL_REM_ACCENTS) | ||||
3115 | slang->sl_rem_accents = TRUE; | ||||
3116 | |||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 3117 | cnt = get2c(fd); /* <salcount> */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3118 | if (cnt < 0) |
3119 | return SP_TRUNCERROR; | ||||
3120 | |||||
3121 | gap = &slang->sl_sal; | ||||
3122 | ga_init2(gap, sizeof(salitem_T), 10); | ||||
Bram Moolenaar | d5cdbeb | 2005-10-10 20:59:28 +0000 | [diff] [blame] | 3123 | if (ga_grow(gap, cnt + 1) == FAIL) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3124 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3125 | |
3126 | /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | ||||
3127 | for (; gap->ga_len < cnt; ++gap->ga_len) | ||||
3128 | { | ||||
3129 | smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | ||||
3130 | ccnt = getc(fd); /* <salfromlen> */ | ||||
3131 | if (ccnt < 0) | ||||
3132 | return SP_TRUNCERROR; | ||||
3133 | if ((p = alloc(ccnt + 2)) == NULL) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3134 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3135 | smp->sm_lead = p; |
3136 | |||||
3137 | /* Read up to the first special char into sm_lead. */ | ||||
3138 | for (i = 0; i < ccnt; ++i) | ||||
3139 | { | ||||
3140 | c = getc(fd); /* <salfrom> */ | ||||
3141 | if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) | ||||
3142 | break; | ||||
3143 | *p++ = c; | ||||
3144 | } | ||||
3145 | smp->sm_leadlen = p - smp->sm_lead; | ||||
3146 | *p++ = NUL; | ||||
3147 | |||||
3148 | /* Put (abc) chars in sm_oneof, if any. */ | ||||
3149 | if (c == '(') | ||||
3150 | { | ||||
3151 | smp->sm_oneof = p; | ||||
3152 | for (++i; i < ccnt; ++i) | ||||
3153 | { | ||||
3154 | c = getc(fd); /* <salfrom> */ | ||||
3155 | if (c == ')') | ||||
3156 | break; | ||||
3157 | *p++ = c; | ||||
3158 | } | ||||
3159 | *p++ = NUL; | ||||
3160 | if (++i < ccnt) | ||||
3161 | c = getc(fd); | ||||
3162 | } | ||||
3163 | else | ||||
3164 | smp->sm_oneof = NULL; | ||||
3165 | |||||
3166 | /* Any following chars go in sm_rules. */ | ||||
3167 | smp->sm_rules = p; | ||||
3168 | if (i < ccnt) | ||||
3169 | /* store the char we got while checking for end of sm_lead */ | ||||
3170 | *p++ = c; | ||||
3171 | for (++i; i < ccnt; ++i) | ||||
3172 | *p++ = getc(fd); /* <salfrom> */ | ||||
3173 | *p++ = NUL; | ||||
3174 | |||||
3175 | /* <saltolen> <salto> */ | ||||
3176 | smp->sm_to = read_cnt_string(fd, 1, &ccnt); | ||||
3177 | if (ccnt < 0) | ||||
3178 | { | ||||
3179 | vim_free(smp->sm_lead); | ||||
3180 | return ccnt; | ||||
3181 | } | ||||
3182 | |||||
3183 | #ifdef FEAT_MBYTE | ||||
3184 | if (has_mbyte) | ||||
3185 | { | ||||
3186 | /* convert the multi-byte strings to wide char strings */ | ||||
3187 | smp->sm_lead_w = mb_str2wide(smp->sm_lead); | ||||
3188 | smp->sm_leadlen = mb_charlen(smp->sm_lead); | ||||
3189 | if (smp->sm_oneof == NULL) | ||||
3190 | smp->sm_oneof_w = NULL; | ||||
3191 | else | ||||
3192 | smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); | ||||
3193 | if (smp->sm_to == NULL) | ||||
3194 | smp->sm_to_w = NULL; | ||||
3195 | else | ||||
3196 | smp->sm_to_w = mb_str2wide(smp->sm_to); | ||||
3197 | if (smp->sm_lead_w == NULL | ||||
3198 | || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL) | ||||
3199 | || (smp->sm_to_w == NULL && smp->sm_to != NULL)) | ||||
3200 | { | ||||
3201 | vim_free(smp->sm_lead); | ||||
3202 | vim_free(smp->sm_to); | ||||
3203 | vim_free(smp->sm_lead_w); | ||||
3204 | vim_free(smp->sm_oneof_w); | ||||
3205 | vim_free(smp->sm_to_w); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3206 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3207 | } |
3208 | } | ||||
3209 | #endif | ||||
3210 | } | ||||
3211 | |||||
Bram Moolenaar | d5cdbeb | 2005-10-10 20:59:28 +0000 | [diff] [blame] | 3212 | if (gap->ga_len > 0) |
3213 | { | ||||
3214 | /* Add one extra entry to mark the end with an empty sm_lead. Avoids | ||||
3215 | * that we need to check the index every time. */ | ||||
3216 | smp = &((salitem_T *)gap->ga_data)[gap->ga_len]; | ||||
3217 | if ((p = alloc(1)) == NULL) | ||||
3218 | return SP_OTHERERROR; | ||||
3219 | p[0] = NUL; | ||||
3220 | smp->sm_lead = p; | ||||
3221 | smp->sm_leadlen = 0; | ||||
3222 | smp->sm_oneof = NULL; | ||||
3223 | smp->sm_rules = p; | ||||
3224 | smp->sm_to = NULL; | ||||
3225 | #ifdef FEAT_MBYTE | ||||
3226 | if (has_mbyte) | ||||
3227 | { | ||||
3228 | smp->sm_lead_w = mb_str2wide(smp->sm_lead); | ||||
3229 | smp->sm_leadlen = 0; | ||||
3230 | smp->sm_oneof_w = NULL; | ||||
3231 | smp->sm_to_w = NULL; | ||||
3232 | } | ||||
3233 | #endif | ||||
3234 | ++gap->ga_len; | ||||
3235 | } | ||||
3236 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3237 | /* Fill the first-index table. */ |
3238 | set_sal_first(slang); | ||||
3239 | |||||
3240 | return 0; | ||||
3241 | } | ||||
3242 | |||||
3243 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3244 | * Read SN_WORDS: <word> ... |
3245 | * Return SP_*ERROR flags. | ||||
3246 | */ | ||||
3247 | static int | ||||
3248 | read_words_section(fd, lp, len) | ||||
3249 | FILE *fd; | ||||
3250 | slang_T *lp; | ||||
3251 | int len; | ||||
3252 | { | ||||
3253 | int done = 0; | ||||
3254 | int i; | ||||
3255 | char_u word[MAXWLEN]; | ||||
3256 | |||||
3257 | while (done < len) | ||||
3258 | { | ||||
3259 | /* Read one word at a time. */ | ||||
3260 | for (i = 0; ; ++i) | ||||
3261 | { | ||||
3262 | word[i] = getc(fd); | ||||
3263 | if (word[i] == NUL) | ||||
3264 | break; | ||||
3265 | if (i == MAXWLEN - 1) | ||||
3266 | return SP_FORMERROR; | ||||
3267 | } | ||||
3268 | |||||
3269 | /* Init the count to 10. */ | ||||
3270 | count_common_word(lp, word, -1, 10); | ||||
3271 | done += i + 1; | ||||
3272 | } | ||||
3273 | return 0; | ||||
3274 | } | ||||
3275 | |||||
3276 | /* | ||||
3277 | * Add a word to the hashtable of common words. | ||||
3278 | * If it's already there then the counter is increased. | ||||
3279 | */ | ||||
3280 | static void | ||||
3281 | count_common_word(lp, word, len, count) | ||||
3282 | slang_T *lp; | ||||
3283 | char_u *word; | ||||
3284 | int len; /* word length, -1 for upto NUL */ | ||||
3285 | int count; /* 1 to count once, 10 to init */ | ||||
3286 | { | ||||
3287 | hash_T hash; | ||||
3288 | hashitem_T *hi; | ||||
3289 | wordcount_T *wc; | ||||
3290 | char_u buf[MAXWLEN]; | ||||
3291 | char_u *p; | ||||
3292 | |||||
3293 | if (len == -1) | ||||
3294 | p = word; | ||||
3295 | else | ||||
3296 | { | ||||
3297 | vim_strncpy(buf, word, len); | ||||
3298 | p = buf; | ||||
3299 | } | ||||
3300 | |||||
3301 | hash = hash_hash(p); | ||||
3302 | hi = hash_lookup(&lp->sl_wordcount, p, hash); | ||||
3303 | if (HASHITEM_EMPTY(hi)) | ||||
3304 | { | ||||
3305 | wc = (wordcount_T *)alloc(sizeof(wordcount_T) + STRLEN(p)); | ||||
3306 | if (wc == NULL) | ||||
3307 | return; | ||||
3308 | STRCPY(wc->wc_word, p); | ||||
3309 | wc->wc_count = count; | ||||
3310 | hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash); | ||||
3311 | } | ||||
3312 | else | ||||
3313 | { | ||||
3314 | wc = HI2WC(hi); | ||||
3315 | if ((wc->wc_count += count) < (unsigned)count) /* check for overflow */ | ||||
3316 | wc->wc_count = MAXWORDCOUNT; | ||||
3317 | } | ||||
3318 | } | ||||
3319 | |||||
3320 | /* | ||||
3321 | * Adjust the score of common words. | ||||
3322 | */ | ||||
3323 | static int | ||||
3324 | score_wordcount_adj(slang, score, word, split) | ||||
3325 | slang_T *slang; | ||||
3326 | int score; | ||||
3327 | char_u *word; | ||||
3328 | int split; /* word was split, less bonus */ | ||||
3329 | { | ||||
3330 | hashitem_T *hi; | ||||
3331 | wordcount_T *wc; | ||||
3332 | int bonus; | ||||
3333 | int newscore; | ||||
3334 | |||||
3335 | hi = hash_find(&slang->sl_wordcount, word); | ||||
3336 | if (!HASHITEM_EMPTY(hi)) | ||||
3337 | { | ||||
3338 | wc = HI2WC(hi); | ||||
3339 | if (wc->wc_count < SCORE_THRES2) | ||||
3340 | bonus = SCORE_COMMON1; | ||||
3341 | else if (wc->wc_count < SCORE_THRES3) | ||||
3342 | bonus = SCORE_COMMON2; | ||||
3343 | else | ||||
3344 | bonus = SCORE_COMMON3; | ||||
3345 | if (split) | ||||
3346 | newscore = score - bonus / 2; | ||||
3347 | else | ||||
3348 | newscore = score - bonus; | ||||
3349 | if (newscore < 0) | ||||
3350 | return 0; | ||||
3351 | return newscore; | ||||
3352 | } | ||||
3353 | return score; | ||||
3354 | } | ||||
3355 | |||||
3356 | /* | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3357 | * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> |
3358 | * Return SP_*ERROR flags. | ||||
3359 | */ | ||||
3360 | static int | ||||
3361 | read_sofo_section(fd, slang) | ||||
3362 | FILE *fd; | ||||
3363 | slang_T *slang; | ||||
3364 | { | ||||
3365 | int cnt; | ||||
3366 | char_u *from, *to; | ||||
3367 | int res; | ||||
3368 | |||||
3369 | slang->sl_sofo = TRUE; | ||||
3370 | |||||
3371 | /* <sofofromlen> <sofofrom> */ | ||||
3372 | from = read_cnt_string(fd, 2, &cnt); | ||||
3373 | if (cnt < 0) | ||||
3374 | return cnt; | ||||
3375 | |||||
3376 | /* <sofotolen> <sofoto> */ | ||||
3377 | to = read_cnt_string(fd, 2, &cnt); | ||||
3378 | if (cnt < 0) | ||||
3379 | { | ||||
3380 | vim_free(from); | ||||
3381 | return cnt; | ||||
3382 | } | ||||
3383 | |||||
3384 | /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */ | ||||
3385 | if (from != NULL && to != NULL) | ||||
3386 | res = set_sofo(slang, from, to); | ||||
3387 | else if (from != NULL || to != NULL) | ||||
3388 | res = SP_FORMERROR; /* only one of two strings is an error */ | ||||
3389 | else | ||||
3390 | res = 0; | ||||
3391 | |||||
3392 | vim_free(from); | ||||
3393 | vim_free(to); | ||||
3394 | return res; | ||||
3395 | } | ||||
3396 | |||||
3397 | /* | ||||
3398 | * Read the compound section from the .spl file: | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 3399 | * <compmax> <compminlen> <compsylmax> <compoptions> <compflags> |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3400 | * Returns SP_*ERROR flags. |
3401 | */ | ||||
3402 | static int | ||||
3403 | read_compound(fd, slang, len) | ||||
3404 | FILE *fd; | ||||
3405 | slang_T *slang; | ||||
3406 | int len; | ||||
3407 | { | ||||
3408 | int todo = len; | ||||
3409 | int c; | ||||
3410 | int atstart; | ||||
3411 | char_u *pat; | ||||
3412 | char_u *pp; | ||||
3413 | char_u *cp; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3414 | char_u *ap; |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 3415 | int cnt; |
3416 | garray_T *gap; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3417 | |
3418 | if (todo < 2) | ||||
3419 | return SP_FORMERROR; /* need at least two bytes */ | ||||
3420 | |||||
3421 | --todo; | ||||
3422 | c = getc(fd); /* <compmax> */ | ||||
3423 | if (c < 2) | ||||
3424 | c = MAXWLEN; | ||||
3425 | slang->sl_compmax = c; | ||||
3426 | |||||
3427 | --todo; | ||||
3428 | c = getc(fd); /* <compminlen> */ | ||||
3429 | if (c < 1) | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 3430 | c = 0; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3431 | slang->sl_compminlen = c; |
3432 | |||||
3433 | --todo; | ||||
3434 | c = getc(fd); /* <compsylmax> */ | ||||
3435 | if (c < 1) | ||||
3436 | c = MAXWLEN; | ||||
3437 | slang->sl_compsylmax = c; | ||||
3438 | |||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 3439 | c = getc(fd); /* <compoptions> */ |
3440 | if (c != 0) | ||||
3441 | ungetc(c, fd); /* be backwards compatible with Vim 7.0b */ | ||||
3442 | else | ||||
3443 | { | ||||
3444 | --todo; | ||||
3445 | c = getc(fd); /* only use the lower byte for now */ | ||||
3446 | --todo; | ||||
3447 | slang->sl_compoptions = c; | ||||
3448 | |||||
3449 | gap = &slang->sl_comppat; | ||||
3450 | c = get2c(fd); /* <comppatcount> */ | ||||
3451 | todo -= 2; | ||||
3452 | ga_init2(gap, sizeof(char_u *), c); | ||||
3453 | if (ga_grow(gap, c) == OK) | ||||
3454 | while (--c >= 0) | ||||
3455 | { | ||||
3456 | ((char_u **)(gap->ga_data))[gap->ga_len++] = | ||||
3457 | read_cnt_string(fd, 1, &cnt); | ||||
3458 | /* <comppatlen> <comppattext> */ | ||||
3459 | if (cnt < 0) | ||||
3460 | return cnt; | ||||
3461 | todo -= cnt + 2; | ||||
3462 | } | ||||
3463 | } | ||||
3464 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 3465 | /* Turn the COMPOUNDRULE items into a regexp pattern: |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3466 | * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$". |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3467 | * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes. |
3468 | * Conversion to utf-8 may double the size. */ | ||||
3469 | c = todo * 2 + 7; | ||||
3470 | #ifdef FEAT_MBYTE | ||||
3471 | if (enc_utf8) | ||||
3472 | c += todo * 2; | ||||
3473 | #endif | ||||
3474 | pat = alloc((unsigned)c); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3475 | if (pat == NULL) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3476 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3477 | |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3478 | /* We also need a list of all flags that can appear at the start and one |
3479 | * for all flags. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3480 | cp = alloc(todo + 1); |
3481 | if (cp == NULL) | ||||
3482 | { | ||||
3483 | vim_free(pat); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3484 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3485 | } |
3486 | slang->sl_compstartflags = cp; | ||||
3487 | *cp = NUL; | ||||
3488 | |||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3489 | ap = alloc(todo + 1); |
3490 | if (ap == NULL) | ||||
3491 | { | ||||
3492 | vim_free(pat); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3493 | return SP_OTHERERROR; |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3494 | } |
3495 | slang->sl_compallflags = ap; | ||||
3496 | *ap = NUL; | ||||
3497 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3498 | pp = pat; |
3499 | *pp++ = '^'; | ||||
3500 | *pp++ = '\\'; | ||||
3501 | *pp++ = '('; | ||||
3502 | |||||
3503 | atstart = 1; | ||||
3504 | while (todo-- > 0) | ||||
3505 | { | ||||
3506 | c = getc(fd); /* <compflags> */ | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3507 | |
3508 | /* Add all flags to "sl_compallflags". */ | ||||
3509 | if (vim_strchr((char_u *)"+*[]/", c) == NULL | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3510 | && !byte_in_str(slang->sl_compallflags, c)) |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 3511 | { |
3512 | *ap++ = c; | ||||
3513 | *ap = NUL; | ||||
3514 | } | ||||
3515 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3516 | if (atstart != 0) |
3517 | { | ||||
3518 | /* At start of item: copy flags to "sl_compstartflags". For a | ||||
3519 | * [abc] item set "atstart" to 2 and copy up to the ']'. */ | ||||
3520 | if (c == '[') | ||||
3521 | atstart = 2; | ||||
3522 | else if (c == ']') | ||||
3523 | atstart = 0; | ||||
3524 | else | ||||
3525 | { | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3526 | if (!byte_in_str(slang->sl_compstartflags, c)) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3527 | { |
3528 | *cp++ = c; | ||||
3529 | *cp = NUL; | ||||
3530 | } | ||||
3531 | if (atstart == 1) | ||||
3532 | atstart = 0; | ||||
3533 | } | ||||
3534 | } | ||||
3535 | if (c == '/') /* slash separates two items */ | ||||
3536 | { | ||||
3537 | *pp++ = '\\'; | ||||
3538 | *pp++ = '|'; | ||||
3539 | atstart = 1; | ||||
3540 | } | ||||
3541 | else /* normal char, "[abc]" and '*' are copied as-is */ | ||||
3542 | { | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 3543 | if (c == '+' || c == '~') |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3544 | *pp++ = '\\'; /* "a+" becomes "a\+" */ |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3545 | #ifdef FEAT_MBYTE |
3546 | if (enc_utf8) | ||||
3547 | pp += mb_char2bytes(c, pp); | ||||
3548 | else | ||||
3549 | #endif | ||||
3550 | *pp++ = c; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3551 | } |
3552 | } | ||||
3553 | |||||
3554 | *pp++ = '\\'; | ||||
3555 | *pp++ = ')'; | ||||
3556 | *pp++ = '$'; | ||||
3557 | *pp = NUL; | ||||
3558 | |||||
3559 | slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT); | ||||
3560 | vim_free(pat); | ||||
3561 | if (slang->sl_compprog == NULL) | ||||
3562 | return SP_FORMERROR; | ||||
3563 | |||||
3564 | return 0; | ||||
3565 | } | ||||
3566 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3567 | /* |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 3568 | * Return TRUE if byte "n" appears in "str". |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3569 | * Like strchr() but independent of locale. |
3570 | */ | ||||
3571 | static int | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 3572 | byte_in_str(str, n) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3573 | char_u *str; |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 3574 | int n; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3575 | { |
3576 | char_u *p; | ||||
3577 | |||||
3578 | for (p = str; *p != NUL; ++p) | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 3579 | if (*p == n) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3580 | return TRUE; |
3581 | return FALSE; | ||||
3582 | } | ||||
3583 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3584 | #define SY_MAXLEN 30 |
3585 | typedef struct syl_item_S | ||||
3586 | { | ||||
3587 | char_u sy_chars[SY_MAXLEN]; /* the sequence of chars */ | ||||
3588 | int sy_len; | ||||
3589 | } syl_item_T; | ||||
3590 | |||||
3591 | /* | ||||
3592 | * Truncate "slang->sl_syllable" at the first slash and put the following items | ||||
3593 | * in "slang->sl_syl_items". | ||||
3594 | */ | ||||
3595 | static int | ||||
3596 | init_syl_tab(slang) | ||||
3597 | slang_T *slang; | ||||
3598 | { | ||||
3599 | char_u *p; | ||||
3600 | char_u *s; | ||||
3601 | int l; | ||||
3602 | syl_item_T *syl; | ||||
3603 | |||||
3604 | ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4); | ||||
3605 | p = vim_strchr(slang->sl_syllable, '/'); | ||||
3606 | while (p != NULL) | ||||
3607 | { | ||||
3608 | *p++ = NUL; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3609 | if (*p == NUL) /* trailing slash */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3610 | break; |
3611 | s = p; | ||||
3612 | p = vim_strchr(p, '/'); | ||||
3613 | if (p == NULL) | ||||
3614 | l = STRLEN(s); | ||||
3615 | else | ||||
3616 | l = p - s; | ||||
3617 | if (l >= SY_MAXLEN) | ||||
3618 | return SP_FORMERROR; | ||||
3619 | if (ga_grow(&slang->sl_syl_items, 1) == FAIL) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3620 | return SP_OTHERERROR; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3621 | syl = ((syl_item_T *)slang->sl_syl_items.ga_data) |
3622 | + slang->sl_syl_items.ga_len++; | ||||
3623 | vim_strncpy(syl->sy_chars, s, l); | ||||
3624 | syl->sy_len = l; | ||||
3625 | } | ||||
3626 | return OK; | ||||
3627 | } | ||||
3628 | |||||
3629 | /* | ||||
3630 | * Count the number of syllables in "word". | ||||
3631 | * When "word" contains spaces the syllables after the last space are counted. | ||||
3632 | * Returns zero if syllables are not defines. | ||||
3633 | */ | ||||
3634 | static int | ||||
3635 | count_syllables(slang, word) | ||||
3636 | slang_T *slang; | ||||
3637 | char_u *word; | ||||
3638 | { | ||||
3639 | int cnt = 0; | ||||
3640 | int skip = FALSE; | ||||
3641 | char_u *p; | ||||
3642 | int len; | ||||
3643 | int i; | ||||
3644 | syl_item_T *syl; | ||||
3645 | int c; | ||||
3646 | |||||
3647 | if (slang->sl_syllable == NULL) | ||||
3648 | return 0; | ||||
3649 | |||||
3650 | for (p = word; *p != NUL; p += len) | ||||
3651 | { | ||||
3652 | /* When running into a space reset counter. */ | ||||
3653 | if (*p == ' ') | ||||
3654 | { | ||||
3655 | len = 1; | ||||
3656 | cnt = 0; | ||||
3657 | continue; | ||||
3658 | } | ||||
3659 | |||||
3660 | /* Find longest match of syllable items. */ | ||||
3661 | len = 0; | ||||
3662 | for (i = 0; i < slang->sl_syl_items.ga_len; ++i) | ||||
3663 | { | ||||
3664 | syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i; | ||||
3665 | if (syl->sy_len > len | ||||
3666 | && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0) | ||||
3667 | len = syl->sy_len; | ||||
3668 | } | ||||
3669 | if (len != 0) /* found a match, count syllable */ | ||||
3670 | { | ||||
3671 | ++cnt; | ||||
3672 | skip = FALSE; | ||||
3673 | } | ||||
3674 | else | ||||
3675 | { | ||||
3676 | /* No recognized syllable item, at least a syllable char then? */ | ||||
3677 | #ifdef FEAT_MBYTE | ||||
3678 | c = mb_ptr2char(p); | ||||
3679 | len = (*mb_ptr2len)(p); | ||||
3680 | #else | ||||
3681 | c = *p; | ||||
3682 | len = 1; | ||||
3683 | #endif | ||||
3684 | if (vim_strchr(slang->sl_syllable, c) == NULL) | ||||
3685 | skip = FALSE; /* No, search for next syllable */ | ||||
3686 | else if (!skip) | ||||
3687 | { | ||||
3688 | ++cnt; /* Yes, count it */ | ||||
3689 | skip = TRUE; /* don't count following syllable chars */ | ||||
3690 | } | ||||
3691 | } | ||||
3692 | } | ||||
3693 | return cnt; | ||||
3694 | } | ||||
3695 | |||||
3696 | /* | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3697 | * Set the SOFOFROM and SOFOTO items in language "lp". |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3698 | * Returns SP_*ERROR flags when there is something wrong. |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3699 | */ |
3700 | static int | ||||
3701 | set_sofo(lp, from, to) | ||||
3702 | slang_T *lp; | ||||
3703 | char_u *from; | ||||
3704 | char_u *to; | ||||
3705 | { | ||||
3706 | int i; | ||||
3707 | |||||
3708 | #ifdef FEAT_MBYTE | ||||
3709 | garray_T *gap; | ||||
3710 | char_u *s; | ||||
3711 | char_u *p; | ||||
3712 | int c; | ||||
3713 | int *inp; | ||||
3714 | |||||
3715 | if (has_mbyte) | ||||
3716 | { | ||||
3717 | /* Use "sl_sal" as an array with 256 pointers to a list of wide | ||||
3718 | * characters. The index is the low byte of the character. | ||||
3719 | * The list contains from-to pairs with a terminating NUL. | ||||
3720 | * sl_sal_first[] is used for latin1 "from" characters. */ | ||||
3721 | gap = &lp->sl_sal; | ||||
3722 | ga_init2(gap, sizeof(int *), 1); | ||||
3723 | if (ga_grow(gap, 256) == FAIL) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3724 | return SP_OTHERERROR; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3725 | vim_memset(gap->ga_data, 0, sizeof(int *) * 256); |
3726 | gap->ga_len = 256; | ||||
3727 | |||||
3728 | /* First count the number of items for each list. Temporarily use | ||||
3729 | * sl_sal_first[] for this. */ | ||||
3730 | for (p = from, s = to; *p != NUL && *s != NUL; ) | ||||
3731 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 3732 | c = mb_cptr2char_adv(&p); |
3733 | mb_cptr_adv(s); | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3734 | if (c >= 256) |
3735 | ++lp->sl_sal_first[c & 0xff]; | ||||
3736 | } | ||||
3737 | if (*p != NUL || *s != NUL) /* lengths differ */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3738 | return SP_FORMERROR; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3739 | |
3740 | /* Allocate the lists. */ | ||||
3741 | for (i = 0; i < 256; ++i) | ||||
3742 | if (lp->sl_sal_first[i] > 0) | ||||
3743 | { | ||||
3744 | p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); | ||||
3745 | if (p == NULL) | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 3746 | return SP_OTHERERROR; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3747 | ((int **)gap->ga_data)[i] = (int *)p; |
3748 | *(int *)p = 0; | ||||
3749 | } | ||||
3750 | |||||
3751 | /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal | ||||
3752 | * list. */ | ||||
3753 | vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); | ||||
3754 | for (p = from, s = to; *p != NUL && *s != NUL; ) | ||||
3755 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 3756 | c = mb_cptr2char_adv(&p); |
3757 | i = mb_cptr2char_adv(&s); | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3758 | if (c >= 256) |
3759 | { | ||||
3760 | /* Append the from-to chars at the end of the list with | ||||
3761 | * the low byte. */ | ||||
3762 | inp = ((int **)gap->ga_data)[c & 0xff]; | ||||
3763 | while (*inp != 0) | ||||
3764 | ++inp; | ||||
3765 | *inp++ = c; /* from char */ | ||||
3766 | *inp++ = i; /* to char */ | ||||
3767 | *inp++ = NUL; /* NUL at the end */ | ||||
3768 | } | ||||
3769 | else | ||||
3770 | /* mapping byte to char is done in sl_sal_first[] */ | ||||
3771 | lp->sl_sal_first[c] = i; | ||||
3772 | } | ||||
3773 | } | ||||
3774 | else | ||||
3775 | #endif | ||||
3776 | { | ||||
3777 | /* mapping bytes to bytes is done in sl_sal_first[] */ | ||||
3778 | if (STRLEN(from) != STRLEN(to)) | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3779 | return SP_FORMERROR; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3780 | |
3781 | for (i = 0; to[i] != NUL; ++i) | ||||
3782 | lp->sl_sal_first[from[i]] = to[i]; | ||||
3783 | lp->sl_sal.ga_len = 1; /* indicates we have soundfolding */ | ||||
3784 | } | ||||
3785 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 3786 | return 0; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 3787 | } |
3788 | |||||
3789 | /* | ||||
3790 | * Fill the first-index table for "lp". | ||||
3791 | */ | ||||
3792 | static void | ||||
3793 | set_sal_first(lp) | ||||
3794 | slang_T *lp; | ||||
3795 | { | ||||
3796 | salfirst_T *sfirst; | ||||
3797 | int i; | ||||
3798 | salitem_T *smp; | ||||
3799 | int c; | ||||
3800 | garray_T *gap = &lp->sl_sal; | ||||
3801 | |||||
3802 | sfirst = lp->sl_sal_first; | ||||
3803 | for (i = 0; i < 256; ++i) | ||||
3804 | sfirst[i] = -1; | ||||
3805 | smp = (salitem_T *)gap->ga_data; | ||||
3806 | for (i = 0; i < gap->ga_len; ++i) | ||||
3807 | { | ||||
3808 | #ifdef FEAT_MBYTE | ||||
3809 | if (has_mbyte) | ||||
3810 | /* Use the lowest byte of the first character. For latin1 it's | ||||
3811 | * the character, for other encodings it should differ for most | ||||
3812 | * characters. */ | ||||
3813 | c = *smp[i].sm_lead_w & 0xff; | ||||
3814 | else | ||||
3815 | #endif | ||||
3816 | c = *smp[i].sm_lead; | ||||
3817 | if (sfirst[c] == -1) | ||||
3818 | { | ||||
3819 | sfirst[c] = i; | ||||
3820 | #ifdef FEAT_MBYTE | ||||
3821 | if (has_mbyte) | ||||
3822 | { | ||||
3823 | int n; | ||||
3824 | |||||
3825 | /* Make sure all entries with this byte are following each | ||||
3826 | * other. Move the ones that are in the wrong position. Do | ||||
3827 | * keep the same ordering! */ | ||||
3828 | while (i + 1 < gap->ga_len | ||||
3829 | && (*smp[i + 1].sm_lead_w & 0xff) == c) | ||||
3830 | /* Skip over entry with same index byte. */ | ||||
3831 | ++i; | ||||
3832 | |||||
3833 | for (n = 1; i + n < gap->ga_len; ++n) | ||||
3834 | if ((*smp[i + n].sm_lead_w & 0xff) == c) | ||||
3835 | { | ||||
3836 | salitem_T tsal; | ||||
3837 | |||||
3838 | /* Move entry with same index byte after the entries | ||||
3839 | * we already found. */ | ||||
3840 | ++i; | ||||
3841 | --n; | ||||
3842 | tsal = smp[i + n]; | ||||
3843 | mch_memmove(smp + i + 1, smp + i, | ||||
3844 | sizeof(salitem_T) * n); | ||||
3845 | smp[i] = tsal; | ||||
3846 | } | ||||
3847 | } | ||||
3848 | #endif | ||||
3849 | } | ||||
3850 | } | ||||
3851 | } | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 3852 | |
#ifdef FEAT_MBYTE
/*
 * Turn the multi-byte string "s" into a wide character string: one int for
 * each character, terminated with a NUL.
 * Returns the result in allocated memory, NULL when out of memory.
 */
static int *
mb_str2wide(s)
    char_u      *s;
{
    int         *wide;
    int         *dst;
    char_u      *src = s;

    /* One int for each character plus one for the terminating NUL. */
    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
        return NULL;

    dst = wide;
    while (*src != NUL)
        *dst++ = mb_ptr2char_adv(&src);
    *dst = NUL;

    return wide;
}
#endif
3876 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 3877 | /* |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3878 | * Read a tree from the .spl or .sug file. |
3879 | * Allocates the memory and stores pointers in "bytsp" and "idxsp". | ||||
3880 | * This is skipped when the tree has zero length. | ||||
3881 | * Returns zero when OK, SP_ value for an error. | ||||
3882 | */ | ||||
3883 | static int | ||||
3884 | spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt) | ||||
3885 | FILE *fd; | ||||
3886 | char_u **bytsp; | ||||
3887 | idx_T **idxsp; | ||||
3888 | int prefixtree; /* TRUE for the prefix tree */ | ||||
3889 | int prefixcnt; /* when "prefixtree" is TRUE: prefix count */ | ||||
3890 | { | ||||
3891 | int len; | ||||
3892 | int idx; | ||||
3893 | char_u *bp; | ||||
3894 | idx_T *ip; | ||||
3895 | |||||
3896 | /* The tree size was computed when writing the file, so that we can | ||||
3897 | * allocate it as one long block. <nodecount> */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 3898 | len = get4c(fd); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3899 | if (len < 0) |
3900 | return SP_TRUNCERROR; | ||||
3901 | if (len > 0) | ||||
3902 | { | ||||
3903 | /* Allocate the byte array. */ | ||||
3904 | bp = lalloc((long_u)len, TRUE); | ||||
3905 | if (bp == NULL) | ||||
3906 | return SP_OTHERERROR; | ||||
3907 | *bytsp = bp; | ||||
3908 | |||||
3909 | /* Allocate the index array. */ | ||||
3910 | ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE); | ||||
3911 | if (ip == NULL) | ||||
3912 | return SP_OTHERERROR; | ||||
3913 | *idxsp = ip; | ||||
3914 | |||||
3915 | /* Recursively read the tree and store it in the array. */ | ||||
3916 | idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt); | ||||
3917 | if (idx < 0) | ||||
3918 | return idx; | ||||
3919 | } | ||||
3920 | return 0; | ||||
3921 | } | ||||
3922 | |||||
3923 | /* | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3924 | * Read one row of siblings from the spell file and store it in the byte array |
3925 | * "byts" and index array "idxs". Recursively read the children. | ||||
3926 | * | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3927 | * NOTE: The code here must match put_node()! |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3928 | * |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3929 | * Returns the index (>= 0) following the siblings. |
3930 | * Returns SP_TRUNCERROR if the file is shorter than expected. | ||||
3931 | * Returns SP_FORMERROR if there is a format error. | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 3932 | */ |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 3933 | static idx_T |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3934 | read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3935 | FILE *fd; |
3936 | char_u *byts; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 3937 | idx_T *idxs; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3938 | int maxidx; /* size of arrays */ |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 3939 | idx_T startidx; /* current index in "byts" and "idxs" */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3940 | int prefixtree; /* TRUE for reading PREFIXTREE */ |
3941 | int maxprefcondnr; /* maximum for <prefcondnr> */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 3942 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3943 | int len; |
3944 | int i; | ||||
3945 | int n; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 3946 | idx_T idx = startidx; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3947 | int c; |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 3948 | int c2; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3949 | #define SHARED_MASK 0x8000000 |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 3950 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3951 | len = getc(fd); /* <siblingcount> */ |
3952 | if (len <= 0) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3953 | return SP_TRUNCERROR; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3954 | |
3955 | if (startidx + len >= maxidx) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3956 | return SP_FORMERROR; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3957 | byts[idx++] = len; |
3958 | |||||
3959 | /* Read the byte values, flag/region bytes and shared indexes. */ | ||||
3960 | for (i = 1; i <= len; ++i) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 3961 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3962 | c = getc(fd); /* <byte> */ |
3963 | if (c < 0) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3964 | return SP_TRUNCERROR; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3965 | if (c <= BY_SPECIAL) |
3966 | { | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 3967 | if (c == BY_NOFLAGS && !prefixtree) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3968 | { |
3969 | /* No flags, all regions. */ | ||||
3970 | idxs[idx] = 0; | ||||
3971 | c = 0; | ||||
3972 | } | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 3973 | else if (c != BY_INDEX) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 3974 | { |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3975 | if (prefixtree) |
3976 | { | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 3977 | /* Read the optional pflags byte, the prefix ID and the |
3978 | * condition nr. In idxs[] store the prefix ID in the low | ||||
3979 | * byte, the condition index shifted up 8 bits, the flags | ||||
3980 | * shifted up 24 bits. */ | ||||
3981 | if (c == BY_FLAGS) | ||||
3982 | c = getc(fd) << 24; /* <pflags> */ | ||||
3983 | else | ||||
3984 | c = 0; | ||||
3985 | |||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 3986 | c |= getc(fd); /* <affixID> */ |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 3987 | |
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 3988 | n = get2c(fd); /* <prefcondnr> */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3989 | if (n >= maxprefcondnr) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 3990 | return SP_FORMERROR; |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 3991 | c |= (n << 8); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3992 | } |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 3993 | else /* c must be BY_FLAGS or BY_FLAGS2 */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3994 | { |
3995 | /* Read flags and optional region and prefix ID. In | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 3996 | * idxs[] the flags go in the low two bytes, region above |
3997 | * that and prefix ID above the region. */ | ||||
3998 | c2 = c; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 3999 | c = getc(fd); /* <flags> */ |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 4000 | if (c2 == BY_FLAGS2) |
4001 | c = (getc(fd) << 8) + c; /* <flags2> */ | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4002 | if (c & WF_REGION) |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 4003 | c = (getc(fd) << 16) + c; /* <region> */ |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 4004 | if (c & WF_AFX) |
4005 | c = (getc(fd) << 24) + c; /* <affixID> */ | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4006 | } |
4007 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4008 | idxs[idx] = c; |
4009 | c = 0; | ||||
4010 | } | ||||
4011 | else /* c == BY_INDEX */ | ||||
4012 | { | ||||
4013 | /* <nodeidx> */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 4014 | n = get3c(fd); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4015 | if (n < 0 || n >= maxidx) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4016 | return SP_FORMERROR; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4017 | idxs[idx] = n + SHARED_MASK; |
4018 | c = getc(fd); /* <xbyte> */ | ||||
4019 | } | ||||
4020 | } | ||||
4021 | byts[idx++] = c; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4022 | } |
4023 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4024 | /* Recursively read the children for non-shared siblings. |
4025 | * Skip the end-of-word ones (zero byte value) and the shared ones (and | ||||
4026 | * remove SHARED_MASK) */ | ||||
4027 | for (i = 1; i <= len; ++i) | ||||
4028 | if (byts[startidx + i] != 0) | ||||
4029 | { | ||||
4030 | if (idxs[startidx + i] & SHARED_MASK) | ||||
4031 | idxs[startidx + i] &= ~SHARED_MASK; | ||||
4032 | else | ||||
4033 | { | ||||
4034 | idxs[startidx + i] = idx; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4035 | idx = read_tree_node(fd, byts, idxs, maxidx, idx, |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4036 | prefixtree, maxprefcondnr); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4037 | if (idx < 0) |
4038 | break; | ||||
4039 | } | ||||
4040 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4041 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4042 | return idx; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4043 | } |
4044 | |||||
4045 | /* | ||||
4046 | * Parse 'spelllang' and set buf->b_langp accordingly. | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4047 | * Returns NULL if it's OK, an error message otherwise. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4048 | */ |
4049 | char_u * | ||||
4050 | did_set_spelllang(buf) | ||||
4051 | buf_T *buf; | ||||
4052 | { | ||||
4053 | garray_T ga; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4054 | char_u *splp; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4055 | char_u *region; |
Bram Moolenaar | b635633 | 2005-07-18 21:40:44 +0000 | [diff] [blame] | 4056 | char_u region_cp[3]; |
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4057 | int filename; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4058 | int region_mask; |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4059 | slang_T *slang; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4060 | int c; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4061 | char_u lang[MAXWLEN + 1]; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4062 | char_u spf_name[MAXPATHL]; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4063 | int len; |
4064 | char_u *p; | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4065 | int round; |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4066 | char_u *spf; |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4067 | char_u *use_region = NULL; |
4068 | int dont_use_region = FALSE; | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 4069 | int nobreak = FALSE; |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4070 | int i, j; |
4071 | langp_T *lp, *lp2; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4072 | |
4073 | ga_init2(&ga, sizeof(langp_T), 2); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4074 | clear_midword(buf); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4075 | |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4076 | /* loop over comma separated language names. */ |
4077 | for (splp = buf->b_p_spl; *splp != NUL; ) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4078 | { |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4079 | /* Get one language name. */ |
4080 | copy_option_part(&splp, lang, MAXWLEN, ","); | ||||
4081 | |||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 4082 | region = NULL; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4083 | len = STRLEN(lang); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4084 | |
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4085 | /* If the name ends in ".spl" use it as the name of the spell file. |
4086 | * If there is a region name let "region" point to it and remove it | ||||
4087 | * from the name. */ | ||||
4088 | if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0) | ||||
4089 | { | ||||
4090 | filename = TRUE; | ||||
4091 | |||||
Bram Moolenaar | b635633 | 2005-07-18 21:40:44 +0000 | [diff] [blame] | 4092 | /* Locate a region and remove it from the file name. */ |
4093 | p = vim_strchr(gettail(lang), '_'); | ||||
4094 | if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2]) | ||||
4095 | && !ASCII_ISALPHA(p[3])) | ||||
4096 | { | ||||
4097 | vim_strncpy(region_cp, p + 1, 2); | ||||
4098 | mch_memmove(p, p + 3, len - (p - lang) - 2); | ||||
4099 | len -= 3; | ||||
4100 | region = region_cp; | ||||
4101 | } | ||||
4102 | else | ||||
4103 | dont_use_region = TRUE; | ||||
4104 | |||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4105 | /* Check if we loaded this language before. */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4106 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) |
4107 | if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME) | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4108 | break; |
4109 | } | ||||
4110 | else | ||||
4111 | { | ||||
4112 | filename = FALSE; | ||||
4113 | if (len > 3 && lang[len - 3] == '_') | ||||
4114 | { | ||||
4115 | region = lang + len - 2; | ||||
4116 | len -= 3; | ||||
4117 | lang[len] = NUL; | ||||
4118 | } | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4119 | else |
4120 | dont_use_region = TRUE; | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4121 | |
4122 | /* Check if we loaded this language before. */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4123 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) |
4124 | if (STRICMP(lang, slang->sl_name) == 0) | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4125 | break; |
4126 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4127 | |
Bram Moolenaar | b635633 | 2005-07-18 21:40:44 +0000 | [diff] [blame] | 4128 | if (region != NULL) |
4129 | { | ||||
4130 | /* If the region differs from what was used before then don't | ||||
4131 | * use it for 'spellfile'. */ | ||||
4132 | if (use_region != NULL && STRCMP(region, use_region) != 0) | ||||
4133 | dont_use_region = TRUE; | ||||
4134 | use_region = region; | ||||
4135 | } | ||||
4136 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4137 | /* If not found try loading the language now. */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4138 | if (slang == NULL) |
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4139 | { |
4140 | if (filename) | ||||
4141 | (void)spell_load_file(lang, lang, NULL, FALSE); | ||||
4142 | else | ||||
4143 | spell_load_lang(lang); | ||||
4144 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4145 | |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4146 | /* |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4147 | * Loop over the languages, there can be several files for "lang". |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4148 | */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4149 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) |
4150 | if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME | ||||
4151 | : STRICMP(lang, slang->sl_name) == 0) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4152 | { |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 4153 | region_mask = REGION_ALL; |
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 4154 | if (!filename && region != NULL) |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4155 | { |
4156 | /* find region in sl_regions */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4157 | c = find_region(slang->sl_regions, region); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4158 | if (c == REGION_ALL) |
4159 | { | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4160 | if (slang->sl_add) |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4161 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4162 | if (*slang->sl_regions != NUL) |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4163 | /* This addition file is for other regions. */ |
4164 | region_mask = 0; | ||||
4165 | } | ||||
4166 | else | ||||
4167 | /* This is probably an error. Give a warning and | ||||
4168 | * accept the words anyway. */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4169 | smsg((char_u *) |
4170 | _("Warning: region %s not supported"), | ||||
4171 | region); | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4172 | } |
4173 | else | ||||
4174 | region_mask = 1 << c; | ||||
4175 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4176 | |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4177 | if (region_mask != 0) |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4178 | { |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4179 | if (ga_grow(&ga, 1) == FAIL) |
4180 | { | ||||
4181 | ga_clear(&ga); | ||||
4182 | return e_outofmem; | ||||
4183 | } | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4184 | LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4185 | LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; |
4186 | ++ga.ga_len; | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4187 | use_midword(slang, buf); |
4188 | if (slang->sl_nobreak) | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 4189 | nobreak = TRUE; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 4190 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4191 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4192 | } |
4193 | |||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4194 | /* round 0: load int_wordlist, if possible. |
4195 | * round 1: load first name in 'spellfile'. | ||||
4196 | * round 2: load second name in 'spellfile. | ||||
4197 | * etc. */ | ||||
4198 | spf = curbuf->b_p_spf; | ||||
4199 | for (round = 0; round == 0 || *spf != NUL; ++round) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4200 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4201 | if (round == 0) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4202 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4203 | /* Internal wordlist, if there is one. */ |
4204 | if (int_wordlist == NULL) | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4205 | continue; |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4206 | int_wordlist_spl(spf_name); |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4207 | } |
4208 | else | ||||
4209 | { | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4210 | /* One entry in 'spellfile'. */ |
4211 | copy_option_part(&spf, spf_name, MAXPATHL - 5, ","); | ||||
4212 | STRCAT(spf_name, ".spl"); | ||||
4213 | |||||
4214 | /* If it was already found above then skip it. */ | ||||
4215 | for (c = 0; c < ga.ga_len; ++c) | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4216 | { |
4217 | p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname; | ||||
4218 | if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME) | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4219 | break; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4220 | } |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4221 | if (c < ga.ga_len) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4222 | continue; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4223 | } |
4224 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4225 | /* Check if it was loaded already. */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4226 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) |
4227 | if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4228 | break; |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4229 | if (slang == NULL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4230 | { |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4231 | /* Not loaded, try loading it now. The language name includes the |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4232 | * region name, the region is ignored otherwise. for int_wordlist |
4233 | * use an arbitrary name. */ | ||||
4234 | if (round == 0) | ||||
4235 | STRCPY(lang, "internal wordlist"); | ||||
4236 | else | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4237 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 4238 | vim_strncpy(lang, gettail(spf_name), MAXWLEN); |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4239 | p = vim_strchr(lang, '.'); |
4240 | if (p != NULL) | ||||
4241 | *p = NUL; /* truncate at ".encoding.add" */ | ||||
4242 | } | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4243 | slang = spell_load_file(spf_name, lang, NULL, TRUE); |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 4244 | |
4245 | /* If one of the languages has NOBREAK we assume the addition | ||||
4246 | * files also have this. */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4247 | if (slang != NULL && nobreak) |
4248 | slang->sl_nobreak = TRUE; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4249 | } |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4250 | if (slang != NULL && ga_grow(&ga, 1) == OK) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4251 | { |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4252 | region_mask = REGION_ALL; |
4253 | if (use_region != NULL && !dont_use_region) | ||||
4254 | { | ||||
4255 | /* find region in sl_regions */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4256 | c = find_region(slang->sl_regions, use_region); |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4257 | if (c != REGION_ALL) |
4258 | region_mask = 1 << c; | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4259 | else if (*slang->sl_regions != NUL) |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4260 | /* This spell file is for other regions. */ |
4261 | region_mask = 0; | ||||
4262 | } | ||||
4263 | |||||
4264 | if (region_mask != 0) | ||||
4265 | { | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4266 | LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang; |
4267 | LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL; | ||||
4268 | LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL; | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4269 | LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask; |
4270 | ++ga.ga_len; | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4271 | use_midword(slang, buf); |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4272 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4273 | } |
4274 | } | ||||
4275 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4276 | /* Everything is fine, store the new b_langp value. */ |
4277 | ga_clear(&buf->b_langp); | ||||
4278 | buf->b_langp = ga; | ||||
4279 | |||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4280 | /* For each language figure out what language to use for sound folding and |
4281 | * REP items. If the language doesn't support it itself use another one | ||||
4282 | * with the same name. E.g. for "en-math" use "en". */ | ||||
4283 | for (i = 0; i < ga.ga_len; ++i) | ||||
4284 | { | ||||
4285 | lp = LANGP_ENTRY(ga, i); | ||||
4286 | |||||
4287 | /* sound folding */ | ||||
4288 | if (lp->lp_slang->sl_sal.ga_len > 0) | ||||
4289 | /* language does sound folding itself */ | ||||
4290 | lp->lp_sallang = lp->lp_slang; | ||||
4291 | else | ||||
4292 | /* find first similar language that does sound folding */ | ||||
4293 | for (j = 0; j < ga.ga_len; ++j) | ||||
4294 | { | ||||
4295 | lp2 = LANGP_ENTRY(ga, j); | ||||
4296 | if (lp2->lp_slang->sl_sal.ga_len > 0 | ||||
4297 | && STRNCMP(lp->lp_slang->sl_name, | ||||
4298 | lp2->lp_slang->sl_name, 2) == 0) | ||||
4299 | { | ||||
4300 | lp->lp_sallang = lp2->lp_slang; | ||||
4301 | break; | ||||
4302 | } | ||||
4303 | } | ||||
4304 | |||||
4305 | /* REP items */ | ||||
4306 | if (lp->lp_slang->sl_rep.ga_len > 0) | ||||
4307 | /* language has REP items itself */ | ||||
4308 | lp->lp_replang = lp->lp_slang; | ||||
4309 | else | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4310 | /* find first similar language that has REP items */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4311 | for (j = 0; j < ga.ga_len; ++j) |
4312 | { | ||||
4313 | lp2 = LANGP_ENTRY(ga, j); | ||||
4314 | if (lp2->lp_slang->sl_rep.ga_len > 0 | ||||
4315 | && STRNCMP(lp->lp_slang->sl_name, | ||||
4316 | lp2->lp_slang->sl_name, 2) == 0) | ||||
4317 | { | ||||
4318 | lp->lp_replang = lp2->lp_slang; | ||||
4319 | break; | ||||
4320 | } | ||||
4321 | } | ||||
4322 | } | ||||
4323 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4324 | return NULL; |
4325 | } | ||||
4326 | |||||
4327 | /* | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4328 | * Clear the midword characters for buffer "buf". |
4329 | */ | ||||
4330 | static void | ||||
4331 | clear_midword(buf) | ||||
4332 | buf_T *buf; | ||||
4333 | { | ||||
4334 | vim_memset(buf->b_spell_ismw, 0, 256); | ||||
4335 | #ifdef FEAT_MBYTE | ||||
4336 | vim_free(buf->b_spell_ismw_mb); | ||||
4337 | buf->b_spell_ismw_mb = NULL; | ||||
4338 | #endif | ||||
4339 | } | ||||
4340 | |||||
4341 | /* | ||||
4342 | * Use the "sl_midword" field of language "lp" for buffer "buf". | ||||
4343 | * They add up to any currently used midword characters. | ||||
4344 | */ | ||||
4345 | static void | ||||
4346 | use_midword(lp, buf) | ||||
4347 | slang_T *lp; | ||||
4348 | buf_T *buf; | ||||
4349 | { | ||||
4350 | char_u *p; | ||||
4351 | |||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 4352 | if (lp->sl_midword == NULL) /* there aren't any */ |
4353 | return; | ||||
4354 | |||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4355 | for (p = lp->sl_midword; *p != NUL; ) |
4356 | #ifdef FEAT_MBYTE | ||||
4357 | if (has_mbyte) | ||||
4358 | { | ||||
4359 | int c, l, n; | ||||
4360 | char_u *bp; | ||||
4361 | |||||
4362 | c = mb_ptr2char(p); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4363 | l = (*mb_ptr2len)(p); |
4364 | if (c < 256 && l <= 2) | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4365 | buf->b_spell_ismw[c] = TRUE; |
4366 | else if (buf->b_spell_ismw_mb == NULL) | ||||
4367 | /* First multi-byte char in "b_spell_ismw_mb". */ | ||||
4368 | buf->b_spell_ismw_mb = vim_strnsave(p, l); | ||||
4369 | else | ||||
4370 | { | ||||
4371 | /* Append multi-byte chars to "b_spell_ismw_mb". */ | ||||
4372 | n = STRLEN(buf->b_spell_ismw_mb); | ||||
4373 | bp = vim_strnsave(buf->b_spell_ismw_mb, n + l); | ||||
4374 | if (bp != NULL) | ||||
4375 | { | ||||
4376 | vim_free(buf->b_spell_ismw_mb); | ||||
4377 | buf->b_spell_ismw_mb = bp; | ||||
4378 | vim_strncpy(bp + n, p, l); | ||||
4379 | } | ||||
4380 | } | ||||
4381 | p += l; | ||||
4382 | } | ||||
4383 | else | ||||
4384 | #endif | ||||
4385 | buf->b_spell_ismw[*p++] = TRUE; | ||||
4386 | } | ||||
4387 | |||||
4388 | /* | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4389 | * Find the region "region[2]" in "rp" (points to "sl_regions"). |
4390 | * Each region is simply stored as the two characters of it's name. | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 4391 | * Returns the index if found (first is 0), REGION_ALL if not found. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4392 | */ |
4393 | static int | ||||
4394 | find_region(rp, region) | ||||
4395 | char_u *rp; | ||||
4396 | char_u *region; | ||||
4397 | { | ||||
4398 | int i; | ||||
4399 | |||||
4400 | for (i = 0; ; i += 2) | ||||
4401 | { | ||||
4402 | if (rp[i] == NUL) | ||||
4403 | return REGION_ALL; | ||||
4404 | if (rp[i] == region[0] && rp[i + 1] == region[1]) | ||||
4405 | break; | ||||
4406 | } | ||||
4407 | return i / 2; | ||||
4408 | } | ||||
4409 | |||||
4410 | /* | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4411 | * Return case type of word: |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4412 | * w word 0 |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4413 | * Word WF_ONECAP |
4414 | * W WORD WF_ALLCAP | ||||
4415 | * WoRd wOrd WF_KEEPCAP | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4416 | */ |
4417 | static int | ||||
4418 | captype(word, end) | ||||
4419 | char_u *word; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4420 | char_u *end; /* When NULL use up to NUL byte. */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4421 | { |
4422 | char_u *p; | ||||
4423 | int c; | ||||
4424 | int firstcap; | ||||
4425 | int allcap; | ||||
4426 | int past_second = FALSE; /* past second word char */ | ||||
4427 | |||||
4428 | /* find first letter */ | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4429 | for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4430 | if (end == NULL ? *p == NUL : p >= end) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4431 | return 0; /* only non-word characters, illegal word */ |
4432 | #ifdef FEAT_MBYTE | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4433 | if (has_mbyte) |
4434 | c = mb_ptr2char_adv(&p); | ||||
4435 | else | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4436 | #endif |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4437 | c = *p++; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 4438 | firstcap = allcap = SPELL_ISUPPER(c); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4439 | |
4440 | /* | ||||
4441 | * Need to check all letters to find a word with mixed upper/lower. | ||||
4442 | * But a word with an upper char only at start is a ONECAP. | ||||
4443 | */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4444 | for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p)) |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 4445 | if (spell_iswordp_nmw(p)) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4446 | { |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 4447 | c = PTR2CHAR(p); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 4448 | if (!SPELL_ISUPPER(c)) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4449 | { |
4450 | /* UUl -> KEEPCAP */ | ||||
4451 | if (past_second && allcap) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4452 | return WF_KEEPCAP; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4453 | allcap = FALSE; |
4454 | } | ||||
4455 | else if (!allcap) | ||||
4456 | /* UlU -> KEEPCAP */ | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4457 | return WF_KEEPCAP; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4458 | past_second = TRUE; |
4459 | } | ||||
4460 | |||||
4461 | if (allcap) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4462 | return WF_ALLCAP; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4463 | if (firstcap) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4464 | return WF_ONECAP; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4465 | return 0; |
4466 | } | ||||
4467 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4468 | /* |
4469 | * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a | ||||
4470 | * capital. So that make_case_word() can turn WOrd into Word. | ||||
4471 | * Add ALLCAP for "WOrD". | ||||
4472 | */ | ||||
4473 | static int | ||||
4474 | badword_captype(word, end) | ||||
4475 | char_u *word; | ||||
4476 | char_u *end; | ||||
4477 | { | ||||
4478 | int flags = captype(word, end); | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 4479 | int c; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4480 | int l, u; |
4481 | int first; | ||||
4482 | char_u *p; | ||||
4483 | |||||
4484 | if (flags & WF_KEEPCAP) | ||||
4485 | { | ||||
4486 | /* Count the number of UPPER and lower case letters. */ | ||||
4487 | l = u = 0; | ||||
4488 | first = FALSE; | ||||
4489 | for (p = word; p < end; mb_ptr_adv(p)) | ||||
4490 | { | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 4491 | c = PTR2CHAR(p); |
4492 | if (SPELL_ISUPPER(c)) | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4493 | { |
4494 | ++u; | ||||
4495 | if (p == word) | ||||
4496 | first = TRUE; | ||||
4497 | } | ||||
4498 | else | ||||
4499 | ++l; | ||||
4500 | } | ||||
4501 | |||||
4502 | /* If there are more UPPER than lower case letters suggest an | ||||
4503 | * ALLCAP word. Otherwise, if the first letter is UPPER then | ||||
4504 | * suggest ONECAP. Exception: "ALl" most likely should be "All", | ||||
4505 | * require three upper case letters. */ | ||||
4506 | if (u > l && u > 2) | ||||
4507 | flags |= WF_ALLCAP; | ||||
4508 | else if (first) | ||||
4509 | flags |= WF_ONECAP; | ||||
Bram Moolenaar | 2d3f489 | 2006-01-20 23:02:51 +0000 | [diff] [blame] | 4510 | |
4511 | if (u >= 2 && l >= 2) /* maCARONI maCAroni */ | ||||
4512 | flags |= WF_MIXCAP; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4513 | } |
4514 | return flags; | ||||
4515 | } | ||||
4516 | |||||
# if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
/*
 * Free all loaded languages, remove the internal wordlist files and reset
 * the spell character table and the remembered replacement strings.
 */
    void
spell_free_all()
{
    slang_T     *slang;
    buf_T       *buf;
    char_u      fname[MAXPATHL];

    /* Clear the language list of every buffer; its entries refer to the
     * languages that are freed below. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
        ga_clear(&buf->b_langp);

    /* Unlink and free the loaded languages one by one. */
    for (slang = first_lang; slang != NULL; slang = first_lang)
    {
        first_lang = slang->sl_next;
        slang_free(slang);
    }

    if (int_wordlist != NULL)
    {
        /* Delete the internal wordlist and its .spl file */
        mch_remove(int_wordlist);
        int_wordlist_spl(fname);
        mch_remove(fname);
        vim_free(int_wordlist);
        int_wordlist = NULL;
    }

    init_spell_chartab();

    vim_free(repl_to);
    repl_to = NULL;
    vim_free(repl_from);
    repl_from = NULL;
}
# endif
4557 | |||||
# if defined(FEAT_MBYTE) || defined(PROTO)
/*
 * Clear all spelling tables and reload them.
 * Used after 'encoding' is set and when ":mkspell" was used.
 */
    void
spell_reload()
{
    buf_T       *buf;
    win_T       *wp;

    /* Initialize the table for spell_iswordp(). */
    init_spell_chartab();

    /* Unload all allocated memory. */
    spell_free_all();

    /* Go through all buffers and handle 'spelllang'. */
    for (buf = firstbuf; buf != NULL; buf = buf->b_next)
    {
        /* Nothing to load when 'spelllang' is empty. */
        if (*buf->b_p_spl == NUL)
            continue;

        /* Only load the wordlists when there is a window for this buffer
         * in which 'spell' is set.  One such window is enough. */
        FOR_ALL_WINDOWS(wp)
            if (wp->w_buffer == buf && wp->w_p_spell)
            {
                (void)did_set_spelllang(buf);
# ifdef FEAT_WINDOWS
                break;
# endif
            }
    }
}
# endif
4594 | |||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4595 | /* |
4596 | * Reload the spell file "fname" if it's loaded. | ||||
4597 | */ | ||||
4598 | static void | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4599 | spell_reload_one(fname, added_word) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4600 | char_u *fname; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4601 | int added_word; /* invoked through "zg" */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4602 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4603 | slang_T *slang; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4604 | int didit = FALSE; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4605 | |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4606 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4607 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4608 | if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4609 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4610 | slang_clear(slang); |
4611 | if (spell_load_file(fname, NULL, slang, FALSE) == NULL) | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4612 | /* reloading failed, clear the language */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 4613 | slang_clear(slang); |
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 4614 | redraw_all_later(SOME_VALID); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4615 | didit = TRUE; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4616 | } |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4617 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4618 | |
4619 | /* When "zg" was used and the file wasn't loaded yet, should redo | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 4620 | * 'spelllang' to load it now. */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4621 | if (added_word && !didit) |
4622 | did_set_spelllang(curbuf); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4623 | } |
4624 | |||||
4625 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4626 | /* |
4627 | * Functions for ":mkspell". | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4628 | */ |
4629 | |||||
#define MAXLINELEN  500         /* Maximum length in bytes of a line in a .aff
                                   and .dic file. */
/*
 * Main structure to store the contents of a ".aff" file.
 * The "unsigned" fields hold the flag ID given for the .aff item named in
 * their comment.  The three hashtables hold the affix and compound flag
 * definitions.
 */
typedef struct afffile_S
{
    char_u      *af_enc;        /* "SET", normalized, alloc'ed string or NULL */
    int         af_flagtype;    /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
    unsigned    af_rare;        /* RARE ID for rare word */
    unsigned    af_keepcase;    /* KEEPCASE ID for keep-case word */
    unsigned    af_bad;         /* BAD ID for banned word */
    unsigned    af_needaffix;   /* NEEDAFFIX ID */
    unsigned    af_needcomp;    /* NEEDCOMPOUND ID */
    unsigned    af_comproot;    /* COMPOUNDROOT ID */
    unsigned    af_compforbid;  /* COMPOUNDFORBIDFLAG ID */
    unsigned    af_comppermit;  /* COMPOUNDPERMITFLAG ID */
    unsigned    af_nosuggest;   /* NOSUGGEST ID */
    int         af_pfxpostpone; /* postpone prefixes without chop string and
                                   without flags */
    hashtab_T   af_pref;        /* hashtable for prefixes, affheader_T */
    hashtab_T   af_suff;        /* hashtable for suffixes, affheader_T */
    hashtab_T   af_comp;        /* hashtable for compound flags, compitem_T */
} afffile_T;

/* Values for "af_flagtype": the form in which flags are written. */
#define AFT_CHAR    0   /* flags are one character */
#define AFT_LONG    1   /* flags are two characters */
#define AFT_CAPLONG 2   /* flags are one or two characters */
#define AFT_NUM     3   /* flags are numbers, comma separated */
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4659 | |
typedef struct affentry_S affentry_T;
/* Affix entry from ".aff" file.  Used for prefixes and suffixes.
 * Entries for the same affix form a list linked through "ae_next"; the
 * list is started by the "ah_first" field of affheader_T. */
struct affentry_S
{
    affentry_T  *ae_next;       /* next affix with same name/number */
    char_u      *ae_chop;       /* text to chop off basic word (can be NULL) */
    char_u      *ae_add;        /* text to add to basic word (can be NULL) */
    char_u      *ae_flags;      /* flags on the affix (can be NULL) */
    char_u      *ae_cond;       /* condition (NULL for ".") */
    regprog_T   *ae_prog;       /* regexp program for ae_cond or NULL */
};
4671 | |||||
/* Size of the hashtable key in affheader_T and compitem_T, including the
 * terminating NUL. */
#ifdef FEAT_MBYTE
# define AH_KEY_LEN 17          /* 2 x 8 bytes + NUL */
#else
# define AH_KEY_LEN 7           /* 6 digits + NUL */
#endif

/* Affix header from ".aff" file.  Used for af_pref and af_suff. */
typedef struct affheader_S
{
    char_u      ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
    unsigned    ah_flag;        /* affix name as number, uses "af_flagtype" */
    int         ah_newID;       /* prefix ID after renumbering; 0 if not used */
    int         ah_combine;     /* suffix may combine with prefix */
    int         ah_follows;     /* another affix block should be following */
    affentry_T  *ah_first;      /* first affix entry */
} affheader_T;

/* Get the affheader_T from a hashtable item.  The cast of "hi_key" is valid
 * because "ah_key" is the first member of the structure. */
#define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
4690 | |||||
/* Flag used in compound items.  Stored in the "af_comp" hashtable. */
typedef struct compitem_S
{
    char_u      ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
    unsigned    ci_flag;        /* affix name as number, uses "af_flagtype" */
    int         ci_newID;       /* affix ID after renumbering. */
} compitem_T;

/* Get the compitem_T from a hashtable item.  The cast of "hi_key" is valid
 * because "ci_key" is the first member of the structure. */
#define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
4700 | |||||
/*
 * Structure that is used to store the items in the word tree.  This avoids
 * the need to keep track of each allocated thing, everything is freed all at
 * once after ":mkspell" is done.
 * Blocks are chained through "sb_next".
 */
#define  SBLOCKSIZE 16000       /* size of sb_data */
typedef struct sblock_S sblock_T;
struct sblock_S
{
    sblock_T    *sb_next;       /* next block in list */
    int         sb_used;        /* nr of bytes already in use */
    char_u      sb_data[1];     /* data, actually longer; NOTE(review):
                                   presumably allocated with room for
                                   SBLOCKSIZE bytes, the allocation is not
                                   in this part of the file */
};
4714 | |||||
/*
 * A node in the tree.
 * Nodes are linked in two directions: "wn_child" leads to the next byte of
 * the word, "wn_sibling" to an alternative for the byte at this position.
 */
typedef struct wordnode_S wordnode_T;
struct wordnode_S
{
    union   /* shared to save space */
    {
        char_u  hashkey[6];     /* the hash key, only used while compressing */
        int     index;          /* index in written nodes (valid after first
                                   round) */
    } wn_u1;
    union   /* shared to save space */
    {
        wordnode_T *next;       /* next node with same hash key */
        wordnode_T *wnode;      /* parent node that will write this node */
    } wn_u2;
    wordnode_T  *wn_child;      /* child (next byte in word) */
    wordnode_T  *wn_sibling;    /* next sibling (alternate byte in word,
                                   always sorted) */
    int         wn_refs;        /* Nr. of references to this node.  Only
                                   relevant for first node in a list of
                                   siblings, in following siblings it is
                                   always one. */
    char_u      wn_byte;        /* Byte for this node. NUL for word end */

    /* Info for when "wn_byte" is NUL.
     * In PREFIXTREE "wn_region" is used for the prefcondnr.
     * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
     * "wn_region" the LSW of the wordnr. */
    char_u      wn_affixID;     /* supported/required prefix ID or 0 */
    short_u     wn_flags;       /* WF_ flags */
    short       wn_region;      /* region mask */

#ifdef SPELL_PRINTTREE
    int         wn_nr;          /* sequence nr for printing */
#endif
};
4753 | |||||
#define WN_MASK  0xffff         /* mask relevant bits of "wn_flags" */

/* Get the wordnode_T from a hashtable item; the hash key is stored at the
 * start of the node.  The whole expansion is in parentheses, like HI2AH()
 * and HI2CI(), so that in "HI2WN(hi)->wn_child" the "->" applies to the
 * result of the cast instead of to "hi_key". */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4757 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4758 | /* |
4759 | * Info used while reading the spell files. | ||||
4760 | */ | ||||
4761 | typedef struct spellinfo_S | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4762 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4763 | wordnode_T *si_foldroot; /* tree with case-folded words */ |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 4764 | long si_foldwcount; /* nr of words in si_foldroot */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4765 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4766 | wordnode_T *si_keeproot; /* tree with keep-case words */ |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 4767 | long si_keepwcount; /* nr of words in si_keeproot */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4768 | |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4769 | wordnode_T *si_prefroot; /* tree with postponed prefixes */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4770 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4771 | long si_sugtree; /* creating the soundfolding trie */ |
4772 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4773 | sblock_T *si_blocks; /* memory blocks used */ |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 4774 | long si_blocks_cnt; /* memory blocks allocated */ |
4775 | long si_compress_cnt; /* words to add before lowering | ||||
4776 | compression limit */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4777 | wordnode_T *si_first_free; /* List of nodes that have been freed during |
4778 | compression, linked by "wn_child" field. */ | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 4779 | long si_free_count; /* number of nodes in si_first_free */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4780 | #ifdef SPELL_PRINTTREE |
4781 | int si_wordnode_nr; /* sequence nr for nodes */ | ||||
4782 | #endif | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4783 | buf_T *si_spellbuf; /* buffer used to store soundfold word table */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4784 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4785 | int si_ascii; /* handling only ASCII words */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4786 | int si_add; /* addition file */ |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 4787 | int si_clear_chartab; /* when TRUE clear char tables */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4788 | int si_region; /* region mask */ |
4789 | vimconv_T si_conv; /* for conversion to 'encoding' */ | ||||
Bram Moolenaar | 50cde82 | 2005-06-05 21:54:54 +0000 | [diff] [blame] | 4790 | int si_memtot; /* runtime memory used */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4791 | int si_verbose; /* verbose messages */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4792 | int si_msg_count; /* number of words added since last message */ |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 4793 | char_u *si_info; /* info text chars or NULL */ |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 4794 | int si_region_count; /* number of regions supported (1 when there |
4795 | are no regions) */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4796 | char_u si_region_name[16]; /* region names; used only if |
4797 | * si_region_count > 1) */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4798 | |
4799 | garray_T si_rep; /* list of fromto_T entries from REP lines */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4800 | garray_T si_repsal; /* list of fromto_T entries from REPSAL lines */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4801 | garray_T si_sal; /* list of fromto_T entries from SAL lines */ |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 4802 | char_u *si_sofofr; /* SOFOFROM text */ |
4803 | char_u *si_sofoto; /* SOFOTO text */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4804 | int si_nosugfile; /* NOSUGFILE item found */ |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 4805 | int si_nosplitsugs; /* NOSPLITSUGS item found */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4806 | int si_followup; /* soundsalike: ? */ |
4807 | int si_collapse; /* soundsalike: ? */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4808 | hashtab_T si_commonwords; /* hashtable for common words */ |
4809 | time_t si_sugtime; /* timestamp for .sug file */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4810 | int si_rem_accents; /* soundsalike: remove accents */ |
4811 | garray_T si_map; /* MAP info concatenated */ | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4812 | char_u *si_midword; /* MIDWORD chars or NULL */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4813 | int si_compmax; /* max nr of words for compounding */ |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 4814 | int si_compminlen; /* minimal length for compounding */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4815 | int si_compsylmax; /* max nr of syllables for compounding */ |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 4816 | int si_compoptions; /* COMP_ flags */ |
4817 | garray_T si_comppat; /* CHECKCOMPOUNDPATTERN items, each stored as | ||||
4818 | a string */ | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 4819 | char_u *si_compflags; /* flags used for compounding */ |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 4820 | char_u si_nobreak; /* NOBREAK */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4821 | char_u *si_syllable; /* syllable string */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4822 | garray_T si_prefcond; /* table with conditions for postponed |
4823 | * prefixes, each stored as a string */ | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4824 | int si_newprefID; /* current value for ah_newID */ |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4825 | int si_newcompID; /* current value for compound ID */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4826 | } spellinfo_T; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4827 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4828 | static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname)); |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 4829 | static int spell_info_item __ARGS((char_u *s)); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4830 | static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum)); |
4831 | static unsigned get_affitem __ARGS((int flagtype, char_u **pp)); | ||||
4832 | static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags)); | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4833 | static void check_renumber __ARGS((spellinfo_T *spin)); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4834 | static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag)); |
4835 | static void aff_check_number __ARGS((int spinval, int affval, char *name)); | ||||
4836 | static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name)); | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 4837 | static int str_equal __ARGS((char_u *s1, char_u *s2)); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4838 | static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to)); |
4839 | static int sal_to_bool __ARGS((char_u *s)); | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 4840 | static int has_non_ascii __ARGS((char_u *s)); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4841 | static void spell_free_aff __ARGS((afffile_T *aff)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4842 | static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4843 | static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 4844 | static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4845 | static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int comb, int flags, char_u *pfxlist, int pfxlen)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4846 | static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname)); |
4847 | static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align)); | ||||
4848 | static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s)); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4849 | static void free_blocks __ARGS((sblock_T *bl)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4850 | static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin)); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 4851 | static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix)); |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 4852 | static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4853 | static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4854 | static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4855 | static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n)); |
4856 | static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root)); | ||||
4857 | static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot)); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4858 | static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4859 | static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd)); |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 4860 | static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname)); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 4861 | static void clear_node __ARGS((wordnode_T *node)); |
4862 | static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree)); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4863 | static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname)); |
4864 | static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang)); | ||||
4865 | static int sug_maketable __ARGS((spellinfo_T *spin)); | ||||
4866 | static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap)); | ||||
4867 | static int offset2bytes __ARGS((int nr, char_u *buf)); | ||||
4868 | static int bytes2offset __ARGS((char_u **pp)); | ||||
4869 | static void sug_write __ARGS((spellinfo_T *spin, char_u *fname)); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4870 | static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word)); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 4871 | static void spell_message __ARGS((spellinfo_T *spin, char_u *str)); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 4872 | static void init_spellfile __ARGS((void)); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4873 | |
/* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
 * but it must be negative to indicate the prefix tree to tree_add_word().
 * Use a negative number with the lower 8 bits zero.
 * The value is parenthesized so that the minus sign cannot combine with an
 * operator next to where the macro is used. */
#define PFX_FLAGS       (-256)
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 4878 | |
/*
 * Parameters that control when the tree is compressed.  They are variables
 * so that they can be tuned, see 'mkspellmem'.
 */
static long compress_start = 30000L;    /* memory / SBLOCKSIZE */
static long compress_inc = 100L;        /* memory / SBLOCKSIZE */
static long compress_added = 500000L;   /* word count */
4885 | |||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 4886 | #ifdef SPELL_PRINTTREE |
/*
 * For debugging the tree code: print the current tree in a (more or less)
 * readable format, so that we can see what happens when adding a word and/or
 * compressing the tree.
 * Based on code from Olaf Seibert.
 */
#define PRINTLINESIZE   1000    /* size of each line buffer */
#define PRINTWIDTH      6       /* columns used for one tree level */

/*
 * Format "a1" and "a2" with "fmt" into line buffer "l", starting at the
 * column that belongs to tree level "depth".
 * Evaluates to the result of vim_snprintf().  When the column for "depth"
 * is not inside the buffer nothing is written and the result is zero, so
 * that a very deep tree cannot write past the end of the buffer.
 * All arguments are parenthesized, "depth" may be an expression such as
 * "depth + 1".  "depth" is evaluated more than once, it must not have side
 * effects.
 */
#define PRINTSOME(l, depth, fmt, a1, a2) \
    ((depth) >= 0 && (depth) * PRINTWIDTH < PRINTLINESIZE \
        ? vim_snprintf((l) + (depth) * PRINTWIDTH, \
                    PRINTLINESIZE - PRINTWIDTH * (depth), (fmt), (a1), (a2)) \
        : 0)

/* The three text lines that together show one row of tree nodes. */
static char line1[PRINTLINESIZE];
static char line2[PRINTLINESIZE];
static char line3[PRINTLINESIZE];
4902 | |||||
4903 | static void | ||||
4904 | spell_clear_flags(wordnode_T *node) | ||||
4905 | { | ||||
4906 | wordnode_T *np; | ||||
4907 | |||||
4908 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
4909 | { | ||||
4910 | np->wn_u1.index = FALSE; | ||||
4911 | spell_clear_flags(np->wn_child); | ||||
4912 | } | ||||
4913 | } | ||||
4914 | |||||
4915 | static void | ||||
4916 | spell_print_node(wordnode_T *node, int depth) | ||||
4917 | { | ||||
4918 | if (node->wn_u1.index) | ||||
4919 | { | ||||
4920 | /* Done this node before, print the reference. */ | ||||
4921 | PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0); | ||||
4922 | PRINTSOME(line2, depth, " ", 0, 0); | ||||
4923 | PRINTSOME(line3, depth, " ", 0, 0); | ||||
4924 | msg(line1); | ||||
4925 | msg(line2); | ||||
4926 | msg(line3); | ||||
4927 | } | ||||
4928 | else | ||||
4929 | { | ||||
4930 | node->wn_u1.index = TRUE; | ||||
4931 | |||||
4932 | if (node->wn_byte != NUL) | ||||
4933 | { | ||||
4934 | if (node->wn_child != NULL) | ||||
4935 | PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0); | ||||
4936 | else | ||||
4937 | /* Cannot happen? */ | ||||
4938 | PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0); | ||||
4939 | } | ||||
4940 | else | ||||
4941 | PRINTSOME(line1, depth, " $ ", 0, 0); | ||||
4942 | |||||
4943 | PRINTSOME(line2, depth, "%d/%d ", node->wn_nr, node->wn_refs); | ||||
4944 | |||||
4945 | if (node->wn_sibling != NULL) | ||||
4946 | PRINTSOME(line3, depth, " | ", 0, 0); | ||||
4947 | else | ||||
4948 | PRINTSOME(line3, depth, " ", 0, 0); | ||||
4949 | |||||
4950 | if (node->wn_byte == NUL) | ||||
4951 | { | ||||
4952 | msg(line1); | ||||
4953 | msg(line2); | ||||
4954 | msg(line3); | ||||
4955 | } | ||||
4956 | |||||
4957 | /* do the children */ | ||||
4958 | if (node->wn_byte != NUL && node->wn_child != NULL) | ||||
4959 | spell_print_node(node->wn_child, depth + 1); | ||||
4960 | |||||
4961 | /* do the siblings */ | ||||
4962 | if (node->wn_sibling != NULL) | ||||
4963 | { | ||||
4964 | /* get rid of all parent details except | */ | ||||
4965 | STRCPY(line1, line3); | ||||
4966 | STRCPY(line2, line3); | ||||
4967 | spell_print_node(node->wn_sibling, depth); | ||||
4968 | } | ||||
4969 | } | ||||
4970 | } | ||||
4971 | |||||
4972 | static void | ||||
4973 | spell_print_tree(wordnode_T *root) | ||||
4974 | { | ||||
4975 | if (root != NULL) | ||||
4976 | { | ||||
4977 | /* Clear the "wn_u1.index" fields, used to remember what has been | ||||
4978 | * done. */ | ||||
4979 | spell_clear_flags(root); | ||||
4980 | |||||
4981 | /* Recursively print the tree. */ | ||||
4982 | spell_print_node(root, 0); | ||||
4983 | } | ||||
4984 | } | ||||
4985 | #endif /* SPELL_PRINTTREE */ | ||||
4986 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4987 | /* |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 4988 | * Read the affix file "fname". |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 4989 | * Returns an afffile_T, NULL for complete failure. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4990 | */ |
4991 | static afffile_T * | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4992 | spell_read_aff(spin, fname) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 4993 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 4994 | char_u *fname; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 4995 | { |
4996 | FILE *fd; | ||||
4997 | afffile_T *aff; | ||||
4998 | char_u rline[MAXLINELEN]; | ||||
4999 | char_u *line; | ||||
5000 | char_u *pc = NULL; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5001 | #define MAXITEMCNT 30 |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5002 | char_u *(items[MAXITEMCNT]); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5003 | int itemcnt; |
5004 | char_u *p; | ||||
5005 | int lnum = 0; | ||||
5006 | affheader_T *cur_aff = NULL; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5007 | int did_postpone_prefix = FALSE; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5008 | int aff_todo = 0; |
5009 | hashtab_T *tp; | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5010 | char_u *low = NULL; |
5011 | char_u *fol = NULL; | ||||
5012 | char_u *upp = NULL; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5013 | int do_rep; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5014 | int do_repsal; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5015 | int do_sal; |
5016 | int do_map; | ||||
5017 | int found_map = FALSE; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 5018 | hashitem_T *hi; |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5019 | int l; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5020 | int compminlen = 0; /* COMPOUNDMIN value */ |
5021 | int compsylmax = 0; /* COMPOUNDSYLMAX value */ | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5022 | int compoptions = 0; /* COMP_ flags */ |
5023 | int compmax = 0; /* COMPOUNDWORDMAX value */ | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5024 | char_u *compflags = NULL; /* COMPOUNDFLAG and COMPOUNDRULE |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5025 | concatenated */ |
5026 | char_u *midword = NULL; /* MIDWORD value */ | ||||
5027 | char_u *syllable = NULL; /* SYLLABLE value */ | ||||
5028 | char_u *sofofrom = NULL; /* SOFOFROM value */ | ||||
5029 | char_u *sofoto = NULL; /* SOFOTO value */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5030 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5031 | /* |
5032 | * Open the file. | ||||
5033 | */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 5034 | fd = mch_fopen((char *)fname, "r"); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5035 | if (fd == NULL) |
5036 | { | ||||
5037 | EMSG2(_(e_notopen), fname); | ||||
5038 | return NULL; | ||||
5039 | } | ||||
5040 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5041 | vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname); |
5042 | spell_message(spin, IObuff); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5043 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5044 | /* Only do REP lines when not done in another .aff file already. */ |
5045 | do_rep = spin->si_rep.ga_len == 0; | ||||
5046 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5047 | /* Only do REPSAL lines when not done in another .aff file already. */ |
5048 | do_repsal = spin->si_repsal.ga_len == 0; | ||||
5049 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5050 | /* Only do SAL lines when not done in another .aff file already. */ |
5051 | do_sal = spin->si_sal.ga_len == 0; | ||||
5052 | |||||
5053 | /* Only do MAP lines when not done in another .aff file already. */ | ||||
5054 | do_map = spin->si_map.ga_len == 0; | ||||
5055 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5056 | /* |
5057 | * Allocate and init the afffile_T structure. | ||||
5058 | */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5059 | aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5060 | if (aff == NULL) |
5061 | return NULL; | ||||
5062 | hash_init(&aff->af_pref); | ||||
5063 | hash_init(&aff->af_suff); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5064 | hash_init(&aff->af_comp); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5065 | |
5066 | /* | ||||
5067 | * Read all the lines in the file one by one. | ||||
5068 | */ | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5069 | while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5070 | { |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5071 | line_breakcheck(); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5072 | ++lnum; |
5073 | |||||
5074 | /* Skip comment lines. */ | ||||
5075 | if (*rline == '#') | ||||
5076 | continue; | ||||
5077 | |||||
5078 | /* Convert from "SET" to 'encoding' when needed. */ | ||||
5079 | vim_free(pc); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 5080 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5081 | if (spin->si_conv.vc_type != CONV_NONE) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5082 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5083 | pc = string_convert(&spin->si_conv, rline, NULL); |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5084 | if (pc == NULL) |
5085 | { | ||||
5086 | smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | ||||
5087 | fname, lnum, rline); | ||||
5088 | continue; | ||||
5089 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5090 | line = pc; |
5091 | } | ||||
5092 | else | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 5093 | #endif |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5094 | { |
5095 | pc = NULL; | ||||
5096 | line = rline; | ||||
5097 | } | ||||
5098 | |||||
5099 | /* Split the line up in white separated items. Put a NUL after each | ||||
5100 | * item. */ | ||||
5101 | itemcnt = 0; | ||||
5102 | for (p = line; ; ) | ||||
5103 | { | ||||
5104 | while (*p != NUL && *p <= ' ') /* skip white space and CR/NL */ | ||||
5105 | ++p; | ||||
5106 | if (*p == NUL) | ||||
5107 | break; | ||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5108 | if (itemcnt == MAXITEMCNT) /* too many items */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5109 | break; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5110 | items[itemcnt++] = p; |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5111 | /* A few items have arbitrary text argument, don't split them. */ |
5112 | if (itemcnt == 2 && spell_info_item(items[0])) | ||||
5113 | while (*p >= ' ' || *p == TAB) /* skip until CR/NL */ | ||||
5114 | ++p; | ||||
5115 | else | ||||
5116 | while (*p > ' ') /* skip until white space or CR/NL */ | ||||
5117 | ++p; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5118 | if (*p == NUL) |
5119 | break; | ||||
5120 | *p++ = NUL; | ||||
5121 | } | ||||
5122 | |||||
5123 | /* Handle non-empty lines. */ | ||||
5124 | if (itemcnt > 0) | ||||
5125 | { | ||||
5126 | if (STRCMP(items[0], "SET") == 0 && itemcnt == 2 | ||||
5127 | && aff->af_enc == NULL) | ||||
5128 | { | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 5129 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5130 | /* Setup for conversion from "ENC" to 'encoding'. */ |
5131 | aff->af_enc = enc_canonize(items[1]); | ||||
5132 | if (aff->af_enc != NULL && !spin->si_ascii | ||||
5133 | && convert_setup(&spin->si_conv, aff->af_enc, | ||||
5134 | p_enc) == FAIL) | ||||
5135 | smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | ||||
5136 | fname, aff->af_enc, p_enc); | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5137 | spin->si_conv.vc_fail = TRUE; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 5138 | #else |
5139 | smsg((char_u *)_("Conversion in %s not supported"), fname); | ||||
5140 | #endif | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5141 | } |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5142 | else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2 |
5143 | && aff->af_flagtype == AFT_CHAR) | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 5144 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5145 | if (STRCMP(items[1], "long") == 0) |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5146 | aff->af_flagtype = AFT_LONG; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5147 | else if (STRCMP(items[1], "num") == 0) |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5148 | aff->af_flagtype = AFT_NUM; |
5149 | else if (STRCMP(items[1], "caplong") == 0) | ||||
5150 | aff->af_flagtype = AFT_CAPLONG; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5151 | else |
5152 | smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"), | ||||
5153 | fname, lnum, items[1]); | ||||
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5154 | if (aff->af_rare != 0 |
5155 | || aff->af_keepcase != 0 | ||||
5156 | || aff->af_bad != 0 | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5157 | || aff->af_needaffix != 0 |
5158 | || aff->af_needcomp != 0 | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5159 | || aff->af_comproot != 0 |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 5160 | || aff->af_nosuggest != 0 |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5161 | || compflags != NULL |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5162 | || aff->af_suff.ht_used > 0 |
5163 | || aff->af_pref.ht_used > 0) | ||||
5164 | smsg((char_u *)_("FLAG after using flags in %s line %d: %s"), | ||||
5165 | fname, lnum, items[1]); | ||||
5166 | } | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5167 | else if (spell_info_item(items[0])) |
5168 | { | ||||
5169 | p = (char_u *)getroom(spin, | ||||
5170 | (spin->si_info == NULL ? 0 : STRLEN(spin->si_info)) | ||||
5171 | + STRLEN(items[0]) | ||||
5172 | + STRLEN(items[1]) + 3, FALSE); | ||||
5173 | if (p != NULL) | ||||
5174 | { | ||||
5175 | if (spin->si_info != NULL) | ||||
5176 | { | ||||
5177 | STRCPY(p, spin->si_info); | ||||
5178 | STRCAT(p, "\n"); | ||||
5179 | } | ||||
5180 | STRCAT(p, items[0]); | ||||
5181 | STRCAT(p, " "); | ||||
5182 | STRCAT(p, items[1]); | ||||
5183 | spin->si_info = p; | ||||
5184 | } | ||||
5185 | } | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5186 | else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2 |
5187 | && midword == NULL) | ||||
5188 | { | ||||
5189 | midword = getroom_save(spin, items[1]); | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 5190 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5191 | else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5192 | { |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5193 | /* ignored, we look in the tree for what chars may appear */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5194 | } |
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5195 | /* TODO: remove "RAR" later */ |
5196 | else if ((STRCMP(items[0], "RAR") == 0 | ||||
5197 | || STRCMP(items[0], "RARE") == 0) && itemcnt == 2 | ||||
5198 | && aff->af_rare == 0) | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 5199 | { |
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5200 | aff->af_rare = affitem2flag(aff->af_flagtype, items[1], |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5201 | fname, lnum); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 5202 | } |
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5203 | /* TODO: remove "KEP" later */ |
5204 | else if ((STRCMP(items[0], "KEP") == 0 | ||||
5205 | || STRCMP(items[0], "KEEPCASE") == 0) && itemcnt == 2 | ||||
5206 | && aff->af_keepcase == 0) | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 5207 | { |
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5208 | aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1], |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5209 | fname, lnum); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 5210 | } |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 5211 | else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2 |
5212 | && aff->af_bad == 0) | ||||
5213 | { | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5214 | aff->af_bad = affitem2flag(aff->af_flagtype, items[1], |
5215 | fname, lnum); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 5216 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5217 | else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2 |
5218 | && aff->af_needaffix == 0) | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5219 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5220 | aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1], |
5221 | fname, lnum); | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5222 | } |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 5223 | else if (STRCMP(items[0], "NOSUGGEST") == 0 && itemcnt == 2 |
5224 | && aff->af_nosuggest == 0) | ||||
5225 | { | ||||
5226 | aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1], | ||||
5227 | fname, lnum); | ||||
5228 | } | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5229 | else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2 |
5230 | && aff->af_needcomp == 0) | ||||
5231 | { | ||||
5232 | aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1], | ||||
5233 | fname, lnum); | ||||
5234 | } | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5235 | else if (STRCMP(items[0], "COMPOUNDROOT") == 0 && itemcnt == 2 |
5236 | && aff->af_comproot == 0) | ||||
5237 | { | ||||
5238 | aff->af_comproot = affitem2flag(aff->af_flagtype, items[1], | ||||
5239 | fname, lnum); | ||||
5240 | } | ||||
5241 | else if (STRCMP(items[0], "COMPOUNDFORBIDFLAG") == 0 | ||||
5242 | && itemcnt == 2 && aff->af_compforbid == 0) | ||||
5243 | { | ||||
5244 | aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1], | ||||
5245 | fname, lnum); | ||||
5246 | } | ||||
5247 | else if (STRCMP(items[0], "COMPOUNDPERMITFLAG") == 0 | ||||
5248 | && itemcnt == 2 && aff->af_comppermit == 0) | ||||
5249 | { | ||||
5250 | aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1], | ||||
5251 | fname, lnum); | ||||
5252 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5253 | else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2 |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5254 | && compflags == NULL) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5255 | { |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5256 | /* Turn flag "c" into COMPOUNDRULE compatible string "c+", |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5257 | * "Na" into "Na+", "1234" into "1234+". */ |
5258 | p = getroom(spin, STRLEN(items[1]) + 2, FALSE); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5259 | if (p != NULL) |
5260 | { | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5261 | STRCPY(p, items[1]); |
5262 | STRCAT(p, "+"); | ||||
5263 | compflags = p; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5264 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5265 | } |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5266 | else if (STRCMP(items[0], "COMPOUNDRULE") == 0 && itemcnt == 2) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5267 | { |
5268 | /* Concatenate this string to previously defined ones, using a | ||||
5269 | * slash to separate them. */ | ||||
5270 | l = STRLEN(items[1]) + 1; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5271 | if (compflags != NULL) |
5272 | l += STRLEN(compflags) + 1; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5273 | p = getroom(spin, l, FALSE); |
5274 | if (p != NULL) | ||||
5275 | { | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5276 | if (compflags != NULL) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5277 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5278 | STRCPY(p, compflags); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5279 | STRCAT(p, "/"); |
5280 | } | ||||
5281 | STRCAT(p, items[1]); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5282 | compflags = p; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5283 | } |
5284 | } | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5285 | else if (STRCMP(items[0], "COMPOUNDWORDMAX") == 0 && itemcnt == 2 |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5286 | && compmax == 0) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5287 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5288 | compmax = atoi((char *)items[1]); |
5289 | if (compmax == 0) | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5290 | smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"), |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5291 | fname, lnum, items[1]); |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5292 | } |
5293 | else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2 | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5294 | && compminlen == 0) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5295 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5296 | compminlen = atoi((char *)items[1]); |
5297 | if (compminlen == 0) | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5298 | smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"), |
5299 | fname, lnum, items[1]); | ||||
5300 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5301 | else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2 |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5302 | && compsylmax == 0) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5303 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5304 | compsylmax = atoi((char *)items[1]); |
5305 | if (compsylmax == 0) | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5306 | smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"), |
5307 | fname, lnum, items[1]); | ||||
5308 | } | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5309 | else if (STRCMP(items[0], "CHECKCOMPOUNDDUP") == 0 && itemcnt == 1) |
5310 | { | ||||
5311 | compoptions |= COMP_CHECKDUP; | ||||
5312 | } | ||||
5313 | else if (STRCMP(items[0], "CHECKCOMPOUNDREP") == 0 && itemcnt == 1) | ||||
5314 | { | ||||
5315 | compoptions |= COMP_CHECKREP; | ||||
5316 | } | ||||
5317 | else if (STRCMP(items[0], "CHECKCOMPOUNDCASE") == 0 && itemcnt == 1) | ||||
5318 | { | ||||
5319 | compoptions |= COMP_CHECKCASE; | ||||
5320 | } | ||||
5321 | else if (STRCMP(items[0], "CHECKCOMPOUNDTRIPLE") == 0 | ||||
5322 | && itemcnt == 1) | ||||
5323 | { | ||||
5324 | compoptions |= COMP_CHECKTRIPLE; | ||||
5325 | } | ||||
5326 | else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0 | ||||
5327 | && itemcnt == 2) | ||||
5328 | { | ||||
5329 | if (atoi((char *)items[1]) == 0) | ||||
5330 | smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"), | ||||
5331 | fname, lnum, items[1]); | ||||
5332 | } | ||||
5333 | else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0 | ||||
5334 | && itemcnt == 3) | ||||
5335 | { | ||||
5336 | garray_T *gap = &spin->si_comppat; | ||||
5337 | int i; | ||||
5338 | |||||
5339 | /* Only add the couple if it isn't already there. */ | ||||
5340 | for (i = 0; i < gap->ga_len - 1; i += 2) | ||||
5341 | if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0 | ||||
5342 | && STRCMP(((char_u **)(gap->ga_data))[i + 1], | ||||
5343 | items[2]) == 0) | ||||
5344 | break; | ||||
5345 | if (i >= gap->ga_len && ga_grow(gap, 2) == OK) | ||||
5346 | { | ||||
5347 | ((char_u **)(gap->ga_data))[gap->ga_len++] | ||||
5348 | = getroom_save(spin, items[1]); | ||||
5349 | ((char_u **)(gap->ga_data))[gap->ga_len++] | ||||
5350 | = getroom_save(spin, items[2]); | ||||
5351 | } | ||||
5352 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5353 | else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2 |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5354 | && syllable == NULL) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5355 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5356 | syllable = getroom_save(spin, items[1]); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5357 | } |
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 5358 | else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1) |
5359 | { | ||||
5360 | spin->si_nobreak = TRUE; | ||||
5361 | } | ||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 5362 | else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1) |
5363 | { | ||||
5364 | spin->si_nosplitsugs = TRUE; | ||||
5365 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5366 | else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1) |
5367 | { | ||||
5368 | spin->si_nosugfile = TRUE; | ||||
5369 | } | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5370 | else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1) |
5371 | { | ||||
5372 | aff->af_pfxpostpone = TRUE; | ||||
5373 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5374 | else if ((STRCMP(items[0], "PFX") == 0 |
5375 | || STRCMP(items[0], "SFX") == 0) | ||||
5376 | && aff_todo == 0 | ||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5377 | && itemcnt >= 4) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5378 | { |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5379 | int lasti = 4; |
5380 | char_u key[AH_KEY_LEN]; | ||||
5381 | |||||
5382 | if (*items[0] == 'P') | ||||
5383 | tp = &aff->af_pref; | ||||
5384 | else | ||||
5385 | tp = &aff->af_suff; | ||||
5386 | |||||
5387 | /* Myspell allows the same affix name to be used multiple | ||||
5388 | * times. The affix files that do this have an undocumented | ||||
5389 | * "S" flag on all but the last block, thus we check for that | ||||
5390 | * and store it in ah_follows. */ | ||||
5391 | vim_strncpy(key, items[1], AH_KEY_LEN - 1); | ||||
5392 | hi = hash_find(tp, key); | ||||
5393 | if (!HASHITEM_EMPTY(hi)) | ||||
5394 | { | ||||
5395 | cur_aff = HI2AH(hi); | ||||
5396 | if (cur_aff->ah_combine != (*items[2] == 'Y')) | ||||
5397 | smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"), | ||||
5398 | fname, lnum, items[1]); | ||||
5399 | if (!cur_aff->ah_follows) | ||||
5400 | smsg((char_u *)_("Duplicate affix in %s line %d: %s"), | ||||
5401 | fname, lnum, items[1]); | ||||
5402 | } | ||||
5403 | else | ||||
5404 | { | ||||
5405 | /* New affix letter. */ | ||||
5406 | cur_aff = (affheader_T *)getroom(spin, | ||||
5407 | sizeof(affheader_T), TRUE); | ||||
5408 | if (cur_aff == NULL) | ||||
5409 | break; | ||||
5410 | cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1], | ||||
5411 | fname, lnum); | ||||
5412 | if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN) | ||||
5413 | break; | ||||
5414 | if (cur_aff->ah_flag == aff->af_bad | ||||
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 5415 | || cur_aff->ah_flag == aff->af_rare |
5416 | || cur_aff->ah_flag == aff->af_keepcase | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5417 | || cur_aff->ah_flag == aff->af_needaffix |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 5418 | || cur_aff->ah_flag == aff->af_nosuggest |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5419 | || cur_aff->ah_flag == aff->af_needcomp |
5420 | || cur_aff->ah_flag == aff->af_comproot) | ||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 5421 | smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"), |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5422 | fname, lnum, items[1]); |
5423 | STRCPY(cur_aff->ah_key, items[1]); | ||||
5424 | hash_add(tp, cur_aff->ah_key); | ||||
5425 | |||||
5426 | cur_aff->ah_combine = (*items[2] == 'Y'); | ||||
5427 | } | ||||
5428 | |||||
5429 | /* Check for the "S" flag, which apparently means that another | ||||
5430 | * block with the same affix name is following. */ | ||||
5431 | if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0) | ||||
5432 | { | ||||
5433 | ++lasti; | ||||
5434 | cur_aff->ah_follows = TRUE; | ||||
5435 | } | ||||
5436 | else | ||||
5437 | cur_aff->ah_follows = FALSE; | ||||
5438 | |||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5439 | /* Myspell allows extra text after the item, but that might |
5440 | * mean mistakes go unnoticed. Require a comment-starter. */ | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5441 | if (itemcnt > lasti && *items[lasti] != '#') |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5442 | smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5443 | |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5444 | if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5445 | smsg((char_u *)_("Expected Y or N in %s line %d: %s"), |
5446 | fname, lnum, items[2]); | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5447 | |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5448 | if (*items[0] == 'P' && aff->af_pfxpostpone) |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5449 | { |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5450 | if (cur_aff->ah_newID == 0) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5451 | { |
5452 | /* Use a new number in the .spl file later, to be able | ||||
5453 | * to handle multiple .aff files. */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5454 | check_renumber(spin); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5455 | cur_aff->ah_newID = ++spin->si_newprefID; |
5456 | |||||
5457 | /* We only really use ah_newID if the prefix is | ||||
5458 | * postponed. We know that only after handling all | ||||
5459 | * the items. */ | ||||
5460 | did_postpone_prefix = FALSE; | ||||
5461 | } | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5462 | else |
5463 | /* Did use the ID in a previous block. */ | ||||
5464 | did_postpone_prefix = TRUE; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5465 | } |
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5466 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5467 | aff_todo = atoi((char *)items[3]); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5468 | } |
5469 | else if ((STRCMP(items[0], "PFX") == 0 | ||||
5470 | || STRCMP(items[0], "SFX") == 0) | ||||
5471 | && aff_todo > 0 | ||||
5472 | && STRCMP(cur_aff->ah_key, items[1]) == 0 | ||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5473 | && itemcnt >= 5) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5474 | { |
5475 | affentry_T *aff_entry; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5476 | int upper = FALSE; |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 5477 | int lasti = 5; |
5478 | |||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5479 | /* Myspell allows extra text after the item, but that might |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5480 | * mean mistakes go unnoticed. Require a comment-starter. |
5481 | * Hunspell uses a "-" item. */ | ||||
5482 | if (itemcnt > lasti && *items[lasti] != '#' | ||||
5483 | && (STRCMP(items[lasti], "-") != 0 | ||||
5484 | || itemcnt != lasti + 1)) | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 5485 | smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]); |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 5486 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5487 | /* New item for an affix letter. */ |
5488 | --aff_todo; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5489 | aff_entry = (affentry_T *)getroom(spin, |
Bram Moolenaar | cfc7d63 | 2005-07-28 22:28:16 +0000 | [diff] [blame] | 5490 | sizeof(affentry_T), TRUE); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5491 | if (aff_entry == NULL) |
5492 | break; | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 5493 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5494 | if (STRCMP(items[2], "0") != 0) |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5495 | aff_entry->ae_chop = getroom_save(spin, items[2]); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5496 | if (STRCMP(items[3], "0") != 0) |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5497 | { |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5498 | aff_entry->ae_add = getroom_save(spin, items[3]); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5499 | |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5500 | /* Recognize flags on the affix: abcd/1234 */ |
5501 | aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/'); | ||||
5502 | if (aff_entry->ae_flags != NULL) | ||||
5503 | *aff_entry->ae_flags++ = NUL; | ||||
5504 | } | ||||
5505 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5506 | /* Don't use an affix entry with non-ASCII characters when |
5507 | * "spin->si_ascii" is TRUE. */ | ||||
5508 | if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop) | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 5509 | || has_non_ascii(aff_entry->ae_add))) |
5510 | { | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 5511 | aff_entry->ae_next = cur_aff->ah_first; |
5512 | cur_aff->ah_first = aff_entry; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5513 | |
5514 | if (STRCMP(items[4], ".") != 0) | ||||
5515 | { | ||||
5516 | char_u buf[MAXLINELEN]; | ||||
5517 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5518 | aff_entry->ae_cond = getroom_save(spin, items[4]); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5519 | if (*items[0] == 'P') |
5520 | sprintf((char *)buf, "^%s", items[4]); | ||||
5521 | else | ||||
5522 | sprintf((char *)buf, "%s$", items[4]); | ||||
5523 | aff_entry->ae_prog = vim_regcomp(buf, | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5524 | RE_MAGIC + RE_STRING + RE_STRICT); |
5525 | if (aff_entry->ae_prog == NULL) | ||||
5526 | smsg((char_u *)_("Broken condition in %s line %d: %s"), | ||||
5527 | fname, lnum, items[4]); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5528 | } |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5529 | |
5530 | /* For postponed prefixes we need an entry in si_prefcond | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5531 | * for the condition. Use an existing one if possible. |
5532 | * Can't be done for an affix with flags. */ | ||||
5533 | if (*items[0] == 'P' && aff->af_pfxpostpone | ||||
5534 | && aff_entry->ae_flags == NULL) | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5535 | { |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5536 | /* When the chop string is one lower-case letter and |
5537 | * the add string ends in the upper-case letter we set | ||||
5538 | * the "upper" flag, clear "ae_chop" and remove the | ||||
5539 | * letters from "ae_add". The condition must either | ||||
5540 | * be empty or start with the same letter. */ | ||||
5541 | if (aff_entry->ae_chop != NULL | ||||
5542 | && aff_entry->ae_add != NULL | ||||
5543 | #ifdef FEAT_MBYTE | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5544 | && aff_entry->ae_chop[(*mb_ptr2len)( |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5545 | aff_entry->ae_chop)] == NUL |
5546 | #else | ||||
5547 | && aff_entry->ae_chop[1] == NUL | ||||
5548 | #endif | ||||
5549 | ) | ||||
5550 | { | ||||
5551 | int c, c_up; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5552 | |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5553 | c = PTR2CHAR(aff_entry->ae_chop); |
5554 | c_up = SPELL_TOUPPER(c); | ||||
5555 | if (c_up != c | ||||
5556 | && (aff_entry->ae_cond == NULL | ||||
5557 | || PTR2CHAR(aff_entry->ae_cond) == c)) | ||||
5558 | { | ||||
5559 | p = aff_entry->ae_add | ||||
5560 | + STRLEN(aff_entry->ae_add); | ||||
5561 | mb_ptr_back(aff_entry->ae_add, p); | ||||
5562 | if (PTR2CHAR(p) == c_up) | ||||
5563 | { | ||||
5564 | upper = TRUE; | ||||
5565 | aff_entry->ae_chop = NULL; | ||||
5566 | *p = NUL; | ||||
5567 | |||||
5568 | /* The condition is matched with the | ||||
5569 | * actual word, thus must check for the | ||||
5570 | * upper-case letter. */ | ||||
5571 | if (aff_entry->ae_cond != NULL) | ||||
5572 | { | ||||
5573 | char_u buf[MAXLINELEN]; | ||||
5574 | #ifdef FEAT_MBYTE | ||||
5575 | if (has_mbyte) | ||||
5576 | { | ||||
5577 | onecap_copy(items[4], buf, TRUE); | ||||
5578 | aff_entry->ae_cond = getroom_save( | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5579 | spin, buf); |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5580 | } |
5581 | else | ||||
5582 | #endif | ||||
5583 | *aff_entry->ae_cond = c_up; | ||||
5584 | if (aff_entry->ae_cond != NULL) | ||||
5585 | { | ||||
5586 | sprintf((char *)buf, "^%s", | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5587 | aff_entry->ae_cond); |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5588 | vim_free(aff_entry->ae_prog); |
5589 | aff_entry->ae_prog = vim_regcomp( | ||||
5590 | buf, RE_MAGIC + RE_STRING); | ||||
5591 | } | ||||
5592 | } | ||||
5593 | } | ||||
5594 | } | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5595 | } |
5596 | |||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5597 | if (aff_entry->ae_chop == NULL |
5598 | && aff_entry->ae_flags == NULL) | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 5599 | { |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5600 | int idx; |
5601 | char_u **pp; | ||||
5602 | int n; | ||||
5603 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5604 | /* Find a previously used condition. */ |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5605 | for (idx = spin->si_prefcond.ga_len - 1; idx >= 0; |
5606 | --idx) | ||||
5607 | { | ||||
5608 | p = ((char_u **)spin->si_prefcond.ga_data)[idx]; | ||||
5609 | if (str_equal(p, aff_entry->ae_cond)) | ||||
5610 | break; | ||||
5611 | } | ||||
5612 | if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK) | ||||
5613 | { | ||||
5614 | /* Not found, add a new condition. */ | ||||
5615 | idx = spin->si_prefcond.ga_len++; | ||||
5616 | pp = ((char_u **)spin->si_prefcond.ga_data) | ||||
5617 | + idx; | ||||
5618 | if (aff_entry->ae_cond == NULL) | ||||
5619 | *pp = NULL; | ||||
5620 | else | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5621 | *pp = getroom_save(spin, |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5622 | aff_entry->ae_cond); |
5623 | } | ||||
5624 | |||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5625 | if (aff_entry->ae_flags != NULL) |
5626 | smsg((char_u *)_("Affix flags ignored when PFXPOSTPONE used in %s line %d: %s"), | ||||
5627 | fname, lnum, items[4]); | ||||
5628 | |||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5629 | /* Add the prefix to the prefix tree. */ |
5630 | if (aff_entry->ae_add == NULL) | ||||
5631 | p = (char_u *)""; | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 5632 | else |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5633 | p = aff_entry->ae_add; |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5634 | |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5635 | /* PFX_FLAGS is a negative number, so that |
5636 | * tree_add_word() knows this is the prefix tree. */ | ||||
5637 | n = PFX_FLAGS; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5638 | if (!cur_aff->ah_combine) |
5639 | n |= WFP_NC; | ||||
5640 | if (upper) | ||||
5641 | n |= WFP_UP; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 5642 | tree_add_word(spin, p, spin->si_prefroot, n, |
5643 | idx, cur_aff->ah_newID); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5644 | did_postpone_prefix = TRUE; |
5645 | } | ||||
5646 | |||||
5647 | /* Didn't actually use ah_newID, backup si_newprefID. */ | ||||
5648 | if (aff_todo == 0 && !did_postpone_prefix) | ||||
5649 | { | ||||
5650 | --spin->si_newprefID; | ||||
5651 | cur_aff->ah_newID = 0; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 5652 | } |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 5653 | } |
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 5654 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5655 | } |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5656 | else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2 |
5657 | && fol == NULL) | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5658 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5659 | fol = vim_strsave(items[1]); |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5660 | } |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5661 | else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2 |
5662 | && low == NULL) | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5663 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5664 | low = vim_strsave(items[1]); |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5665 | } |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5666 | else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2 |
5667 | && upp == NULL) | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5668 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5669 | upp = vim_strsave(items[1]); |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5670 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5671 | else if ((STRCMP(items[0], "REP") == 0 |
5672 | || STRCMP(items[0], "REPSAL") == 0) | ||||
5673 | && itemcnt == 2) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5674 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5675 | /* Ignore REP/REPSAL count */; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5676 | if (!isdigit(*items[1])) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5677 | smsg((char_u *)_("Expected REP(SAL) count in %s line %d"), |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5678 | fname, lnum); |
5679 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5680 | else if ((STRCMP(items[0], "REP") == 0 |
5681 | || STRCMP(items[0], "REPSAL") == 0) | ||||
5682 | && itemcnt >= 3) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5683 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5684 | /* REP/REPSAL item */ |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 5685 | /* Myspell ignores extra arguments, we require it starts with |
5686 | * # to detect mistakes. */ | ||||
5687 | if (itemcnt > 3 && items[3][0] != '#') | ||||
5688 | smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5689 | if (items[0][3] == 'S' ? do_repsal : do_rep) |
Bram Moolenaar | 1e01546 | 2005-09-25 22:16:38 +0000 | [diff] [blame] | 5690 | { |
5691 | /* Replace underscore with space (can't include a space | ||||
5692 | * directly). */ | ||||
5693 | for (p = items[1]; *p != NUL; mb_ptr_adv(p)) | ||||
5694 | if (*p == '_') | ||||
5695 | *p = ' '; | ||||
5696 | for (p = items[2]; *p != NUL; mb_ptr_adv(p)) | ||||
5697 | if (*p == '_') | ||||
5698 | *p = ' '; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5699 | add_fromto(spin, items[0][3] == 'S' |
5700 | ? &spin->si_repsal | ||||
5701 | : &spin->si_rep, items[1], items[2]); | ||||
Bram Moolenaar | 1e01546 | 2005-09-25 22:16:38 +0000 | [diff] [blame] | 5702 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5703 | } |
5704 | else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2) | ||||
5705 | { | ||||
5706 | /* MAP item or count */ | ||||
5707 | if (!found_map) | ||||
5708 | { | ||||
5709 | /* First line contains the count. */ | ||||
5710 | found_map = TRUE; | ||||
5711 | if (!isdigit(*items[1])) | ||||
5712 | smsg((char_u *)_("Expected MAP count in %s line %d"), | ||||
5713 | fname, lnum); | ||||
5714 | } | ||||
5715 | else if (do_map) | ||||
5716 | { | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 5717 | int c; |
5718 | |||||
5719 | /* Check that every character appears only once. */ | ||||
5720 | for (p = items[1]; *p != NUL; ) | ||||
5721 | { | ||||
5722 | #ifdef FEAT_MBYTE | ||||
5723 | c = mb_ptr2char_adv(&p); | ||||
5724 | #else | ||||
5725 | c = *p++; | ||||
5726 | #endif | ||||
5727 | if ((spin->si_map.ga_len > 0 | ||||
5728 | && vim_strchr(spin->si_map.ga_data, c) | ||||
5729 | != NULL) | ||||
5730 | || vim_strchr(p, c) != NULL) | ||||
5731 | smsg((char_u *)_("Duplicate character in MAP in %s line %d"), | ||||
5732 | fname, lnum); | ||||
5733 | } | ||||
5734 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5735 | /* We simply concatenate all the MAP strings, separated by |
5736 | * slashes. */ | ||||
5737 | ga_concat(&spin->si_map, items[1]); | ||||
5738 | ga_append(&spin->si_map, '/'); | ||||
5739 | } | ||||
5740 | } | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 5741 | /* Accept "SAL from to" and "SAL from to # comment". */ |
5742 | else if (STRCMP(items[0], "SAL") == 0 | ||||
5743 | && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#'))) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 5744 | { |
5745 | if (do_sal) | ||||
5746 | { | ||||
5747 | /* SAL item (sounds-a-like) | ||||
5748 | * Either one of the known keys or a from-to pair. */ | ||||
5749 | if (STRCMP(items[1], "followup") == 0) | ||||
5750 | spin->si_followup = sal_to_bool(items[2]); | ||||
5751 | else if (STRCMP(items[1], "collapse_result") == 0) | ||||
5752 | spin->si_collapse = sal_to_bool(items[2]); | ||||
5753 | else if (STRCMP(items[1], "remove_accents") == 0) | ||||
5754 | spin->si_rem_accents = sal_to_bool(items[2]); | ||||
5755 | else | ||||
5756 | /* when "to" is "_" it means empty */ | ||||
5757 | add_fromto(spin, &spin->si_sal, items[1], | ||||
5758 | STRCMP(items[2], "_") == 0 ? (char_u *)"" | ||||
5759 | : items[2]); | ||||
5760 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5761 | } |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5762 | else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2 |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5763 | && sofofrom == NULL) |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5764 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5765 | sofofrom = getroom_save(spin, items[1]); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5766 | } |
5767 | else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2 | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5768 | && sofoto == NULL) |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5769 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5770 | sofoto = getroom_save(spin, items[1]); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 5771 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 5772 | else if (STRCMP(items[0], "COMMON") == 0) |
5773 | { | ||||
5774 | int i; | ||||
5775 | |||||
5776 | for (i = 1; i < itemcnt; ++i) | ||||
5777 | { | ||||
5778 | if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords, | ||||
5779 | items[i]))) | ||||
5780 | { | ||||
5781 | p = vim_strsave(items[i]); | ||||
5782 | if (p == NULL) | ||||
5783 | break; | ||||
5784 | hash_add(&spin->si_commonwords, p); | ||||
5785 | } | ||||
5786 | } | ||||
5787 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 5788 | else |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5789 | smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"), |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5790 | fname, lnum, items[0]); |
5791 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5792 | } |
5793 | |||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5794 | if (fol != NULL || low != NULL || upp != NULL) |
5795 | { | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 5796 | if (spin->si_clear_chartab) |
5797 | { | ||||
5798 | /* Clear the char type tables, don't want to use any of the | ||||
5799 | * currently used spell properties. */ | ||||
5800 | init_spell_chartab(); | ||||
5801 | spin->si_clear_chartab = FALSE; | ||||
5802 | } | ||||
5803 | |||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 5804 | /* |
5805 | * Don't write a word table for an ASCII file, so that we don't check | ||||
5806 | * for conflicts with a word table that matches 'encoding'. | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 5807 | * Don't write one for utf-8 either, we use utf_*() and |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 5808 | * mb_get_class(), the list of chars in the file will be incomplete. |
5809 | */ | ||||
5810 | if (!spin->si_ascii | ||||
5811 | #ifdef FEAT_MBYTE | ||||
5812 | && !enc_utf8 | ||||
5813 | #endif | ||||
5814 | ) | ||||
Bram Moolenaar | 6f3058f | 2005-04-24 21:58:05 +0000 | [diff] [blame] | 5815 | { |
5816 | if (fol == NULL || low == NULL || upp == NULL) | ||||
5817 | smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname); | ||||
5818 | else | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 5819 | (void)set_spell_chartab(fol, low, upp); |
Bram Moolenaar | 6f3058f | 2005-04-24 21:58:05 +0000 | [diff] [blame] | 5820 | } |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 5821 | |
5822 | vim_free(fol); | ||||
5823 | vim_free(low); | ||||
5824 | vim_free(upp); | ||||
5825 | } | ||||
5826 | |||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5827 | /* Use compound specifications of the .aff file for the spell info. */ |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5828 | if (compmax != 0) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5829 | { |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5830 | aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX"); |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5831 | spin->si_compmax = compmax; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5832 | } |
5833 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5834 | if (compminlen != 0) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5835 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5836 | aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN"); |
5837 | spin->si_compminlen = compminlen; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5838 | } |
5839 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5840 | if (compsylmax != 0) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5841 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5842 | if (syllable == NULL) |
5843 | smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE")); | ||||
5844 | aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX"); | ||||
5845 | spin->si_compsylmax = compsylmax; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5846 | } |
5847 | |||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 5848 | if (compoptions != 0) |
5849 | { | ||||
5850 | aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options"); | ||||
5851 | spin->si_compoptions |= compoptions; | ||||
5852 | } | ||||
5853 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5854 | if (compflags != NULL) |
5855 | process_compflags(spin, aff, compflags); | ||||
5856 | |||||
5857 | /* Check that we didn't use too many renumbered flags. */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5858 | if (spin->si_newcompID < spin->si_newprefID) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5859 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5860 | if (spin->si_newcompID == 127 || spin->si_newcompID == 255) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5861 | MSG(_("Too many postponed prefixes")); |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 5862 | else if (spin->si_newprefID == 0 || spin->si_newprefID == 127) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5863 | MSG(_("Too many compound flags")); |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5864 | else |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5865 | MSG(_("Too many posponed prefixes and/or compound flags")); |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 5866 | } |
5867 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5868 | if (syllable != NULL) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5869 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5870 | aff_check_string(spin->si_syllable, syllable, "SYLLABLE"); |
5871 | spin->si_syllable = syllable; | ||||
5872 | } | ||||
5873 | |||||
5874 | if (sofofrom != NULL || sofoto != NULL) | ||||
5875 | { | ||||
5876 | if (sofofrom == NULL || sofoto == NULL) | ||||
5877 | smsg((char_u *)_("Missing SOFO%s line in %s"), | ||||
5878 | sofofrom == NULL ? "FROM" : "TO", fname); | ||||
5879 | else if (spin->si_sal.ga_len > 0) | ||||
5880 | smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5881 | else |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5882 | { |
5883 | aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM"); | ||||
5884 | aff_check_string(spin->si_sofoto, sofoto, "SOFOTO"); | ||||
5885 | spin->si_sofofr = sofofrom; | ||||
5886 | spin->si_sofoto = sofoto; | ||||
5887 | } | ||||
5888 | } | ||||
5889 | |||||
5890 | if (midword != NULL) | ||||
5891 | { | ||||
5892 | aff_check_string(spin->si_midword, midword, "MIDWORD"); | ||||
5893 | spin->si_midword = midword; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 5894 | } |
5895 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 5896 | vim_free(pc); |
5897 | fclose(fd); | ||||
5898 | return aff; | ||||
5899 | } | ||||
5900 | |||||
5901 | /* | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 5902 | * Return TRUE if "s" is the name of an info item in the affix file. |
5903 | */ | ||||
5904 | static int | ||||
5905 | spell_info_item(s) | ||||
5906 | char_u *s; | ||||
5907 | { | ||||
5908 | return STRCMP(s, "NAME") == 0 | ||||
5909 | || STRCMP(s, "HOME") == 0 | ||||
5910 | || STRCMP(s, "VERSION") == 0 | ||||
5911 | || STRCMP(s, "AUTHOR") == 0 | ||||
5912 | || STRCMP(s, "EMAIL") == 0 | ||||
5913 | || STRCMP(s, "COPYRIGHT") == 0; | ||||
5914 | } | ||||
5915 | |||||
5916 | /* | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5917 | * Turn an affix flag name into a number, according to the FLAG type. |
5918 | * returns zero for failure. | ||||
5919 | */ | ||||
5920 | static unsigned | ||||
5921 | affitem2flag(flagtype, item, fname, lnum) | ||||
5922 | int flagtype; | ||||
5923 | char_u *item; | ||||
5924 | char_u *fname; | ||||
5925 | int lnum; | ||||
5926 | { | ||||
5927 | unsigned res; | ||||
5928 | char_u *p = item; | ||||
5929 | |||||
5930 | res = get_affitem(flagtype, &p); | ||||
5931 | if (res == 0) | ||||
5932 | { | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5933 | if (flagtype == AFT_NUM) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5934 | smsg((char_u *)_("Flag is not a number in %s line %d: %s"), |
5935 | fname, lnum, item); | ||||
5936 | else | ||||
5937 | smsg((char_u *)_("Illegal flag in %s line %d: %s"), | ||||
5938 | fname, lnum, item); | ||||
5939 | } | ||||
5940 | if (*p != NUL) | ||||
5941 | { | ||||
5942 | smsg((char_u *)_(e_affname), fname, lnum, item); | ||||
5943 | return 0; | ||||
5944 | } | ||||
5945 | |||||
5946 | return res; | ||||
5947 | } | ||||
5948 | |||||
5949 | /* | ||||
5950 | * Get one affix name from "*pp" and advance the pointer. | ||||
5951 | * Returns zero for an error, still advances the pointer then. | ||||
5952 | */ | ||||
5953 | static unsigned | ||||
5954 | get_affitem(flagtype, pp) | ||||
5955 | int flagtype; | ||||
5956 | char_u **pp; | ||||
5957 | { | ||||
5958 | int res; | ||||
5959 | |||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5960 | if (flagtype == AFT_NUM) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5961 | { |
5962 | if (!VIM_ISDIGIT(**pp)) | ||||
5963 | { | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5964 | ++*pp; /* always advance, avoid getting stuck */ |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5965 | return 0; |
5966 | } | ||||
5967 | res = getdigits(pp); | ||||
5968 | } | ||||
5969 | else | ||||
5970 | { | ||||
5971 | #ifdef FEAT_MBYTE | ||||
5972 | res = mb_ptr2char_adv(pp); | ||||
5973 | #else | ||||
5974 | res = *(*pp)++; | ||||
5975 | #endif | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 5976 | if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 5977 | && res >= 'A' && res <= 'Z')) |
5978 | { | ||||
5979 | if (**pp == NUL) | ||||
5980 | return 0; | ||||
5981 | #ifdef FEAT_MBYTE | ||||
5982 | res = mb_ptr2char_adv(pp) + (res << 16); | ||||
5983 | #else | ||||
5984 | res = *(*pp)++ + (res << 16); | ||||
5985 | #endif | ||||
5986 | } | ||||
5987 | } | ||||
5988 | return res; | ||||
5989 | } | ||||
5990 | |||||
5991 | /* | ||||
5992 | * Process the "compflags" string used in an affix file and append it to | ||||
5993 | * spin->si_compflags. | ||||
5994 | * The processing involves changing the affix names to ID numbers, so that | ||||
5995 | * they fit in one byte. | ||||
5996 | */ | ||||
5997 | static void | ||||
5998 | process_compflags(spin, aff, compflags) | ||||
5999 | spellinfo_T *spin; | ||||
6000 | afffile_T *aff; | ||||
6001 | char_u *compflags; | ||||
6002 | { | ||||
6003 | char_u *p; | ||||
6004 | char_u *prevp; | ||||
6005 | unsigned flag; | ||||
6006 | compitem_T *ci; | ||||
6007 | int id; | ||||
6008 | int len; | ||||
6009 | char_u *tp; | ||||
6010 | char_u key[AH_KEY_LEN]; | ||||
6011 | hashitem_T *hi; | ||||
6012 | |||||
6013 | /* Make room for the old and the new compflags, concatenated with a / in | ||||
6014 | * between. Processing it makes it shorter, but we don't know by how | ||||
6015 | * much, thus allocate the maximum. */ | ||||
6016 | len = STRLEN(compflags) + 1; | ||||
6017 | if (spin->si_compflags != NULL) | ||||
6018 | len += STRLEN(spin->si_compflags) + 1; | ||||
6019 | p = getroom(spin, len, FALSE); | ||||
6020 | if (p == NULL) | ||||
6021 | return; | ||||
6022 | if (spin->si_compflags != NULL) | ||||
6023 | { | ||||
6024 | STRCPY(p, spin->si_compflags); | ||||
6025 | STRCAT(p, "/"); | ||||
6026 | } | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6027 | spin->si_compflags = p; |
6028 | tp = p + STRLEN(p); | ||||
6029 | |||||
6030 | for (p = compflags; *p != NUL; ) | ||||
6031 | { | ||||
6032 | if (vim_strchr((char_u *)"/*+[]", *p) != NULL) | ||||
6033 | /* Copy non-flag characters directly. */ | ||||
6034 | *tp++ = *p++; | ||||
6035 | else | ||||
6036 | { | ||||
6037 | /* First get the flag number, also checks validity. */ | ||||
6038 | prevp = p; | ||||
6039 | flag = get_affitem(aff->af_flagtype, &p); | ||||
6040 | if (flag != 0) | ||||
6041 | { | ||||
6042 | /* Find the flag in the hashtable. If it was used before, use | ||||
6043 | * the existing ID. Otherwise add a new entry. */ | ||||
6044 | vim_strncpy(key, prevp, p - prevp); | ||||
6045 | hi = hash_find(&aff->af_comp, key); | ||||
6046 | if (!HASHITEM_EMPTY(hi)) | ||||
6047 | id = HI2CI(hi)->ci_newID; | ||||
6048 | else | ||||
6049 | { | ||||
6050 | ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE); | ||||
6051 | if (ci == NULL) | ||||
6052 | break; | ||||
6053 | STRCPY(ci->ci_key, key); | ||||
6054 | ci->ci_flag = flag; | ||||
6055 | /* Avoid using a flag ID that has a special meaning in a | ||||
6056 | * regexp (also inside []). */ | ||||
6057 | do | ||||
6058 | { | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 6059 | check_renumber(spin); |
6060 | id = spin->si_newcompID--; | ||||
6061 | } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6062 | ci->ci_newID = id; |
6063 | hash_add(&aff->af_comp, ci->ci_key); | ||||
6064 | } | ||||
6065 | *tp++ = id; | ||||
6066 | } | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6067 | if (aff->af_flagtype == AFT_NUM && *p == ',') |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6068 | ++p; |
6069 | } | ||||
6070 | } | ||||
6071 | |||||
6072 | *tp = NUL; | ||||
6073 | } | ||||
6074 | |||||
6075 | /* | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 6076 | * Check that the new IDs for postponed affixes and compounding don't overrun |
6077 | * each other. We have almost 255 available, but start at 0-127 to avoid | ||||
6078 | * using two bytes for utf-8. When the 0-127 range is used up go to 128-255. | ||||
6079 | * When that is used up an error message is given. | ||||
6080 | */ | ||||
6081 | static void | ||||
6082 | check_renumber(spin) | ||||
6083 | spellinfo_T *spin; | ||||
6084 | { | ||||
6085 | if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128) | ||||
6086 | { | ||||
6087 | spin->si_newprefID = 127; | ||||
6088 | spin->si_newcompID = 255; | ||||
6089 | } | ||||
6090 | } | ||||
6091 | |||||
6092 | /* | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6093 | * Return TRUE if flag "flag" appears in affix list "afflist". |
6094 | */ | ||||
6095 | static int | ||||
6096 | flag_in_afflist(flagtype, afflist, flag) | ||||
6097 | int flagtype; | ||||
6098 | char_u *afflist; | ||||
6099 | unsigned flag; | ||||
6100 | { | ||||
6101 | char_u *p; | ||||
6102 | unsigned n; | ||||
6103 | |||||
6104 | switch (flagtype) | ||||
6105 | { | ||||
6106 | case AFT_CHAR: | ||||
6107 | return vim_strchr(afflist, flag) != NULL; | ||||
6108 | |||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6109 | case AFT_CAPLONG: |
6110 | case AFT_LONG: | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6111 | for (p = afflist; *p != NUL; ) |
6112 | { | ||||
6113 | #ifdef FEAT_MBYTE | ||||
6114 | n = mb_ptr2char_adv(&p); | ||||
6115 | #else | ||||
6116 | n = *p++; | ||||
6117 | #endif | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6118 | if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z')) |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6119 | && *p != NUL) |
6120 | #ifdef FEAT_MBYTE | ||||
6121 | n = mb_ptr2char_adv(&p) + (n << 16); | ||||
6122 | #else | ||||
6123 | n = *p++ + (n << 16); | ||||
6124 | #endif | ||||
6125 | if (n == flag) | ||||
6126 | return TRUE; | ||||
6127 | } | ||||
6128 | break; | ||||
6129 | |||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6130 | case AFT_NUM: |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6131 | for (p = afflist; *p != NUL; ) |
6132 | { | ||||
6133 | n = getdigits(&p); | ||||
6134 | if (n == flag) | ||||
6135 | return TRUE; | ||||
6136 | if (*p != NUL) /* skip over comma */ | ||||
6137 | ++p; | ||||
6138 | } | ||||
6139 | break; | ||||
6140 | } | ||||
6141 | return FALSE; | ||||
6142 | } | ||||
6143 | |||||
6144 | /* | ||||
6145 | * Give a warning when "spinval" and "affval" numbers are set and not the same. | ||||
6146 | */ | ||||
6147 | static void | ||||
6148 | aff_check_number(spinval, affval, name) | ||||
6149 | int spinval; | ||||
6150 | int affval; | ||||
6151 | char *name; | ||||
6152 | { | ||||
6153 | if (spinval != 0 && spinval != affval) | ||||
6154 | smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | ||||
6155 | } | ||||
6156 | |||||
6157 | /* | ||||
6158 | * Give a warning when "spinval" and "affval" strings are set and not the same. | ||||
6159 | */ | ||||
6160 | static void | ||||
6161 | aff_check_string(spinval, affval, name) | ||||
6162 | char_u *spinval; | ||||
6163 | char_u *affval; | ||||
6164 | char *name; | ||||
6165 | { | ||||
6166 | if (spinval != NULL && STRCMP(spinval, affval) != 0) | ||||
6167 | smsg((char_u *)_("%s value differs from what is used in another .aff file"), name); | ||||
6168 | } | ||||
6169 | |||||
6170 | /* | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6171 | * Return TRUE if strings "s1" and "s2" are equal. Also consider both being |
6172 | * NULL as equal. | ||||
6173 | */ | ||||
6174 | static int | ||||
6175 | str_equal(s1, s2) | ||||
6176 | char_u *s1; | ||||
6177 | char_u *s2; | ||||
6178 | { | ||||
6179 | if (s1 == NULL || s2 == NULL) | ||||
6180 | return s1 == s2; | ||||
6181 | return STRCMP(s1, s2) == 0; | ||||
6182 | } | ||||
6183 | |||||
6184 | /* | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 6185 | * Add a from-to item to "gap". Used for REP and SAL items. |
6186 | * They are stored case-folded. | ||||
6187 | */ | ||||
6188 | static void | ||||
6189 | add_fromto(spin, gap, from, to) | ||||
6190 | spellinfo_T *spin; | ||||
6191 | garray_T *gap; | ||||
6192 | char_u *from; | ||||
6193 | char_u *to; | ||||
6194 | { | ||||
6195 | fromto_T *ftp; | ||||
6196 | char_u word[MAXWLEN]; | ||||
6197 | |||||
6198 | if (ga_grow(gap, 1) == OK) | ||||
6199 | { | ||||
6200 | ftp = ((fromto_T *)gap->ga_data) + gap->ga_len; | ||||
6201 | (void)spell_casefold(from, STRLEN(from), word, MAXWLEN); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6202 | ftp->ft_from = getroom_save(spin, word); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 6203 | (void)spell_casefold(to, STRLEN(to), word, MAXWLEN); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6204 | ftp->ft_to = getroom_save(spin, word); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 6205 | ++gap->ga_len; |
6206 | } | ||||
6207 | } | ||||
6208 | |||||
6209 | /* | ||||
6210 | * Convert a boolean argument in a SAL line to TRUE or FALSE; | ||||
6211 | */ | ||||
6212 | static int | ||||
6213 | sal_to_bool(s) | ||||
6214 | char_u *s; | ||||
6215 | { | ||||
6216 | return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0; | ||||
6217 | } | ||||
6218 | |||||
6219 | /* | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 6220 | * Return TRUE if string "s" contains a non-ASCII character (128 or higher). |
6221 | * When "s" is NULL FALSE is returned. | ||||
6222 | */ | ||||
6223 | static int | ||||
6224 | has_non_ascii(s) | ||||
6225 | char_u *s; | ||||
6226 | { | ||||
6227 | char_u *p; | ||||
6228 | |||||
6229 | if (s != NULL) | ||||
6230 | for (p = s; *p != NUL; ++p) | ||||
6231 | if (*p >= 128) | ||||
6232 | return TRUE; | ||||
6233 | return FALSE; | ||||
6234 | } | ||||
6235 | |||||
6236 | /* | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6237 | * Free the structure filled by spell_read_aff(). |
6238 | */ | ||||
6239 | static void | ||||
6240 | spell_free_aff(aff) | ||||
6241 | afffile_T *aff; | ||||
6242 | { | ||||
6243 | hashtab_T *ht; | ||||
6244 | hashitem_T *hi; | ||||
6245 | int todo; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6246 | affheader_T *ah; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6247 | affentry_T *ae; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6248 | |
6249 | vim_free(aff->af_enc); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6250 | |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6251 | /* All this trouble to free the "ae_prog" items... */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6252 | for (ht = &aff->af_pref; ; ht = &aff->af_suff) |
6253 | { | ||||
6254 | todo = ht->ht_used; | ||||
6255 | for (hi = ht->ht_array; todo > 0; ++hi) | ||||
6256 | { | ||||
6257 | if (!HASHITEM_EMPTY(hi)) | ||||
6258 | { | ||||
6259 | --todo; | ||||
6260 | ah = HI2AH(hi); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6261 | for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) |
6262 | vim_free(ae->ae_prog); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6263 | } |
6264 | } | ||||
6265 | if (ht == &aff->af_suff) | ||||
6266 | break; | ||||
6267 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6268 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6269 | hash_clear(&aff->af_pref); |
6270 | hash_clear(&aff->af_suff); | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6271 | hash_clear(&aff->af_comp); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6272 | } |
6273 | |||||
6274 | /* | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6275 | * Read dictionary file "fname". |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6276 | * Returns OK or FAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6277 | */ |
6278 | static int | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6279 | spell_read_dic(spin, fname, affile) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6280 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6281 | char_u *fname; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6282 | afffile_T *affile; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6283 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6284 | hashtab_T ht; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6285 | char_u line[MAXLINELEN]; |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6286 | char_u *p; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6287 | char_u *afflist; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6288 | char_u store_afflist[MAXWLEN]; |
6289 | int pfxlen; | ||||
6290 | int need_affix; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6291 | char_u *dw; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6292 | char_u *pc; |
6293 | char_u *w; | ||||
6294 | int l; | ||||
6295 | hash_T hash; | ||||
6296 | hashitem_T *hi; | ||||
6297 | FILE *fd; | ||||
6298 | int lnum = 1; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6299 | int non_ascii = 0; |
6300 | int retval = OK; | ||||
6301 | char_u message[MAXLINELEN + MAXWLEN]; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6302 | int flags; |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6303 | int duplicate = 0; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6304 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6305 | /* |
6306 | * Open the file. | ||||
6307 | */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6308 | fd = mch_fopen((char *)fname, "r"); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6309 | if (fd == NULL) |
6310 | { | ||||
6311 | EMSG2(_(e_notopen), fname); | ||||
6312 | return FAIL; | ||||
6313 | } | ||||
6314 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6315 | /* The hashtable is only used to detect duplicated words. */ |
6316 | hash_init(&ht); | ||||
6317 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 6318 | vim_snprintf((char *)IObuff, IOSIZE, |
6319 | _("Reading dictionary file %s ..."), fname); | ||||
6320 | spell_message(spin, IObuff); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6321 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6322 | /* start with a message for the first line */ |
6323 | spin->si_msg_count = 999999; | ||||
6324 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6325 | /* Read and ignore the first line: word count. */ |
6326 | (void)vim_fgets(line, MAXLINELEN, fd); | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 6327 | if (!vim_isdigit(*skipwhite(line))) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6328 | EMSG2(_("E760: No word count in %s"), fname); |
6329 | |||||
6330 | /* | ||||
6331 | * Read all the lines in the file one by one. | ||||
6332 | * The words are converted to 'encoding' here, before being added to | ||||
6333 | * the hashtable. | ||||
6334 | */ | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6335 | while (!vim_fgets(line, MAXLINELEN, fd) && !got_int) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6336 | { |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6337 | line_breakcheck(); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6338 | ++lnum; |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 6339 | if (line[0] == '#' || line[0] == '/') |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 6340 | continue; /* comment line */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6341 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6342 | /* Remove CR, LF and white space from the end. White space halfway |
6343 | * the word is kept to allow e.g., "et al.". */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6344 | l = STRLEN(line); |
6345 | while (l > 0 && line[l - 1] <= ' ') | ||||
6346 | --l; | ||||
6347 | if (l == 0) | ||||
6348 | continue; /* empty line */ | ||||
6349 | line[l] = NUL; | ||||
6350 | |||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 6351 | /* Truncate the word at the "/", set "afflist" to what follows. |
6352 | * Replace "\/" by "/" and "\\" by "\". */ | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6353 | afflist = NULL; |
6354 | for (p = line; *p != NUL; mb_ptr_adv(p)) | ||||
6355 | { | ||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 6356 | if (*p == '\\' && (p[1] == '\\' || p[1] == '/')) |
6357 | mch_memmove(p, p + 1, STRLEN(p)); | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6358 | else if (*p == '/') |
6359 | { | ||||
6360 | *p = NUL; | ||||
6361 | afflist = p + 1; | ||||
6362 | break; | ||||
6363 | } | ||||
6364 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6365 | |
6366 | /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | ||||
6367 | if (spin->si_ascii && has_non_ascii(line)) | ||||
6368 | { | ||||
6369 | ++non_ascii; | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 6370 | continue; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6371 | } |
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 6372 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6373 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6374 | /* Convert from "SET" to 'encoding' when needed. */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6375 | if (spin->si_conv.vc_type != CONV_NONE) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6376 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6377 | pc = string_convert(&spin->si_conv, line, NULL); |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6378 | if (pc == NULL) |
6379 | { | ||||
6380 | smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | ||||
6381 | fname, lnum, line); | ||||
6382 | continue; | ||||
6383 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6384 | w = pc; |
6385 | } | ||||
6386 | else | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6387 | #endif |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6388 | { |
6389 | pc = NULL; | ||||
6390 | w = line; | ||||
6391 | } | ||||
6392 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6393 | /* This takes time, print a message every 10000 words. */ |
6394 | if (spin->si_verbose && spin->si_msg_count > 10000) | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6395 | { |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6396 | spin->si_msg_count = 0; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6397 | vim_snprintf((char *)message, sizeof(message), |
6398 | _("line %6d, word %6d - %s"), | ||||
6399 | lnum, spin->si_foldwcount + spin->si_keepwcount, w); | ||||
6400 | msg_start(); | ||||
6401 | msg_puts_long_attr(message, 0); | ||||
6402 | msg_clr_eos(); | ||||
6403 | msg_didout = FALSE; | ||||
6404 | msg_col = 0; | ||||
6405 | out_flush(); | ||||
6406 | } | ||||
6407 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6408 | /* Store the word in the hashtable to be able to find duplicates. */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6409 | dw = (char_u *)getroom_save(spin, w); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6410 | if (dw == NULL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6411 | retval = FAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6412 | vim_free(pc); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6413 | if (retval == FAIL) |
6414 | break; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6415 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6416 | hash = hash_hash(dw); |
6417 | hi = hash_lookup(&ht, dw, hash); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6418 | if (!HASHITEM_EMPTY(hi)) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6419 | { |
6420 | if (p_verbose > 0) | ||||
6421 | smsg((char_u *)_("Duplicate word in %s line %d: %s"), | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 6422 | fname, lnum, dw); |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6423 | else if (duplicate == 0) |
6424 | smsg((char_u *)_("First duplicate word in %s line %d: %s"), | ||||
6425 | fname, lnum, dw); | ||||
6426 | ++duplicate; | ||||
6427 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6428 | else |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6429 | hash_add_item(&ht, hi, dw, hash); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6430 | |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6431 | flags = 0; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6432 | store_afflist[0] = NUL; |
6433 | pfxlen = 0; | ||||
6434 | need_affix = FALSE; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6435 | if (afflist != NULL) |
6436 | { | ||||
6437 | /* Check for affix name that stands for keep-case word and stands | ||||
6438 | * for rare word (if defined). */ | ||||
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 6439 | if (affile->af_keepcase != 0 && flag_in_afflist( |
6440 | affile->af_flagtype, afflist, affile->af_keepcase)) | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 6441 | flags |= WF_KEEPCAP | WF_FIXCAP; |
Bram Moolenaar | 371baa9 | 2005-12-29 22:43:53 +0000 | [diff] [blame] | 6442 | if (affile->af_rare != 0 && flag_in_afflist( |
6443 | affile->af_flagtype, afflist, affile->af_rare)) | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6444 | flags |= WF_RARE; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6445 | if (affile->af_bad != 0 && flag_in_afflist( |
6446 | affile->af_flagtype, afflist, affile->af_bad)) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 6447 | flags |= WF_BANNED; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6448 | if (affile->af_needaffix != 0 && flag_in_afflist( |
6449 | affile->af_flagtype, afflist, affile->af_needaffix)) | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6450 | need_affix = TRUE; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 6451 | if (affile->af_needcomp != 0 && flag_in_afflist( |
6452 | affile->af_flagtype, afflist, affile->af_needcomp)) | ||||
6453 | flags |= WF_NEEDCOMP; | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6454 | if (affile->af_comproot != 0 && flag_in_afflist( |
6455 | affile->af_flagtype, afflist, affile->af_comproot)) | ||||
6456 | flags |= WF_COMPROOT; | ||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 6457 | if (affile->af_nosuggest != 0 && flag_in_afflist( |
6458 | affile->af_flagtype, afflist, affile->af_nosuggest)) | ||||
6459 | flags |= WF_NOSUGGEST; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6460 | |
6461 | if (affile->af_pfxpostpone) | ||||
6462 | /* Need to store the list of prefix IDs with the word. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6463 | pfxlen = get_pfxlist(affile, afflist, store_afflist); |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 6464 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6465 | if (spin->si_compflags != NULL) |
6466 | /* Need to store the list of compound flags with the word. | ||||
6467 | * Concatenate them to the list of prefix IDs. */ | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6468 | get_compflags(affile, afflist, store_afflist + pfxlen); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6469 | } |
6470 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6471 | /* Add the word to the word tree(s). */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6472 | if (store_word(spin, dw, flags, spin->si_region, |
6473 | store_afflist, need_affix) == FAIL) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6474 | retval = FAIL; |
6475 | |||||
6476 | if (afflist != NULL) | ||||
6477 | { | ||||
6478 | /* Find all matching suffixes and add the resulting words. | ||||
6479 | * Additionally do matching prefixes that combine. */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6480 | if (store_aff_word(spin, dw, afflist, affile, |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6481 | &affile->af_suff, &affile->af_pref, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6482 | FALSE, flags, store_afflist, pfxlen) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6483 | retval = FAIL; |
6484 | |||||
6485 | /* Find all matching prefixes and add the resulting words. */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6486 | if (store_aff_word(spin, dw, afflist, affile, |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6487 | &affile->af_pref, NULL, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6488 | FALSE, flags, store_afflist, pfxlen) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6489 | retval = FAIL; |
6490 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6491 | } |
6492 | |||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6493 | if (duplicate > 0) |
6494 | smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6495 | if (spin->si_ascii && non_ascii > 0) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6496 | smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"), |
6497 | non_ascii, fname); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6498 | hash_clear(&ht); |
6499 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6500 | fclose(fd); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6501 | return retval; |
6502 | } | ||||
6503 | |||||
6504 | /* | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6505 | * Get the list of prefix IDs from the affix list "afflist". |
6506 | * Used for PFXPOSTPONE. | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6507 | * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL |
6508 | * and return the number of affixes. | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6509 | */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6510 | static int |
6511 | get_pfxlist(affile, afflist, store_afflist) | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6512 | afffile_T *affile; |
6513 | char_u *afflist; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6514 | char_u *store_afflist; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6515 | { |
6516 | char_u *p; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6517 | char_u *prevp; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6518 | int cnt = 0; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6519 | int id; |
6520 | char_u key[AH_KEY_LEN]; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6521 | hashitem_T *hi; |
6522 | |||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6523 | for (p = afflist; *p != NUL; ) |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6524 | { |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6525 | prevp = p; |
6526 | if (get_affitem(affile->af_flagtype, &p) != 0) | ||||
6527 | { | ||||
6528 | /* A flag is a postponed prefix flag if it appears in "af_pref" | ||||
6529 | * and it's ID is not zero. */ | ||||
6530 | vim_strncpy(key, prevp, p - prevp); | ||||
6531 | hi = hash_find(&affile->af_pref, key); | ||||
6532 | if (!HASHITEM_EMPTY(hi)) | ||||
6533 | { | ||||
6534 | id = HI2AH(hi)->ah_newID; | ||||
6535 | if (id != 0) | ||||
6536 | store_afflist[cnt++] = id; | ||||
6537 | } | ||||
6538 | } | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6539 | if (affile->af_flagtype == AFT_NUM && *p == ',') |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6540 | ++p; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6541 | } |
6542 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6543 | store_afflist[cnt] = NUL; |
6544 | return cnt; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6545 | } |
6546 | |||||
6547 | /* | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6548 | * Get the list of compound IDs from the affix list "afflist" that are used |
6549 | * for compound words. | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6550 | * Puts the flags in "store_afflist[]". |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6551 | */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6552 | static void |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6553 | get_compflags(affile, afflist, store_afflist) |
6554 | afffile_T *affile; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6555 | char_u *afflist; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6556 | char_u *store_afflist; |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6557 | { |
6558 | char_u *p; | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6559 | char_u *prevp; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6560 | int cnt = 0; |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6561 | char_u key[AH_KEY_LEN]; |
6562 | hashitem_T *hi; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6563 | |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6564 | for (p = afflist; *p != NUL; ) |
6565 | { | ||||
6566 | prevp = p; | ||||
6567 | if (get_affitem(affile->af_flagtype, &p) != 0) | ||||
6568 | { | ||||
6569 | /* A flag is a compound flag if it appears in "af_comp". */ | ||||
6570 | vim_strncpy(key, prevp, p - prevp); | ||||
6571 | hi = hash_find(&affile->af_comp, key); | ||||
6572 | if (!HASHITEM_EMPTY(hi)) | ||||
6573 | store_afflist[cnt++] = HI2CI(hi)->ci_newID; | ||||
6574 | } | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 6575 | if (affile->af_flagtype == AFT_NUM && *p == ',') |
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6576 | ++p; |
6577 | } | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6578 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6579 | store_afflist[cnt] = NUL; |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6580 | } |
6581 | |||||
6582 | /* | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6583 | * Apply affixes to a word and store the resulting words. |
6584 | * "ht" is the hashtable with affentry_T that need to be applied, either | ||||
6585 | * prefixes or suffixes. | ||||
6586 | * "xht", when not NULL, is the prefix hashtable, to be used additionally on | ||||
6587 | * the resulting words for combining affixes. | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6588 | * |
6589 | * Returns FAIL when out of memory. | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6590 | */ |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6591 | static int |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6592 | store_aff_word(spin, word, afflist, affile, ht, xht, comb, flags, |
6593 | pfxlist, pfxlen) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6594 | spellinfo_T *spin; /* spell info */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6595 | char_u *word; /* basic word start */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6596 | char_u *afflist; /* list of names of supported affixes */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6597 | afffile_T *affile; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6598 | hashtab_T *ht; |
6599 | hashtab_T *xht; | ||||
6600 | int comb; /* only use affixes that combine */ | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6601 | int flags; /* flags for the word */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6602 | char_u *pfxlist; /* list of prefix IDs */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6603 | int pfxlen; /* nr of flags in "pfxlist" for prefixes, rest |
6604 | * is compound flags */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6605 | { |
6606 | int todo; | ||||
6607 | hashitem_T *hi; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6608 | affheader_T *ah; |
6609 | affentry_T *ae; | ||||
6610 | regmatch_T regmatch; | ||||
6611 | char_u newword[MAXWLEN]; | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6612 | int retval = OK; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6613 | int i; |
6614 | char_u *p; | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 6615 | int use_flags; |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6616 | char_u *use_pfxlist; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6617 | char_u pfx_pfxlist[MAXWLEN]; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6618 | size_t wordlen = STRLEN(word); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6619 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6620 | todo = ht->ht_used; |
6621 | for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6622 | { |
6623 | if (!HASHITEM_EMPTY(hi)) | ||||
6624 | { | ||||
6625 | --todo; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6626 | ah = HI2AH(hi); |
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 6627 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6628 | /* Check that the affix combines, if required, and that the word |
6629 | * supports this affix. */ | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 6630 | if ((!comb || ah->ah_combine) && flag_in_afflist( |
6631 | affile->af_flagtype, afflist, ah->ah_flag)) | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 6632 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6633 | /* Loop over all affix entries with this name. */ |
6634 | for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6635 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6636 | /* Check the condition. It's not logical to match case |
6637 | * here, but it is required for compatibility with | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6638 | * Myspell. |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6639 | * Another requirement from Myspell is that the chop |
6640 | * string is shorter than the word itself. | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6641 | * For prefixes, when "PFXPOSTPONE" was used, only do |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6642 | * prefixes with a chop string and/or flags. */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6643 | regmatch.regprog = ae->ae_prog; |
6644 | regmatch.rm_ic = FALSE; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6645 | if ((xht != NULL || !affile->af_pfxpostpone |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6646 | || ae->ae_chop != NULL |
6647 | || ae->ae_flags != NULL) | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 6648 | && (ae->ae_chop == NULL |
6649 | || STRLEN(ae->ae_chop) < wordlen) | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 6650 | && (ae->ae_prog == NULL |
6651 | || vim_regexec(®match, word, (colnr_T)0))) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6652 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6653 | /* Match. Remove the chop and add the affix. */ |
6654 | if (xht == NULL) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6655 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6656 | /* prefix: chop/add at the start of the word */ |
6657 | if (ae->ae_add == NULL) | ||||
6658 | *newword = NUL; | ||||
6659 | else | ||||
6660 | STRCPY(newword, ae->ae_add); | ||||
6661 | p = word; | ||||
6662 | if (ae->ae_chop != NULL) | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6663 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6664 | /* Skip chop string. */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6665 | #ifdef FEAT_MBYTE |
6666 | if (has_mbyte) | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 6667 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6668 | i = mb_charlen(ae->ae_chop); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 6669 | for ( ; i > 0; --i) |
6670 | mb_ptr_adv(p); | ||||
6671 | } | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6672 | else |
6673 | #endif | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 6674 | p += STRLEN(ae->ae_chop); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6675 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6676 | STRCAT(newword, p); |
6677 | } | ||||
6678 | else | ||||
6679 | { | ||||
6680 | /* suffix: chop/add at the end of the word */ | ||||
6681 | STRCPY(newword, word); | ||||
6682 | if (ae->ae_chop != NULL) | ||||
6683 | { | ||||
6684 | /* Remove chop string. */ | ||||
6685 | p = newword + STRLEN(newword); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 6686 | i = MB_CHARLEN(ae->ae_chop); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6687 | for ( ; i > 0; --i) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6688 | mb_ptr_back(newword, p); |
6689 | *p = NUL; | ||||
6690 | } | ||||
6691 | if (ae->ae_add != NULL) | ||||
6692 | STRCAT(newword, ae->ae_add); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6693 | } |
6694 | |||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 6695 | /* Obey the "rare" flag of the affix. */ |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6696 | if (affile->af_rare != 0 |
6697 | && ae->ae_flags != NULL | ||||
6698 | && flag_in_afflist( | ||||
6699 | affile->af_flagtype, ae->ae_flags, | ||||
6700 | affile->af_rare)) | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 6701 | use_flags = flags | WF_RARE; |
6702 | else | ||||
6703 | use_flags = flags; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6704 | |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6705 | /* Obey a "COMPOUNDFORBID" flag of the affix: don't |
6706 | * use the compound flags. */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6707 | use_pfxlist = pfxlist; |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 6708 | if (pfxlist != NULL |
6709 | && affile->af_compforbid != 0 | ||||
6710 | && ae->ae_flags != NULL | ||||
6711 | && flag_in_afflist( | ||||
6712 | affile->af_flagtype, ae->ae_flags, | ||||
6713 | affile->af_compforbid)) | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6714 | { |
6715 | vim_strncpy(pfx_pfxlist, pfxlist, pfxlen); | ||||
6716 | use_pfxlist = pfx_pfxlist; | ||||
6717 | } | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6718 | |
6719 | /* When there are postponed prefixes... */ | ||||
Bram Moolenaar | 551f84f | 2005-07-06 22:29:20 +0000 | [diff] [blame] | 6720 | if (spin->si_prefroot != NULL |
6721 | && spin->si_prefroot->wn_sibling != NULL) | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6722 | { |
6723 | /* ... add a flag to indicate an affix was used. */ | ||||
6724 | use_flags |= WF_HAS_AFF; | ||||
6725 | |||||
6726 | /* ... don't use a prefix list if combining | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6727 | * affixes is not allowed. But do use the |
6728 | * compound flags after them. */ | ||||
6729 | if ((!ah->ah_combine || comb) && pfxlist != NULL) | ||||
6730 | use_pfxlist += pfxlen; | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6731 | } |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 6732 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6733 | /* Store the modified word. */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6734 | if (store_word(spin, newword, use_flags, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6735 | spin->si_region, use_pfxlist, FALSE) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6736 | retval = FAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6737 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6738 | /* When added a suffix and combining is allowed also |
6739 | * try adding prefixes additionally. */ | ||||
6740 | if (xht != NULL && ah->ah_combine) | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6741 | if (store_aff_word(spin, newword, afflist, affile, |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 6742 | xht, NULL, TRUE, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6743 | use_flags, use_pfxlist, pfxlen) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6744 | retval = FAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6745 | } |
6746 | } | ||||
6747 | } | ||||
6748 | } | ||||
6749 | } | ||||
6750 | |||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 6751 | return retval; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 6752 | } |
6753 | |||||
6754 | /* | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6755 | * Read a file with a list of words. |
6756 | */ | ||||
6757 | static int | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6758 | spell_read_wordfile(spin, fname) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6759 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6760 | char_u *fname; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6761 | { |
6762 | FILE *fd; | ||||
6763 | long lnum = 0; | ||||
6764 | char_u rline[MAXLINELEN]; | ||||
6765 | char_u *line; | ||||
6766 | char_u *pc = NULL; | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6767 | char_u *p; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6768 | int l; |
6769 | int retval = OK; | ||||
6770 | int did_word = FALSE; | ||||
6771 | int non_ascii = 0; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6772 | int flags; |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6773 | int regionmask; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6774 | |
6775 | /* | ||||
6776 | * Open the file. | ||||
6777 | */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6778 | fd = mch_fopen((char *)fname, "r"); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6779 | if (fd == NULL) |
6780 | { | ||||
6781 | EMSG2(_(e_notopen), fname); | ||||
6782 | return FAIL; | ||||
6783 | } | ||||
6784 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 6785 | vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname); |
6786 | spell_message(spin, IObuff); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6787 | |
6788 | /* | ||||
6789 | * Read all the lines in the file one by one. | ||||
6790 | */ | ||||
6791 | while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int) | ||||
6792 | { | ||||
6793 | line_breakcheck(); | ||||
6794 | ++lnum; | ||||
6795 | |||||
6796 | /* Skip comment lines. */ | ||||
6797 | if (*rline == '#') | ||||
6798 | continue; | ||||
6799 | |||||
6800 | /* Remove CR, LF and white space from the end. */ | ||||
6801 | l = STRLEN(rline); | ||||
6802 | while (l > 0 && rline[l - 1] <= ' ') | ||||
6803 | --l; | ||||
6804 | if (l == 0) | ||||
6805 | continue; /* empty or blank line */ | ||||
6806 | rline[l] = NUL; | ||||
6807 | |||||
6808 | /* Convert from "=encoding={encoding}" to 'encoding' when needed. */ | ||||
6809 | vim_free(pc); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6810 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6811 | if (spin->si_conv.vc_type != CONV_NONE) |
6812 | { | ||||
6813 | pc = string_convert(&spin->si_conv, rline, NULL); | ||||
6814 | if (pc == NULL) | ||||
6815 | { | ||||
6816 | smsg((char_u *)_("Conversion failure for word in %s line %d: %s"), | ||||
6817 | fname, lnum, rline); | ||||
6818 | continue; | ||||
6819 | } | ||||
6820 | line = pc; | ||||
6821 | } | ||||
6822 | else | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6823 | #endif |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6824 | { |
6825 | pc = NULL; | ||||
6826 | line = rline; | ||||
6827 | } | ||||
6828 | |||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6829 | if (*line == '/') |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6830 | { |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6831 | ++line; |
6832 | if (STRNCMP(line, "encoding=", 9) == 0) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6833 | { |
6834 | if (spin->si_conv.vc_type != CONV_NONE) | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6835 | smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"), |
6836 | fname, lnum, line - 1); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6837 | else if (did_word) |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6838 | smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"), |
6839 | fname, lnum, line - 1); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6840 | else |
6841 | { | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6842 | #ifdef FEAT_MBYTE |
6843 | char_u *enc; | ||||
6844 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6845 | /* Setup for conversion to 'encoding'. */ |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6846 | line += 10; |
6847 | enc = enc_canonize(line); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6848 | if (enc != NULL && !spin->si_ascii |
6849 | && convert_setup(&spin->si_conv, enc, | ||||
6850 | p_enc) == FAIL) | ||||
6851 | smsg((char_u *)_("Conversion in %s not supported: from %s to %s"), | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6852 | fname, line, p_enc); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6853 | vim_free(enc); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 6854 | spin->si_conv.vc_fail = TRUE; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6855 | #else |
6856 | smsg((char_u *)_("Conversion in %s not supported"), fname); | ||||
6857 | #endif | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6858 | } |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6859 | continue; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6860 | } |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6861 | |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6862 | if (STRNCMP(line, "regions=", 8) == 0) |
6863 | { | ||||
6864 | if (spin->si_region_count > 1) | ||||
6865 | smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"), | ||||
6866 | fname, lnum, line); | ||||
6867 | else | ||||
6868 | { | ||||
6869 | line += 8; | ||||
6870 | if (STRLEN(line) > 16) | ||||
6871 | smsg((char_u *)_("Too many regions in %s line %d: %s"), | ||||
6872 | fname, lnum, line); | ||||
6873 | else | ||||
6874 | { | ||||
6875 | spin->si_region_count = STRLEN(line) / 2; | ||||
6876 | STRCPY(spin->si_region_name, line); | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 6877 | |
6878 | /* Adjust the mask for a word valid in all regions. */ | ||||
6879 | spin->si_region = (1 << spin->si_region_count) - 1; | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6880 | } |
6881 | } | ||||
6882 | continue; | ||||
6883 | } | ||||
6884 | |||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6885 | smsg((char_u *)_("/ line ignored in %s line %d: %s"), |
6886 | fname, lnum, line - 1); | ||||
6887 | continue; | ||||
6888 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6889 | |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6890 | flags = 0; |
6891 | regionmask = spin->si_region; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6892 | |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6893 | /* Check for flags and region after a slash. */ |
6894 | p = vim_strchr(line, '/'); | ||||
6895 | if (p != NULL) | ||||
6896 | { | ||||
6897 | *p++ = NUL; | ||||
6898 | while (*p != NUL) | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6899 | { |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6900 | if (*p == '=') /* keep-case word */ |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 6901 | flags |= WF_KEEPCAP | WF_FIXCAP; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6902 | else if (*p == '!') /* Bad, bad, wicked word. */ |
6903 | flags |= WF_BANNED; | ||||
6904 | else if (*p == '?') /* Rare word. */ | ||||
6905 | flags |= WF_RARE; | ||||
6906 | else if (VIM_ISDIGIT(*p)) /* region number(s) */ | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6907 | { |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6908 | if ((flags & WF_REGION) == 0) /* first one */ |
6909 | regionmask = 0; | ||||
6910 | flags |= WF_REGION; | ||||
6911 | |||||
6912 | l = *p - '0'; | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6913 | if (l > spin->si_region_count) |
6914 | { | ||||
6915 | smsg((char_u *)_("Invalid region nr in %s line %d: %s"), | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6916 | fname, lnum, p); |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6917 | break; |
6918 | } | ||||
6919 | regionmask |= 1 << (l - 1); | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 6920 | } |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 6921 | else |
6922 | { | ||||
6923 | smsg((char_u *)_("Unrecognized flags in %s line %d: %s"), | ||||
6924 | fname, lnum, p); | ||||
6925 | break; | ||||
6926 | } | ||||
6927 | ++p; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 6928 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6929 | } |
6930 | |||||
6931 | /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */ | ||||
6932 | if (spin->si_ascii && has_non_ascii(line)) | ||||
6933 | { | ||||
6934 | ++non_ascii; | ||||
6935 | continue; | ||||
6936 | } | ||||
6937 | |||||
6938 | /* Normal word: store it. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 6939 | if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6940 | { |
6941 | retval = FAIL; | ||||
6942 | break; | ||||
6943 | } | ||||
6944 | did_word = TRUE; | ||||
6945 | } | ||||
6946 | |||||
6947 | vim_free(pc); | ||||
6948 | fclose(fd); | ||||
6949 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 6950 | if (spin->si_ascii && non_ascii > 0) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6951 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 6952 | vim_snprintf((char *)IObuff, IOSIZE, |
6953 | _("Ignored %d words with non-ASCII characters"), non_ascii); | ||||
6954 | spell_message(spin, IObuff); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 6955 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 6956 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6957 | return retval; |
6958 | } | ||||
6959 | |||||
6960 | /* | ||||
6961 | * Get part of an sblock_T, "len" bytes long. | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6962 | * This avoids calling free() for every little struct we use (and keeping |
6963 | * track of them). | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6964 | * The memory is cleared to all zeros. |
6965 | * Returns NULL when out of memory. | ||||
6966 | */ | ||||
6967 | static void * | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6968 | getroom(spin, len, align) |
6969 | spellinfo_T *spin; | ||||
Bram Moolenaar | cfc7d63 | 2005-07-28 22:28:16 +0000 | [diff] [blame] | 6970 | size_t len; /* length needed */ |
6971 | int align; /* align for pointer */ | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6972 | { |
6973 | char_u *p; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6974 | sblock_T *bl = spin->si_blocks; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6975 | |
Bram Moolenaar | cfc7d63 | 2005-07-28 22:28:16 +0000 | [diff] [blame] | 6976 | if (align && bl != NULL) |
6977 | /* Round size up for alignment. On some systems structures need to be | ||||
6978 | * aligned to the size of a pointer (e.g., SPARC). */ | ||||
6979 | bl->sb_used = (bl->sb_used + sizeof(char *) - 1) | ||||
6980 | & ~(sizeof(char *) - 1); | ||||
6981 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6982 | if (bl == NULL || bl->sb_used + len > SBLOCKSIZE) |
6983 | { | ||||
6984 | /* Allocate a block of memory. This is not freed until much later. */ | ||||
6985 | bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE)); | ||||
6986 | if (bl == NULL) | ||||
6987 | return NULL; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 6988 | bl->sb_next = spin->si_blocks; |
6989 | spin->si_blocks = bl; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6990 | bl->sb_used = 0; |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 6991 | ++spin->si_blocks_cnt; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 6992 | } |
6993 | |||||
6994 | p = bl->sb_data + bl->sb_used; | ||||
6995 | bl->sb_used += len; | ||||
6996 | |||||
6997 | return p; | ||||
6998 | } | ||||
6999 | |||||
7000 | /* | ||||
7001 | * Make a copy of a string into memory allocated with getroom(). | ||||
7002 | */ | ||||
7003 | static char_u * | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7004 | getroom_save(spin, s) |
7005 | spellinfo_T *spin; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7006 | char_u *s; |
7007 | { | ||||
7008 | char_u *sc; | ||||
7009 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7010 | sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7011 | if (sc != NULL) |
7012 | STRCPY(sc, s); | ||||
7013 | return sc; | ||||
7014 | } | ||||
7015 | |||||
7016 | |||||
7017 | /* | ||||
7018 | * Free the list of allocated sblock_T. | ||||
7019 | */ | ||||
7020 | static void | ||||
7021 | free_blocks(bl) | ||||
7022 | sblock_T *bl; | ||||
7023 | { | ||||
7024 | sblock_T *next; | ||||
7025 | |||||
7026 | while (bl != NULL) | ||||
7027 | { | ||||
7028 | next = bl->sb_next; | ||||
7029 | vim_free(bl); | ||||
7030 | bl = next; | ||||
7031 | } | ||||
7032 | } | ||||
7033 | |||||
7034 | /* | ||||
7035 | * Allocate the root of a word tree. | ||||
7036 | */ | ||||
7037 | static wordnode_T * | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7038 | wordtree_alloc(spin) |
7039 | spellinfo_T *spin; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7040 | { |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7041 | return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7042 | } |
7043 | |||||
7044 | /* | ||||
7045 | * Store a word in the tree(s). | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 7046 | * Always store it in the case-folded tree. For a keep-case word this is |
7047 | * useful when the word can also be used with all caps (no WF_FIXCAP flag) and | ||||
7048 | * used to find suggestions. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7049 | * For a keep-case word also store it in the keep-case tree. |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7050 | * When "pfxlist" is not NULL store the word for each postponed prefix ID and |
7051 | * compound flag. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7052 | */ |
7053 | static int | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7054 | store_word(spin, word, flags, region, pfxlist, need_affix) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7055 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7056 | char_u *word; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 7057 | int flags; /* extra flags, WF_BANNED */ |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 7058 | int region; /* supported region(s) */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7059 | char_u *pfxlist; /* list of prefix IDs or NULL */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7060 | int need_affix; /* only store word with affix ID */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7061 | { |
7062 | int len = STRLEN(word); | ||||
7063 | int ct = captype(word, word + len); | ||||
7064 | char_u foldword[MAXWLEN]; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7065 | int res = OK; |
7066 | char_u *p; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7067 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7068 | (void)spell_casefold(word, len, foldword, MAXWLEN); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7069 | for (p = pfxlist; res == OK; ++p) |
7070 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7071 | if (!need_affix || (p != NULL && *p != NUL)) |
7072 | res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags, | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7073 | region, p == NULL ? 0 : *p); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7074 | if (p == NULL || *p == NUL) |
7075 | break; | ||||
7076 | } | ||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 7077 | ++spin->si_foldwcount; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 7078 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7079 | if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP))) |
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 7080 | { |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7081 | for (p = pfxlist; res == OK; ++p) |
7082 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7083 | if (!need_affix || (p != NULL && *p != NUL)) |
7084 | res = tree_add_word(spin, word, spin->si_keeproot, flags, | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7085 | region, p == NULL ? 0 : *p); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7086 | if (p == NULL || *p == NUL) |
7087 | break; | ||||
7088 | } | ||||
Bram Moolenaar | 8db7318 | 2005-06-17 21:51:16 +0000 | [diff] [blame] | 7089 | ++spin->si_keepwcount; |
7090 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7091 | return res; |
7092 | } | ||||
7093 | |||||
7094 | /* | ||||
7095 | * Add word "word" to a word tree at "root". | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7096 | * When "flags" < 0 we are adding to the prefix tree where "flags" is used for |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 7097 | * "rare" and "region" is the condition nr. |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7098 | * Returns FAIL when out of memory. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7099 | */ |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7100 | static int |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7101 | tree_add_word(spin, word, root, flags, region, affixID) |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7102 | spellinfo_T *spin; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7103 | char_u *word; |
7104 | wordnode_T *root; | ||||
7105 | int flags; | ||||
7106 | int region; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7107 | int affixID; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7108 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7109 | wordnode_T *node = root; |
7110 | wordnode_T *np; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7111 | wordnode_T *copyp, **copyprev; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7112 | wordnode_T **prev = NULL; |
7113 | int i; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7114 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7115 | /* Add each byte of the word to the tree, including the NUL at the end. */ |
7116 | for (i = 0; ; ++i) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7117 | { |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7118 | /* When there is more than one reference to this node we need to make |
7119 | * a copy, so that we can modify it. Copy the whole list of siblings | ||||
7120 | * (we don't optimize for a partly shared list of siblings). */ | ||||
7121 | if (node != NULL && node->wn_refs > 1) | ||||
7122 | { | ||||
7123 | --node->wn_refs; | ||||
7124 | copyprev = prev; | ||||
7125 | for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling) | ||||
7126 | { | ||||
7127 | /* Allocate a new node and copy the info. */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7128 | np = get_wordnode(spin); |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7129 | if (np == NULL) |
7130 | return FAIL; | ||||
7131 | np->wn_child = copyp->wn_child; | ||||
7132 | if (np->wn_child != NULL) | ||||
7133 | ++np->wn_child->wn_refs; /* child gets extra ref */ | ||||
7134 | np->wn_byte = copyp->wn_byte; | ||||
7135 | if (np->wn_byte == NUL) | ||||
7136 | { | ||||
7137 | np->wn_flags = copyp->wn_flags; | ||||
7138 | np->wn_region = copyp->wn_region; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7139 | np->wn_affixID = copyp->wn_affixID; |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7140 | } |
7141 | |||||
7142 | /* Link the new node in the list, there will be one ref. */ | ||||
7143 | np->wn_refs = 1; | ||||
7144 | *copyprev = np; | ||||
7145 | copyprev = &np->wn_sibling; | ||||
7146 | |||||
7147 | /* Let "node" point to the head of the copied list. */ | ||||
7148 | if (copyp == node) | ||||
7149 | node = np; | ||||
7150 | } | ||||
7151 | } | ||||
7152 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7153 | /* Look for the sibling that has the same character. They are sorted |
7154 | * on byte value, thus stop searching when a sibling is found with a | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7155 | * higher byte value. For zero bytes (end of word) the sorting is |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7156 | * done on flags and then on affixID. */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7157 | while (node != NULL |
7158 | && (node->wn_byte < word[i] | ||||
7159 | || (node->wn_byte == NUL | ||||
7160 | && (flags < 0 | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7161 | ? node->wn_affixID < (unsigned)affixID |
7162 | : (node->wn_flags < (unsigned)(flags & WN_MASK) | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 7163 | || (node->wn_flags == (flags & WN_MASK) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7164 | && (spin->si_sugtree |
7165 | ? (node->wn_region & 0xffff) < region | ||||
7166 | : node->wn_affixID | ||||
7167 | < (unsigned)affixID))))))) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7168 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7169 | prev = &node->wn_sibling; |
7170 | node = *prev; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7171 | } |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7172 | if (node == NULL |
7173 | || node->wn_byte != word[i] | ||||
7174 | || (word[i] == NUL | ||||
7175 | && (flags < 0 | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7176 | || spin->si_sugtree |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 7177 | || node->wn_flags != (flags & WN_MASK) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7178 | || node->wn_affixID != affixID))) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7179 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7180 | /* Allocate a new node. */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7181 | np = get_wordnode(spin); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7182 | if (np == NULL) |
7183 | return FAIL; | ||||
7184 | np->wn_byte = word[i]; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7185 | |
7186 | /* If "node" is NULL this is a new child or the end of the sibling | ||||
7187 | * list: ref count is one. Otherwise use ref count of sibling and | ||||
7188 | * make ref count of sibling one (matters when inserting in front | ||||
7189 | * of the list of siblings). */ | ||||
7190 | if (node == NULL) | ||||
7191 | np->wn_refs = 1; | ||||
7192 | else | ||||
7193 | { | ||||
7194 | np->wn_refs = node->wn_refs; | ||||
7195 | node->wn_refs = 1; | ||||
7196 | } | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7197 | *prev = np; |
7198 | np->wn_sibling = node; | ||||
7199 | node = np; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7200 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7201 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7202 | if (word[i] == NUL) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7203 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7204 | node->wn_flags = flags; |
7205 | node->wn_region |= region; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7206 | node->wn_affixID = affixID; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7207 | break; |
Bram Moolenaar | 63d5a1e | 2005-04-19 21:30:25 +0000 | [diff] [blame] | 7208 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7209 | prev = &node->wn_child; |
7210 | node = *prev; | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7211 | } |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7212 | #ifdef SPELL_PRINTTREE |
7213 | smsg("Added \"%s\"", word); | ||||
7214 | spell_print_tree(root->wn_sibling); | ||||
7215 | #endif | ||||
7216 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7217 | /* count nr of words added since last message */ |
7218 | ++spin->si_msg_count; | ||||
7219 | |||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7220 | if (spin->si_compress_cnt > 1) |
7221 | { | ||||
7222 | if (--spin->si_compress_cnt == 1) | ||||
7223 | /* Did enough words to lower the block count limit. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7224 | spin->si_blocks_cnt += compress_inc; |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7225 | } |
7226 | |||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7227 | /* |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7228 | * When we have allocated lots of memory we need to compress the word tree |
7229 | * to free up some room. But compression is slow, and we might actually | ||||
7230 | * need that room, thus only compress in the following situations: | ||||
7231 | * 1. When not compressed before (si_compress_cnt == 0): when using | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7232 | * "compress_start" blocks. |
7233 | * 2. When compressed before and used "compress_inc" blocks before | ||||
7234 | * adding "compress_added" words (si_compress_cnt > 1). | ||||
7235 | * 3. When compressed before, added "compress_added" words | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7236 | * (si_compress_cnt == 1) and the number of free nodes drops below the |
7237 | * maximum word length. | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7238 | */ |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7239 | #ifndef SPELL_PRINTTREE |
7240 | if (spin->si_compress_cnt == 1 | ||||
7241 | ? spin->si_free_count < MAXWLEN | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7242 | : spin->si_blocks_cnt >= compress_start) |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7243 | #endif |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7244 | { |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7245 | /* Decrement the block counter. The effect is that we compress again |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7246 | * when the freed up room has been used and another "compress_inc" |
7247 | * blocks have been allocated. Unless "compress_added" words have | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7248 | * been added, then the limit is put back again. */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7249 | spin->si_blocks_cnt -= compress_inc; |
7250 | spin->si_compress_cnt = compress_added; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7251 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7252 | if (spin->si_verbose) |
7253 | { | ||||
7254 | msg_start(); | ||||
7255 | msg_puts((char_u *)_(msg_compressing)); | ||||
7256 | msg_clr_eos(); | ||||
7257 | msg_didout = FALSE; | ||||
7258 | msg_col = 0; | ||||
7259 | out_flush(); | ||||
7260 | } | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7261 | |
7262 | /* Compress both trees. Either they both have many nodes, which makes | ||||
7263 | * compression useful, or one of them is small, which means | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7264 | * compression goes fast. But when filling the souldfold word tree |
7265 | * there is no keep-case tree. */ | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7266 | wordtree_compress(spin, spin->si_foldroot); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7267 | if (affixID >= 0) |
7268 | wordtree_compress(spin, spin->si_keeproot); | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7269 | } |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7270 | |
7271 | return OK; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7272 | } |
7273 | |||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7274 | /* |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7275 | * Check the 'mkspellmem' option. Return FAIL if it's wrong. |
7276 | * Sets "sps_flags". | ||||
7277 | */ | ||||
7278 | int | ||||
7279 | spell_check_msm() | ||||
7280 | { | ||||
7281 | char_u *p = p_msm; | ||||
7282 | long start = 0; | ||||
7283 | long inc = 0; | ||||
7284 | long added = 0; | ||||
7285 | |||||
7286 | if (!VIM_ISDIGIT(*p)) | ||||
7287 | return FAIL; | ||||
7288 | /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/ | ||||
7289 | start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102); | ||||
7290 | if (*p != ',') | ||||
7291 | return FAIL; | ||||
7292 | ++p; | ||||
7293 | if (!VIM_ISDIGIT(*p)) | ||||
7294 | return FAIL; | ||||
7295 | inc = (getdigits(&p) * 102) / (SBLOCKSIZE / 10); | ||||
7296 | if (*p != ',') | ||||
7297 | return FAIL; | ||||
7298 | ++p; | ||||
7299 | if (!VIM_ISDIGIT(*p)) | ||||
7300 | return FAIL; | ||||
7301 | added = getdigits(&p) * 1024; | ||||
7302 | if (*p != NUL) | ||||
7303 | return FAIL; | ||||
7304 | |||||
7305 | if (start == 0 || inc == 0 || added == 0 || inc > start) | ||||
7306 | return FAIL; | ||||
7307 | |||||
7308 | compress_start = start; | ||||
7309 | compress_inc = inc; | ||||
7310 | compress_added = added; | ||||
7311 | return OK; | ||||
7312 | } | ||||
7313 | |||||
7314 | |||||
7315 | /* | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7316 | * Get a wordnode_T, either from the list of previously freed nodes or |
7317 | * allocate a new one. | ||||
7318 | */ | ||||
7319 | static wordnode_T * | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7320 | get_wordnode(spin) |
7321 | spellinfo_T *spin; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7322 | { |
7323 | wordnode_T *n; | ||||
7324 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7325 | if (spin->si_first_free == NULL) |
7326 | n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE); | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7327 | else |
7328 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7329 | n = spin->si_first_free; |
7330 | spin->si_first_free = n->wn_child; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7331 | vim_memset(n, 0, sizeof(wordnode_T)); |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7332 | --spin->si_free_count; |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7333 | } |
7334 | #ifdef SPELL_PRINTTREE | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7335 | n->wn_nr = ++spin->si_wordnode_nr; |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7336 | #endif |
7337 | return n; | ||||
7338 | } | ||||
7339 | |||||
7340 | /* | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7341 | * Decrement the reference count on a node (which is the head of a list of |
7342 | * siblings). If the reference count becomes zero free the node and its | ||||
7343 | * siblings. | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7344 | * Returns the number of nodes actually freed. |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7345 | */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7346 | static int |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7347 | deref_wordnode(spin, node) |
7348 | spellinfo_T *spin; | ||||
7349 | wordnode_T *node; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7350 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7351 | wordnode_T *np; |
7352 | int cnt = 0; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7353 | |
7354 | if (--node->wn_refs == 0) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7355 | { |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7356 | for (np = node; np != NULL; np = np->wn_sibling) |
7357 | { | ||||
7358 | if (np->wn_child != NULL) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7359 | cnt += deref_wordnode(spin, np->wn_child); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7360 | free_wordnode(spin, np); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7361 | ++cnt; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7362 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7363 | ++cnt; /* length field */ |
7364 | } | ||||
7365 | return cnt; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7366 | } |
7367 | |||||
7368 | /* | ||||
7369 | * Free a wordnode_T for re-use later. | ||||
7370 | * Only the "wn_child" field becomes invalid. | ||||
7371 | */ | ||||
7372 | static void | ||||
7373 | free_wordnode(spin, n) | ||||
7374 | spellinfo_T *spin; | ||||
7375 | wordnode_T *n; | ||||
7376 | { | ||||
7377 | n->wn_child = spin->si_first_free; | ||||
7378 | spin->si_first_free = n; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7379 | ++spin->si_free_count; |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7380 | } |
7381 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7382 | /* |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7383 | * Compress a tree: find tails that are identical and can be shared. |
7384 | */ | ||||
7385 | static void | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7386 | wordtree_compress(spin, root) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 7387 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7388 | wordnode_T *root; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7389 | { |
7390 | hashtab_T ht; | ||||
7391 | int n; | ||||
7392 | int tot = 0; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7393 | int perc; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7394 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7395 | /* Skip the root itself, it's not actually used. The first sibling is the |
7396 | * start of the tree. */ | ||||
7397 | if (root->wn_sibling != NULL) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7398 | { |
7399 | hash_init(&ht); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7400 | n = node_compress(spin, root->wn_sibling, &ht, &tot); |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7401 | |
7402 | #ifndef SPELL_PRINTTREE | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 7403 | if (spin->si_verbose || p_verbose > 2) |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7404 | #endif |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 7405 | { |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7406 | if (tot > 1000000) |
7407 | perc = (tot - n) / (tot / 100); | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7408 | else if (tot == 0) |
7409 | perc = 0; | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7410 | else |
7411 | perc = (tot - n) * 100 / tot; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7412 | vim_snprintf((char *)IObuff, IOSIZE, |
7413 | _("Compressed %d of %d nodes; %d (%d%%) remaining"), | ||||
7414 | n, tot, tot - n, perc); | ||||
7415 | spell_message(spin, IObuff); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 7416 | } |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7417 | #ifdef SPELL_PRINTTREE |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7418 | spell_print_tree(root->wn_sibling); |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7419 | #endif |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7420 | hash_clear(&ht); |
7421 | } | ||||
7422 | } | ||||
7423 | |||||
7424 | /* | ||||
7425 | * Compress a node, its siblings and its children, depth first. | ||||
7426 | * Returns the number of compressed nodes. | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7427 | */ |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7428 | static int |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7429 | node_compress(spin, node, ht, tot) |
7430 | spellinfo_T *spin; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7431 | wordnode_T *node; |
7432 | hashtab_T *ht; | ||||
7433 | int *tot; /* total count of nodes before compressing, | ||||
7434 | incremented while going through the tree */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7435 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7436 | wordnode_T *np; |
7437 | wordnode_T *tp; | ||||
7438 | wordnode_T *child; | ||||
7439 | hash_T hash; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7440 | hashitem_T *hi; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7441 | int len = 0; |
7442 | unsigned nr, n; | ||||
7443 | int compressed = 0; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7444 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7445 | /* |
7446 | * Go through the list of siblings. Compress each child and then try | ||||
7447 | * finding an identical child to replace it. | ||||
7448 | * Note that with "child" we mean not just the node that is pointed to, | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7449 | * but the whole list of siblings of which the child node is the first. |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7450 | */ |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7451 | for (np = node; np != NULL && !got_int; np = np->wn_sibling) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7452 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7453 | ++len; |
7454 | if ((child = np->wn_child) != NULL) | ||||
7455 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7456 | /* Compress the child first. This fills hashkey. */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7457 | compressed += node_compress(spin, child, ht, tot); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7458 | |
7459 | /* Try to find an identical child. */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7460 | hash = hash_hash(child->wn_u1.hashkey); |
7461 | hi = hash_lookup(ht, child->wn_u1.hashkey, hash); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7462 | if (!HASHITEM_EMPTY(hi)) |
7463 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7464 | /* There are children we encountered before with a hash value |
7465 | * identical to the current child. Now check if there is one | ||||
7466 | * that is really identical. */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7467 | for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7468 | if (node_equal(child, tp)) |
7469 | { | ||||
7470 | /* Found one! Now use that child in place of the | ||||
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7471 | * current one. This means the current child and all |
7472 | * its siblings is unlinked from the tree. */ | ||||
7473 | ++tp->wn_refs; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7474 | compressed += deref_wordnode(spin, child); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7475 | np->wn_child = tp; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7476 | break; |
7477 | } | ||||
7478 | if (tp == NULL) | ||||
7479 | { | ||||
7480 | /* No other child with this hash value equals the child of | ||||
7481 | * the node, add it to the linked list after the first | ||||
7482 | * item. */ | ||||
7483 | tp = HI2WN(hi); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7484 | child->wn_u2.next = tp->wn_u2.next; |
7485 | tp->wn_u2.next = child; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7486 | } |
7487 | } | ||||
7488 | else | ||||
7489 | /* No other child has this hash value, add it to the | ||||
7490 | * hashtable. */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7491 | hash_add_item(ht, hi, child->wn_u1.hashkey, hash); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7492 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7493 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7494 | *tot += len + 1; /* add one for the node that stores the length */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7495 | |
7496 | /* | ||||
7497 | * Make a hash key for the node and its siblings, so that we can quickly | ||||
7498 | * find a lookalike node. This must be done after compressing the sibling | ||||
7499 | * list, otherwise the hash key would become invalid by the compression. | ||||
7500 | */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7501 | node->wn_u1.hashkey[0] = len; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7502 | nr = 0; |
7503 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7504 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7505 | if (np->wn_byte == NUL) |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7506 | /* end node: use wn_flags, wn_region and wn_affixID */ |
7507 | n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7508 | else |
7509 | /* byte node: use the byte value and the child pointer */ | ||||
7510 | n = np->wn_byte + ((long_u)np->wn_child << 8); | ||||
7511 | nr = nr * 101 + n; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7512 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7513 | |
7514 | /* Avoid NUL bytes, it terminates the hash key. */ | ||||
7515 | n = nr & 0xff; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7516 | node->wn_u1.hashkey[1] = n == 0 ? 1 : n; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7517 | n = (nr >> 8) & 0xff; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7518 | node->wn_u1.hashkey[2] = n == 0 ? 1 : n; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7519 | n = (nr >> 16) & 0xff; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7520 | node->wn_u1.hashkey[3] = n == 0 ? 1 : n; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7521 | n = (nr >> 24) & 0xff; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 7522 | node->wn_u1.hashkey[4] = n == 0 ? 1 : n; |
7523 | node->wn_u1.hashkey[5] = NUL; | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7524 | |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 7525 | /* Check for CTRL-C pressed now and then. */ |
7526 | fast_breakcheck(); | ||||
7527 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7528 | return compressed; |
7529 | } | ||||
7530 | |||||
7531 | /* | ||||
7532 | * Return TRUE when two nodes have identical siblings and children. | ||||
7533 | */ | ||||
7534 | static int | ||||
7535 | node_equal(n1, n2) | ||||
7536 | wordnode_T *n1; | ||||
7537 | wordnode_T *n2; | ||||
7538 | { | ||||
7539 | wordnode_T *p1; | ||||
7540 | wordnode_T *p2; | ||||
7541 | |||||
7542 | for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL; | ||||
7543 | p1 = p1->wn_sibling, p2 = p2->wn_sibling) | ||||
7544 | if (p1->wn_byte != p2->wn_byte | ||||
7545 | || (p1->wn_byte == NUL | ||||
7546 | ? (p1->wn_flags != p2->wn_flags | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7547 | || p1->wn_region != p2->wn_region |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7548 | || p1->wn_affixID != p2->wn_affixID) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7549 | : (p1->wn_child != p2->wn_child))) |
7550 | break; | ||||
7551 | |||||
7552 | return p1 == NULL && p2 == NULL; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7553 | } |
7554 | |||||
7555 | /* | ||||
7556 | * Write a number to file "fd", MSB first, in "len" bytes. | ||||
7557 | */ | ||||
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7558 | void |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7559 | put_bytes(fd, nr, len) |
7560 | FILE *fd; | ||||
7561 | long_u nr; | ||||
7562 | int len; | ||||
7563 | { | ||||
7564 | int i; | ||||
7565 | |||||
7566 | for (i = len - 1; i >= 0; --i) | ||||
7567 | putc((int)(nr >> (i * 8)), fd); | ||||
7568 | } | ||||
7569 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 7570 | #ifdef _MSC_VER |
7571 | # if (_MSC_VER <= 1200) | ||||
7572 | /* This line is required for VC6 without the service pack. Also see the | ||||
7573 | * matching #pragma below. */ | ||||
7574 | /* # pragma optimize("", off) */ | ||||
7575 | # endif | ||||
7576 | #endif | ||||
7577 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7578 | /* |
7579 | * Write spin->si_sugtime to file "fd". | ||||
7580 | */ | ||||
7581 | static void | ||||
7582 | put_sugtime(spin, fd) | ||||
7583 | spellinfo_T *spin; | ||||
7584 | FILE *fd; | ||||
7585 | { | ||||
7586 | int c; | ||||
7587 | int i; | ||||
7588 | |||||
7589 | /* time_t can be up to 8 bytes in size, more than long_u, thus we | ||||
7590 | * can't use put_bytes() here. */ | ||||
7591 | for (i = 7; i >= 0; --i) | ||||
7592 | if (i + 1 > sizeof(time_t)) | ||||
7593 | /* ">>" doesn't work well when shifting more bits than avail */ | ||||
7594 | putc(0, fd); | ||||
7595 | else | ||||
7596 | { | ||||
7597 | c = (unsigned)spin->si_sugtime >> (i * 8); | ||||
7598 | putc(c, fd); | ||||
7599 | } | ||||
7600 | } | ||||
7601 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 7602 | #ifdef _MSC_VER |
7603 | # if (_MSC_VER <= 1200) | ||||
7604 | /* # pragma optimize("", on) */ | ||||
7605 | # endif | ||||
7606 | #endif | ||||
7607 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7608 | static int |
7609 | #ifdef __BORLANDC__ | ||||
7610 | _RTLENTRYF | ||||
7611 | #endif | ||||
7612 | rep_compare __ARGS((const void *s1, const void *s2)); | ||||
7613 | |||||
7614 | /* | ||||
7615 | * Function given to qsort() to sort the REP items on "from" string. | ||||
7616 | */ | ||||
7617 | static int | ||||
7618 | #ifdef __BORLANDC__ | ||||
7619 | _RTLENTRYF | ||||
7620 | #endif | ||||
7621 | rep_compare(s1, s2) | ||||
7622 | const void *s1; | ||||
7623 | const void *s2; | ||||
7624 | { | ||||
7625 | fromto_T *p1 = (fromto_T *)s1; | ||||
7626 | fromto_T *p2 = (fromto_T *)s2; | ||||
7627 | |||||
7628 | return STRCMP(p1->ft_from, p2->ft_from); | ||||
7629 | } | ||||
7630 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7631 | /* |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7632 | * Write the Vim .spl file "fname". |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7633 | * Return FAIL or OK; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7634 | */ |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7635 | static int |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7636 | write_vim_spell(spin, fname) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7637 | spellinfo_T *spin; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 7638 | char_u *fname; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7639 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7640 | FILE *fd; |
7641 | int regionmask; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7642 | int round; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7643 | wordnode_T *tree; |
7644 | int nodecount; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7645 | int i; |
7646 | int l; | ||||
7647 | garray_T *gap; | ||||
7648 | fromto_T *ftp; | ||||
7649 | char_u *p; | ||||
7650 | int rr; | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7651 | int retval = OK; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7652 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 7653 | fd = mch_fopen((char *)fname, "w"); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7654 | if (fd == NULL) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7655 | { |
7656 | EMSG2(_(e_notopen), fname); | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7657 | return FAIL; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7658 | } |
7659 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7660 | /* <HEADER>: <fileID> <versionnr> */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7661 | /* <fileID> */ |
7662 | if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1) | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7663 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7664 | EMSG(_(e_write)); |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 7665 | retval = FAIL; |
7666 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7667 | putc(VIMSPELLVERSION, fd); /* <versionnr> */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7668 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7669 | /* |
7670 | * <SECTIONS>: <section> ... <sectionend> | ||||
7671 | */ | ||||
7672 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 7673 | /* SN_INFO: <infotext> */ |
7674 | if (spin->si_info != NULL) | ||||
7675 | { | ||||
7676 | putc(SN_INFO, fd); /* <sectionID> */ | ||||
7677 | putc(0, fd); /* <sectionflags> */ | ||||
7678 | |||||
7679 | i = STRLEN(spin->si_info); | ||||
7680 | put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ | ||||
7681 | fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */ | ||||
7682 | } | ||||
7683 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7684 | /* SN_REGION: <regionname> ... |
7685 | * Write the region names only if there is more than one. */ | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 7686 | if (spin->si_region_count > 1) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7687 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7688 | putc(SN_REGION, fd); /* <sectionID> */ |
7689 | putc(SNF_REQUIRED, fd); /* <sectionflags> */ | ||||
7690 | l = spin->si_region_count * 2; | ||||
7691 | put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | ||||
7692 | fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd); | ||||
7693 | /* <regionname> ... */ | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 7694 | regionmask = (1 << spin->si_region_count) - 1; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7695 | } |
7696 | else | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 7697 | regionmask = 0; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7698 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7699 | /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars> |
7700 | * | ||||
7701 | * The table with character flags and the table for case folding. | ||||
7702 | * This makes sure the same characters are recognized as word characters | ||||
7703 | * when generating and when using a spell file. | ||||
Bram Moolenaar | 6f3058f | 2005-04-24 21:58:05 +0000 | [diff] [blame] | 7704 | * Skip this for ASCII, the table may conflict with the one used for |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7705 | * 'encoding'. |
7706 | * Also skip this for an .add.spl file, the main spell file must contain | ||||
7707 | * the table (avoids that it conflicts). File is shorter too. | ||||
7708 | */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7709 | if (!spin->si_ascii && !spin->si_add) |
Bram Moolenaar | 6f3058f | 2005-04-24 21:58:05 +0000 | [diff] [blame] | 7710 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7711 | char_u folchars[128 * 8]; |
7712 | int flags; | ||||
7713 | |||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 7714 | putc(SN_CHARFLAGS, fd); /* <sectionID> */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7715 | putc(SNF_REQUIRED, fd); /* <sectionflags> */ |
7716 | |||||
7717 | /* Form the <folchars> string first, we need to know its length. */ | ||||
7718 | l = 0; | ||||
7719 | for (i = 128; i < 256; ++i) | ||||
7720 | { | ||||
7721 | #ifdef FEAT_MBYTE | ||||
7722 | if (has_mbyte) | ||||
7723 | l += mb_char2bytes(spelltab.st_fold[i], folchars + l); | ||||
7724 | else | ||||
7725 | #endif | ||||
7726 | folchars[l++] = spelltab.st_fold[i]; | ||||
7727 | } | ||||
7728 | put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4); /* <sectionlen> */ | ||||
7729 | |||||
7730 | fputc(128, fd); /* <charflagslen> */ | ||||
7731 | for (i = 128; i < 256; ++i) | ||||
7732 | { | ||||
7733 | flags = 0; | ||||
7734 | if (spelltab.st_isw[i]) | ||||
7735 | flags |= CF_WORD; | ||||
7736 | if (spelltab.st_isu[i]) | ||||
7737 | flags |= CF_UPPER; | ||||
7738 | fputc(flags, fd); /* <charflags> */ | ||||
7739 | } | ||||
7740 | |||||
7741 | put_bytes(fd, (long_u)l, 2); /* <folcharslen> */ | ||||
7742 | fwrite(folchars, (size_t)l, (size_t)1, fd); /* <folchars> */ | ||||
Bram Moolenaar | 6f3058f | 2005-04-24 21:58:05 +0000 | [diff] [blame] | 7743 | } |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 7744 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7745 | /* SN_MIDWORD: <midword> */ |
7746 | if (spin->si_midword != NULL) | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 7747 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7748 | putc(SN_MIDWORD, fd); /* <sectionID> */ |
7749 | putc(SNF_REQUIRED, fd); /* <sectionflags> */ | ||||
7750 | |||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 7751 | i = STRLEN(spin->si_midword); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7752 | put_bytes(fd, (long_u)i, 4); /* <sectionlen> */ |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 7753 | fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */ |
7754 | } | ||||
7755 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7756 | /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */ |
7757 | if (spin->si_prefcond.ga_len > 0) | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7758 | { |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7759 | putc(SN_PREFCOND, fd); /* <sectionID> */ |
7760 | putc(SNF_REQUIRED, fd); /* <sectionflags> */ | ||||
7761 | |||||
7762 | l = write_spell_prefcond(NULL, &spin->si_prefcond); | ||||
7763 | put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | ||||
7764 | |||||
7765 | write_spell_prefcond(fd, &spin->si_prefcond); | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 7766 | } |
7767 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7768 | /* SN_REP: <repcount> <rep> ... |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7769 | * SN_SAL: <salflags> <salcount> <sal> ... |
7770 | * SN_REPSAL: <repcount> <rep> ... */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7771 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7772 | /* round 1: SN_REP section |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7773 | * round 2: SN_SAL section (unless SN_SOFO is used) |
7774 | * round 3: SN_REPSAL section */ | ||||
7775 | for (round = 1; round <= 3; ++round) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7776 | { |
7777 | if (round == 1) | ||||
7778 | gap = &spin->si_rep; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7779 | else if (round == 2) |
7780 | { | ||||
7781 | /* Don't write SN_SAL when using a SN_SOFO section */ | ||||
7782 | if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) | ||||
7783 | continue; | ||||
7784 | gap = &spin->si_sal; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7785 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7786 | else |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7787 | gap = &spin->si_repsal; |
7788 | |||||
7789 | /* Don't write the section if there are no items. */ | ||||
7790 | if (gap->ga_len == 0) | ||||
7791 | continue; | ||||
7792 | |||||
7793 | /* Sort the REP/REPSAL items. */ | ||||
7794 | if (round != 2) | ||||
7795 | qsort(gap->ga_data, (size_t)gap->ga_len, | ||||
7796 | sizeof(fromto_T), rep_compare); | ||||
7797 | |||||
7798 | i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL); | ||||
7799 | putc(i, fd); /* <sectionID> */ | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7800 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7801 | /* This is for making suggestions, section is not required. */ |
7802 | putc(0, fd); /* <sectionflags> */ | ||||
7803 | |||||
7804 | /* Compute the length of what follows. */ | ||||
7805 | l = 2; /* count <repcount> or <salcount> */ | ||||
7806 | for (i = 0; i < gap->ga_len; ++i) | ||||
7807 | { | ||||
7808 | ftp = &((fromto_T *)gap->ga_data)[i]; | ||||
7809 | l += 1 + STRLEN(ftp->ft_from); /* count <*fromlen> and <*from> */ | ||||
7810 | l += 1 + STRLEN(ftp->ft_to); /* count <*tolen> and <*to> */ | ||||
7811 | } | ||||
7812 | if (round == 2) | ||||
7813 | ++l; /* count <salflags> */ | ||||
7814 | put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | ||||
7815 | |||||
7816 | if (round == 2) | ||||
7817 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7818 | i = 0; |
7819 | if (spin->si_followup) | ||||
7820 | i |= SAL_F0LLOWUP; | ||||
7821 | if (spin->si_collapse) | ||||
7822 | i |= SAL_COLLAPSE; | ||||
7823 | if (spin->si_rem_accents) | ||||
7824 | i |= SAL_REM_ACCENTS; | ||||
7825 | putc(i, fd); /* <salflags> */ | ||||
7826 | } | ||||
7827 | |||||
7828 | put_bytes(fd, (long_u)gap->ga_len, 2); /* <repcount> or <salcount> */ | ||||
7829 | for (i = 0; i < gap->ga_len; ++i) | ||||
7830 | { | ||||
7831 | /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */ | ||||
7832 | /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */ | ||||
7833 | ftp = &((fromto_T *)gap->ga_data)[i]; | ||||
7834 | for (rr = 1; rr <= 2; ++rr) | ||||
7835 | { | ||||
7836 | p = rr == 1 ? ftp->ft_from : ftp->ft_to; | ||||
7837 | l = STRLEN(p); | ||||
7838 | putc(l, fd); | ||||
7839 | fwrite(p, l, (size_t)1, fd); | ||||
7840 | } | ||||
7841 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7842 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7843 | } |
7844 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7845 | /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto> |
7846 | * This is for making suggestions, section is not required. */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 7847 | if (spin->si_sofofr != NULL && spin->si_sofoto != NULL) |
7848 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7849 | putc(SN_SOFO, fd); /* <sectionID> */ |
7850 | putc(0, fd); /* <sectionflags> */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 7851 | |
7852 | l = STRLEN(spin->si_sofofr); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7853 | put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4); |
7854 | /* <sectionlen> */ | ||||
7855 | |||||
7856 | put_bytes(fd, (long_u)l, 2); /* <sofofromlen> */ | ||||
7857 | fwrite(spin->si_sofofr, l, (size_t)1, fd); /* <sofofrom> */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 7858 | |
7859 | l = STRLEN(spin->si_sofoto); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7860 | put_bytes(fd, (long_u)l, 2); /* <sofotolen> */ |
7861 | fwrite(spin->si_sofoto, l, (size_t)1, fd); /* <sofoto> */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 7862 | } |
7863 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7864 | /* SN_WORDS: <word> ... |
7865 | * This is for making suggestions, section is not required. */ | ||||
7866 | if (spin->si_commonwords.ht_used > 0) | ||||
7867 | { | ||||
7868 | putc(SN_WORDS, fd); /* <sectionID> */ | ||||
7869 | putc(0, fd); /* <sectionflags> */ | ||||
7870 | |||||
7871 | /* round 1: count the bytes | ||||
7872 | * round 2: write the bytes */ | ||||
7873 | for (round = 1; round <= 2; ++round) | ||||
7874 | { | ||||
7875 | int todo; | ||||
7876 | int len = 0; | ||||
7877 | hashitem_T *hi; | ||||
7878 | |||||
7879 | todo = spin->si_commonwords.ht_used; | ||||
7880 | for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi) | ||||
7881 | if (!HASHITEM_EMPTY(hi)) | ||||
7882 | { | ||||
7883 | l = STRLEN(hi->hi_key) + 1; | ||||
7884 | len += l; | ||||
7885 | if (round == 2) /* <word> */ | ||||
7886 | fwrite(hi->hi_key, (size_t)l, (size_t)1, fd); | ||||
7887 | --todo; | ||||
7888 | } | ||||
7889 | if (round == 1) | ||||
7890 | put_bytes(fd, (long_u)len, 4); /* <sectionlen> */ | ||||
7891 | } | ||||
7892 | } | ||||
7893 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7894 | /* SN_MAP: <mapstr> |
7895 | * This is for making suggestions, section is not required. */ | ||||
7896 | if (spin->si_map.ga_len > 0) | ||||
7897 | { | ||||
7898 | putc(SN_MAP, fd); /* <sectionID> */ | ||||
7899 | putc(0, fd); /* <sectionflags> */ | ||||
7900 | l = spin->si_map.ga_len; | ||||
7901 | put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | ||||
7902 | fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd); | ||||
7903 | /* <mapstr> */ | ||||
7904 | } | ||||
7905 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 7906 | /* SN_SUGFILE: <timestamp> |
7907 | * This is used to notify that a .sug file may be available and at the | ||||
7908 | * same time allows for checking that a .sug file that is found matches | ||||
7909 | * with this .spl file. That's because the word numbers must be exactly | ||||
7910 | * right. */ | ||||
7911 | if (!spin->si_nosugfile | ||||
7912 | && (spin->si_sal.ga_len > 0 | ||||
7913 | || (spin->si_sofofr != NULL && spin->si_sofoto != NULL))) | ||||
7914 | { | ||||
7915 | putc(SN_SUGFILE, fd); /* <sectionID> */ | ||||
7916 | putc(0, fd); /* <sectionflags> */ | ||||
7917 | put_bytes(fd, (long_u)8, 4); /* <sectionlen> */ | ||||
7918 | |||||
7919 | /* Set si_sugtime and write it to the file. */ | ||||
7920 | spin->si_sugtime = time(NULL); | ||||
7921 | put_sugtime(spin, fd); /* <timestamp> */ | ||||
7922 | } | ||||
7923 | |||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 7924 | /* SN_NOSPLITSUGS: nothing |
7925 | * This is used to notify that no suggestions with word splits are to be | ||||
7926 | * made. */ | ||||
7927 | if (spin->si_nosplitsugs) | ||||
7928 | { | ||||
7929 | putc(SN_NOSPLITSUGS, fd); /* <sectionID> */ | ||||
7930 | putc(0, fd); /* <sectionflags> */ | ||||
7931 | put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | ||||
7932 | } | ||||
7933 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7934 | /* SN_COMPOUND: compound info. |
7935 | * We don't mark it required, when not supported all compound words will | ||||
7936 | * be bad words. */ | ||||
7937 | if (spin->si_compflags != NULL) | ||||
7938 | { | ||||
7939 | putc(SN_COMPOUND, fd); /* <sectionID> */ | ||||
7940 | putc(0, fd); /* <sectionflags> */ | ||||
7941 | |||||
7942 | l = STRLEN(spin->si_compflags); | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 7943 | for (i = 0; i < spin->si_comppat.ga_len; ++i) |
7944 | l += STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1; | ||||
7945 | put_bytes(fd, (long_u)(l + 7), 4); /* <sectionlen> */ | ||||
7946 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7947 | putc(spin->si_compmax, fd); /* <compmax> */ |
7948 | putc(spin->si_compminlen, fd); /* <compminlen> */ | ||||
7949 | putc(spin->si_compsylmax, fd); /* <compsylmax> */ | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 7950 | putc(0, fd); /* for Vim 7.0b compatibility */ |
7951 | putc(spin->si_compoptions, fd); /* <compoptions> */ | ||||
7952 | put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2); | ||||
7953 | /* <comppatcount> */ | ||||
7954 | for (i = 0; i < spin->si_comppat.ga_len; ++i) | ||||
7955 | { | ||||
7956 | p = ((char_u **)(spin->si_comppat.ga_data))[i]; | ||||
7957 | putc(STRLEN(p), fd); /* <comppatlen> */ | ||||
7958 | fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);/* <comppattext> */ | ||||
7959 | } | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7960 | /* <compflags> */ |
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 7961 | fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags), |
7962 | (size_t)1, fd); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7963 | } |
7964 | |||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 7965 | /* SN_NOBREAK: NOBREAK flag */ |
7966 | if (spin->si_nobreak) | ||||
7967 | { | ||||
7968 | putc(SN_NOBREAK, fd); /* <sectionID> */ | ||||
7969 | putc(0, fd); /* <sectionflags> */ | ||||
7970 | |||||
7971 | /* It's empty, the precense of the section flags the feature. */ | ||||
7972 | put_bytes(fd, (long_u)0, 4); /* <sectionlen> */ | ||||
7973 | } | ||||
7974 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 7975 | /* SN_SYLLABLE: syllable info. |
7976 | * We don't mark it required, when not supported syllables will not be | ||||
7977 | * counted. */ | ||||
7978 | if (spin->si_syllable != NULL) | ||||
7979 | { | ||||
7980 | putc(SN_SYLLABLE, fd); /* <sectionID> */ | ||||
7981 | putc(0, fd); /* <sectionflags> */ | ||||
7982 | |||||
7983 | l = STRLEN(spin->si_syllable); | ||||
7984 | put_bytes(fd, (long_u)l, 4); /* <sectionlen> */ | ||||
7985 | fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */ | ||||
7986 | } | ||||
7987 | |||||
7988 | /* end of <SECTIONS> */ | ||||
7989 | putc(SN_END, fd); /* <sectionend> */ | ||||
7990 | |||||
Bram Moolenaar | 50cde82 | 2005-06-05 21:54:54 +0000 | [diff] [blame] | 7991 | |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7992 | /* |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7993 | * <LWORDTREE> <KWORDTREE> <PREFIXTREE> |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7994 | */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 7995 | spin->si_memtot = 0; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7996 | for (round = 1; round <= 3; ++round) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 7997 | { |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 7998 | if (round == 1) |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 7999 | tree = spin->si_foldroot->wn_sibling; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8000 | else if (round == 2) |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 8001 | tree = spin->si_keeproot->wn_sibling; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8002 | else |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 8003 | tree = spin->si_prefroot->wn_sibling; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8004 | |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8005 | /* Clear the index and wnode fields in the tree. */ |
8006 | clear_node(tree); | ||||
8007 | |||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8008 | /* Count the number of nodes. Needed to be able to allocate the |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8009 | * memory when reading the nodes. Also fills in index for shared |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8010 | * nodes. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8011 | nodecount = put_node(NULL, tree, 0, regionmask, round == 3); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8012 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8013 | /* number of nodes in 4 bytes */ |
8014 | put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | ||||
Bram Moolenaar | 50cde82 | 2005-06-05 21:54:54 +0000 | [diff] [blame] | 8015 | spin->si_memtot += nodecount + nodecount * sizeof(int); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8016 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8017 | /* Write the nodes. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8018 | (void)put_node(fd, tree, 0, regionmask, round == 3); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8019 | } |
8020 | |||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 8021 | /* Write another byte to check for errors. */ |
8022 | if (putc(0, fd) == EOF) | ||||
8023 | retval = FAIL; | ||||
8024 | |||||
8025 | if (fclose(fd) == EOF) | ||||
8026 | retval = FAIL; | ||||
8027 | |||||
8028 | return retval; | ||||
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 8029 | } |
8030 | |||||
8031 | /* | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8032 | * Clear the index and wnode fields of "node", it siblings and its |
8033 | * children. This is needed because they are a union with other items to save | ||||
8034 | * space. | ||||
8035 | */ | ||||
8036 | static void | ||||
8037 | clear_node(node) | ||||
8038 | wordnode_T *node; | ||||
8039 | { | ||||
8040 | wordnode_T *np; | ||||
8041 | |||||
8042 | if (node != NULL) | ||||
8043 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
8044 | { | ||||
8045 | np->wn_u1.index = 0; | ||||
8046 | np->wn_u2.wnode = NULL; | ||||
8047 | |||||
8048 | if (np->wn_byte != NUL) | ||||
8049 | clear_node(np->wn_child); | ||||
8050 | } | ||||
8051 | } | ||||
8052 | |||||
8053 | |||||
8054 | /* | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8055 | * Dump a word tree at node "node". |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8056 | * |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8057 | * This first writes the list of possible bytes (siblings). Then for each |
8058 | * byte recursively write the children. | ||||
8059 | * | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8060 | * NOTE: The code here must match the code in read_tree_node(), since |
8061 | * assumptions are made about the indexes (so that we don't have to write them | ||||
8062 | * in the file). | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8063 | * |
8064 | * Returns the number of nodes used. | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8065 | */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8066 | static int |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8067 | put_node(fd, node, index, regionmask, prefixtree) |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8068 | FILE *fd; /* NULL when only counting */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8069 | wordnode_T *node; |
8070 | int index; | ||||
8071 | int regionmask; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8072 | int prefixtree; /* TRUE for PREFIXTREE */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8073 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8074 | int newindex = index; |
8075 | int siblingcount = 0; | ||||
8076 | wordnode_T *np; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8077 | int flags; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8078 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8079 | /* If "node" is zero the tree is empty. */ |
8080 | if (node == NULL) | ||||
8081 | return 0; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8082 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8083 | /* Store the index where this node is written. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8084 | node->wn_u1.index = index; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8085 | |
8086 | /* Count the number of siblings. */ | ||||
8087 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
8088 | ++siblingcount; | ||||
8089 | |||||
8090 | /* Write the sibling count. */ | ||||
8091 | if (fd != NULL) | ||||
8092 | putc(siblingcount, fd); /* <siblingcount> */ | ||||
8093 | |||||
8094 | /* Write each sibling byte and optionally extra info. */ | ||||
8095 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8096 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8097 | if (np->wn_byte == 0) |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 8098 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8099 | if (fd != NULL) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8100 | { |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8101 | /* For a NUL byte (end of word) write the flags etc. */ |
8102 | if (prefixtree) | ||||
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 8103 | { |
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 8104 | /* In PREFIXTREE write the required affixID and the |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 8105 | * associated condition nr (stored in wn_region). The |
8106 | * byte value is misused to store the "rare" and "not | ||||
8107 | * combining" flags */ | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 8108 | if (np->wn_flags == (short_u)PFX_FLAGS) |
8109 | putc(BY_NOFLAGS, fd); /* <byte> */ | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 8110 | else |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 8111 | { |
8112 | putc(BY_FLAGS, fd); /* <byte> */ | ||||
8113 | putc(np->wn_flags, fd); /* <pflags> */ | ||||
8114 | } | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 8115 | putc(np->wn_affixID, fd); /* <affixID> */ |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8116 | put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8117 | } |
8118 | else | ||||
8119 | { | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8120 | /* For word trees we write the flag/region items. */ |
8121 | flags = np->wn_flags; | ||||
8122 | if (regionmask != 0 && np->wn_region != regionmask) | ||||
8123 | flags |= WF_REGION; | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 8124 | if (np->wn_affixID != 0) |
8125 | flags |= WF_AFX; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8126 | if (flags == 0) |
8127 | { | ||||
8128 | /* word without flags or region */ | ||||
8129 | putc(BY_NOFLAGS, fd); /* <byte> */ | ||||
8130 | } | ||||
8131 | else | ||||
8132 | { | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 8133 | if (np->wn_flags >= 0x100) |
8134 | { | ||||
8135 | putc(BY_FLAGS2, fd); /* <byte> */ | ||||
8136 | putc(flags, fd); /* <flags> */ | ||||
8137 | putc((unsigned)flags >> 8, fd); /* <flags2> */ | ||||
8138 | } | ||||
8139 | else | ||||
8140 | { | ||||
8141 | putc(BY_FLAGS, fd); /* <byte> */ | ||||
8142 | putc(flags, fd); /* <flags> */ | ||||
8143 | } | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8144 | if (flags & WF_REGION) |
8145 | putc(np->wn_region, fd); /* <region> */ | ||||
Bram Moolenaar | ae5bce1 | 2005-08-15 21:41:48 +0000 | [diff] [blame] | 8146 | if (flags & WF_AFX) |
8147 | putc(np->wn_affixID, fd); /* <affixID> */ | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8148 | } |
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 8149 | } |
8150 | } | ||||
Bram Moolenaar | 2cf8b30 | 2005-04-20 19:37:22 +0000 | [diff] [blame] | 8151 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8152 | else |
8153 | { | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8154 | if (np->wn_child->wn_u1.index != 0 |
8155 | && np->wn_child->wn_u2.wnode != node) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8156 | { |
8157 | /* The child is written elsewhere, write the reference. */ | ||||
8158 | if (fd != NULL) | ||||
8159 | { | ||||
8160 | putc(BY_INDEX, fd); /* <byte> */ | ||||
8161 | /* <nodeidx> */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8162 | put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8163 | } |
8164 | } | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8165 | else if (np->wn_child->wn_u2.wnode == NULL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8166 | /* We will write the child below and give it an index. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8167 | np->wn_child->wn_u2.wnode = node; |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 8168 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8169 | if (fd != NULL) |
8170 | if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */ | ||||
8171 | { | ||||
8172 | EMSG(_(e_write)); | ||||
8173 | return 0; | ||||
8174 | } | ||||
8175 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8176 | } |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8177 | |
8178 | /* Space used in the array when reading: one for each sibling and one for | ||||
8179 | * the count. */ | ||||
8180 | newindex += siblingcount + 1; | ||||
8181 | |||||
8182 | /* Recursively dump the children of each sibling. */ | ||||
8183 | for (np = node; np != NULL; np = np->wn_sibling) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 8184 | if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node) |
8185 | newindex = put_node(fd, np->wn_child, newindex, regionmask, | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8186 | prefixtree); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8187 | |
8188 | return newindex; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8189 | } |
8190 | |||||
8191 | |||||
8192 | /* | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8193 | * ":mkspell [-ascii] outfile infile ..." |
8194 | * ":mkspell [-ascii] addfile" | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8195 | */ |
8196 | void | ||||
8197 | ex_mkspell(eap) | ||||
8198 | exarg_T *eap; | ||||
8199 | { | ||||
8200 | int fcount; | ||||
8201 | char_u **fnames; | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8202 | char_u *arg = eap->arg; |
8203 | int ascii = FALSE; | ||||
8204 | |||||
8205 | if (STRNCMP(arg, "-ascii", 6) == 0) | ||||
8206 | { | ||||
8207 | ascii = TRUE; | ||||
8208 | arg = skipwhite(arg + 6); | ||||
8209 | } | ||||
8210 | |||||
8211 | /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */ | ||||
8212 | if (get_arglist_exp(arg, &fcount, &fnames) == OK) | ||||
8213 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8214 | mkspell(fcount, fnames, ascii, eap->forceit, FALSE); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8215 | FreeWild(fcount, fnames); |
8216 | } | ||||
8217 | } | ||||
8218 | |||||
8219 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8220 | * Create the .sug file. |
8221 | * Uses the soundfold info in "spin". | ||||
8222 | * Writes the file with the name "wfname", with ".spl" changed to ".sug". | ||||
8223 | */ | ||||
8224 | static void | ||||
8225 | spell_make_sugfile(spin, wfname) | ||||
8226 | spellinfo_T *spin; | ||||
8227 | char_u *wfname; | ||||
8228 | { | ||||
8229 | char_u fname[MAXPATHL]; | ||||
8230 | int len; | ||||
8231 | slang_T *slang; | ||||
8232 | int free_slang = FALSE; | ||||
8233 | |||||
8234 | /* | ||||
8235 | * Read back the .spl file that was written. This fills the required | ||||
8236 | * info for soundfolding. This also uses less memory than the | ||||
8237 | * pointer-linked version of the trie. And it avoids having two versions | ||||
8238 | * of the code for the soundfolding stuff. | ||||
8239 | * It might have been done already by spell_reload_one(). | ||||
8240 | */ | ||||
8241 | for (slang = first_lang; slang != NULL; slang = slang->sl_next) | ||||
8242 | if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME) | ||||
8243 | break; | ||||
8244 | if (slang == NULL) | ||||
8245 | { | ||||
8246 | spell_message(spin, (char_u *)_("Reading back spell file...")); | ||||
8247 | slang = spell_load_file(wfname, NULL, NULL, FALSE); | ||||
8248 | if (slang == NULL) | ||||
8249 | return; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8250 | free_slang = TRUE; |
8251 | } | ||||
8252 | |||||
8253 | /* | ||||
8254 | * Clear the info in "spin" that is used. | ||||
8255 | */ | ||||
8256 | spin->si_blocks = NULL; | ||||
8257 | spin->si_blocks_cnt = 0; | ||||
8258 | spin->si_compress_cnt = 0; /* will stay at 0 all the time*/ | ||||
8259 | spin->si_free_count = 0; | ||||
8260 | spin->si_first_free = NULL; | ||||
8261 | spin->si_foldwcount = 0; | ||||
8262 | |||||
8263 | /* | ||||
8264 | * Go through the trie of good words, soundfold each word and add it to | ||||
8265 | * the soundfold trie. | ||||
8266 | */ | ||||
8267 | spell_message(spin, (char_u *)_("Performing soundfolding...")); | ||||
8268 | if (sug_filltree(spin, slang) == FAIL) | ||||
8269 | goto theend; | ||||
8270 | |||||
8271 | /* | ||||
8272 | * Create the table which links each soundfold word with a list of the | ||||
8273 | * good words it may come from. Creates buffer "spin->si_spellbuf". | ||||
8274 | * This also removes the wordnr from the NUL byte entries to make | ||||
8275 | * compression possible. | ||||
8276 | */ | ||||
8277 | if (sug_maketable(spin) == FAIL) | ||||
8278 | goto theend; | ||||
8279 | |||||
8280 | smsg((char_u *)_("Number of words after soundfolding: %ld"), | ||||
8281 | (long)spin->si_spellbuf->b_ml.ml_line_count); | ||||
8282 | |||||
8283 | /* | ||||
8284 | * Compress the soundfold trie. | ||||
8285 | */ | ||||
8286 | spell_message(spin, (char_u *)_(msg_compressing)); | ||||
8287 | wordtree_compress(spin, spin->si_foldroot); | ||||
8288 | |||||
8289 | /* | ||||
8290 | * Write the .sug file. | ||||
8291 | * Make the file name by changing ".spl" to ".sug". | ||||
8292 | */ | ||||
8293 | STRCPY(fname, wfname); | ||||
8294 | len = STRLEN(fname); | ||||
8295 | fname[len - 2] = 'u'; | ||||
8296 | fname[len - 1] = 'g'; | ||||
8297 | sug_write(spin, fname); | ||||
8298 | |||||
8299 | theend: | ||||
8300 | if (free_slang) | ||||
8301 | slang_free(slang); | ||||
8302 | free_blocks(spin->si_blocks); | ||||
8303 | close_spellbuf(spin->si_spellbuf); | ||||
8304 | } | ||||
8305 | |||||
8306 | /* | ||||
8307 | * Build the soundfold trie for language "slang". | ||||
8308 | */ | ||||
8309 | static int | ||||
8310 | sug_filltree(spin, slang) | ||||
8311 | spellinfo_T *spin; | ||||
8312 | slang_T *slang; | ||||
8313 | { | ||||
8314 | char_u *byts; | ||||
8315 | idx_T *idxs; | ||||
8316 | int depth; | ||||
8317 | idx_T arridx[MAXWLEN]; | ||||
8318 | int curi[MAXWLEN]; | ||||
8319 | char_u tword[MAXWLEN]; | ||||
8320 | char_u tsalword[MAXWLEN]; | ||||
8321 | int c; | ||||
8322 | idx_T n; | ||||
8323 | unsigned words_done = 0; | ||||
8324 | int wordcount[MAXWLEN]; | ||||
8325 | |||||
8326 | /* We use si_foldroot for the souldfolded trie. */ | ||||
8327 | spin->si_foldroot = wordtree_alloc(spin); | ||||
8328 | if (spin->si_foldroot == NULL) | ||||
8329 | return FAIL; | ||||
8330 | |||||
8331 | /* let tree_add_word() know we're adding to the soundfolded tree */ | ||||
8332 | spin->si_sugtree = TRUE; | ||||
8333 | |||||
8334 | /* | ||||
8335 | * Go through the whole case-folded tree, soundfold each word and put it | ||||
8336 | * in the trie. | ||||
8337 | */ | ||||
8338 | byts = slang->sl_fbyts; | ||||
8339 | idxs = slang->sl_fidxs; | ||||
8340 | |||||
8341 | arridx[0] = 0; | ||||
8342 | curi[0] = 1; | ||||
8343 | wordcount[0] = 0; | ||||
8344 | |||||
8345 | depth = 0; | ||||
8346 | while (depth >= 0 && !got_int) | ||||
8347 | { | ||||
8348 | if (curi[depth] > byts[arridx[depth]]) | ||||
8349 | { | ||||
8350 | /* Done all bytes at this node, go up one level. */ | ||||
8351 | idxs[arridx[depth]] = wordcount[depth]; | ||||
8352 | if (depth > 0) | ||||
8353 | wordcount[depth - 1] += wordcount[depth]; | ||||
8354 | |||||
8355 | --depth; | ||||
8356 | line_breakcheck(); | ||||
8357 | } | ||||
8358 | else | ||||
8359 | { | ||||
8360 | |||||
8361 | /* Do one more byte at this node. */ | ||||
8362 | n = arridx[depth] + curi[depth]; | ||||
8363 | ++curi[depth]; | ||||
8364 | |||||
8365 | c = byts[n]; | ||||
8366 | if (c == 0) | ||||
8367 | { | ||||
8368 | /* Sound-fold the word. */ | ||||
8369 | tword[depth] = NUL; | ||||
8370 | spell_soundfold(slang, tword, TRUE, tsalword); | ||||
8371 | |||||
8372 | /* We use the "flags" field for the MSB of the wordnr, | ||||
8373 | * "region" for the LSB of the wordnr. */ | ||||
8374 | if (tree_add_word(spin, tsalword, spin->si_foldroot, | ||||
8375 | words_done >> 16, words_done & 0xffff, | ||||
8376 | 0) == FAIL) | ||||
8377 | return FAIL; | ||||
8378 | |||||
8379 | ++words_done; | ||||
8380 | ++wordcount[depth]; | ||||
8381 | |||||
8382 | /* Reset the block count each time to avoid compression | ||||
8383 | * kicking in. */ | ||||
8384 | spin->si_blocks_cnt = 0; | ||||
8385 | |||||
8386 | /* Skip over any other NUL bytes (same word with different | ||||
8387 | * flags). */ | ||||
8388 | while (byts[n + 1] == 0) | ||||
8389 | { | ||||
8390 | ++n; | ||||
8391 | ++curi[depth]; | ||||
8392 | } | ||||
8393 | } | ||||
8394 | else | ||||
8395 | { | ||||
8396 | /* Normal char, go one level deeper. */ | ||||
8397 | tword[depth++] = c; | ||||
8398 | arridx[depth] = idxs[n]; | ||||
8399 | curi[depth] = 1; | ||||
8400 | wordcount[depth] = 0; | ||||
8401 | } | ||||
8402 | } | ||||
8403 | } | ||||
8404 | |||||
8405 | smsg((char_u *)_("Total number of words: %d"), words_done); | ||||
8406 | |||||
8407 | return OK; | ||||
8408 | } | ||||
8409 | |||||
8410 | /* | ||||
8411 | * Make the table that links each word in the soundfold trie to the words it | ||||
8412 | * can be produced from. | ||||
8413 | * This is not unlike lines in a file, thus use a memfile to be able to access | ||||
8414 | * the table efficiently. | ||||
8415 | * Returns FAIL when out of memory. | ||||
8416 | */ | ||||
8417 | static int | ||||
8418 | sug_maketable(spin) | ||||
8419 | spellinfo_T *spin; | ||||
8420 | { | ||||
8421 | garray_T ga; | ||||
8422 | int res = OK; | ||||
8423 | |||||
8424 | /* Allocate a buffer, open a memline for it and create the swap file | ||||
8425 | * (uses a temp file, not a .swp file). */ | ||||
8426 | spin->si_spellbuf = open_spellbuf(); | ||||
8427 | if (spin->si_spellbuf == NULL) | ||||
8428 | return FAIL; | ||||
8429 | |||||
8430 | /* Use a buffer to store the line info, avoids allocating many small | ||||
8431 | * pieces of memory. */ | ||||
8432 | ga_init2(&ga, 1, 100); | ||||
8433 | |||||
8434 | /* recursively go through the tree */ | ||||
8435 | if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1) | ||||
8436 | res = FAIL; | ||||
8437 | |||||
8438 | ga_clear(&ga); | ||||
8439 | return res; | ||||
8440 | } | ||||
8441 | |||||
8442 | /* | ||||
8443 | * Fill the table for one node and its children. | ||||
8444 | * Returns the wordnr at the start of the node. | ||||
8445 | * Returns -1 when out of memory. | ||||
8446 | */ | ||||
8447 | static int | ||||
8448 | sug_filltable(spin, node, startwordnr, gap) | ||||
8449 | spellinfo_T *spin; | ||||
8450 | wordnode_T *node; | ||||
8451 | int startwordnr; | ||||
8452 | garray_T *gap; /* place to store line of numbers */ | ||||
8453 | { | ||||
8454 | wordnode_T *p, *np; | ||||
8455 | int wordnr = startwordnr; | ||||
8456 | int nr; | ||||
8457 | int prev_nr; | ||||
8458 | |||||
8459 | for (p = node; p != NULL; p = p->wn_sibling) | ||||
8460 | { | ||||
8461 | if (p->wn_byte == NUL) | ||||
8462 | { | ||||
8463 | gap->ga_len = 0; | ||||
8464 | prev_nr = 0; | ||||
8465 | for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling) | ||||
8466 | { | ||||
8467 | if (ga_grow(gap, 10) == FAIL) | ||||
8468 | return -1; | ||||
8469 | |||||
8470 | nr = (np->wn_flags << 16) + (np->wn_region & 0xffff); | ||||
8471 | /* Compute the offset from the previous nr and store the | ||||
8472 | * offset in a way that it takes a minimum number of bytes. | ||||
8473 | * It's a bit like utf-8, but without the need to mark | ||||
8474 | * following bytes. */ | ||||
8475 | nr -= prev_nr; | ||||
8476 | prev_nr += nr; | ||||
8477 | gap->ga_len += offset2bytes(nr, | ||||
8478 | (char_u *)gap->ga_data + gap->ga_len); | ||||
8479 | } | ||||
8480 | |||||
8481 | /* add the NUL byte */ | ||||
8482 | ((char_u *)gap->ga_data)[gap->ga_len++] = NUL; | ||||
8483 | |||||
8484 | if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr, | ||||
8485 | gap->ga_data, gap->ga_len, TRUE) == FAIL) | ||||
8486 | return -1; | ||||
8487 | ++wordnr; | ||||
8488 | |||||
8489 | /* Remove extra NUL entries, we no longer need them. We don't | ||||
8490 | * bother freeing the nodes, the won't be reused anyway. */ | ||||
8491 | while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL) | ||||
8492 | p->wn_sibling = p->wn_sibling->wn_sibling; | ||||
8493 | |||||
8494 | /* Clear the flags on the remaining NUL node, so that compression | ||||
8495 | * works a lot better. */ | ||||
8496 | p->wn_flags = 0; | ||||
8497 | p->wn_region = 0; | ||||
8498 | } | ||||
8499 | else | ||||
8500 | { | ||||
8501 | wordnr = sug_filltable(spin, p->wn_child, wordnr, gap); | ||||
8502 | if (wordnr == -1) | ||||
8503 | return -1; | ||||
8504 | } | ||||
8505 | } | ||||
8506 | return wordnr; | ||||
8507 | } | ||||
8508 | |||||
8509 | /* | ||||
8510 | * Convert an offset into a minimal number of bytes. | ||||
8511 | * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL | ||||
8512 | * bytes. | ||||
8513 | */ | ||||
8514 | static int | ||||
8515 | offset2bytes(nr, buf) | ||||
8516 | int nr; | ||||
8517 | char_u *buf; | ||||
8518 | { | ||||
8519 | int rem; | ||||
8520 | int b1, b2, b3, b4; | ||||
8521 | |||||
8522 | /* Split the number in parts of base 255. We need to avoid NUL bytes. */ | ||||
8523 | b1 = nr % 255 + 1; | ||||
8524 | rem = nr / 255; | ||||
8525 | b2 = rem % 255 + 1; | ||||
8526 | rem = rem / 255; | ||||
8527 | b3 = rem % 255 + 1; | ||||
8528 | b4 = rem / 255 + 1; | ||||
8529 | |||||
8530 | if (b4 > 1 || b3 > 0x1f) /* 4 bytes */ | ||||
8531 | { | ||||
8532 | buf[0] = 0xe0 + b4; | ||||
8533 | buf[1] = b3; | ||||
8534 | buf[2] = b2; | ||||
8535 | buf[3] = b1; | ||||
8536 | return 4; | ||||
8537 | } | ||||
8538 | if (b3 > 1 || b2 > 0x3f ) /* 3 bytes */ | ||||
8539 | { | ||||
8540 | buf[0] = 0xc0 + b3; | ||||
8541 | buf[1] = b2; | ||||
8542 | buf[2] = b1; | ||||
8543 | return 3; | ||||
8544 | } | ||||
8545 | if (b2 > 1 || b1 > 0x7f ) /* 2 bytes */ | ||||
8546 | { | ||||
8547 | buf[0] = 0x80 + b2; | ||||
8548 | buf[1] = b1; | ||||
8549 | return 2; | ||||
8550 | } | ||||
8551 | /* 1 byte */ | ||||
8552 | buf[0] = b1; | ||||
8553 | return 1; | ||||
8554 | } | ||||
8555 | |||||
8556 | /* | ||||
8557 | * Opposite of offset2bytes(). | ||||
8558 | * "pp" points to the bytes and is advanced over it. | ||||
8559 | * Returns the offset. | ||||
8560 | */ | ||||
8561 | static int | ||||
8562 | bytes2offset(pp) | ||||
8563 | char_u **pp; | ||||
8564 | { | ||||
8565 | char_u *p = *pp; | ||||
8566 | int nr; | ||||
8567 | int c; | ||||
8568 | |||||
8569 | c = *p++; | ||||
8570 | if ((c & 0x80) == 0x00) /* 1 byte */ | ||||
8571 | { | ||||
8572 | nr = c - 1; | ||||
8573 | } | ||||
8574 | else if ((c & 0xc0) == 0x80) /* 2 bytes */ | ||||
8575 | { | ||||
8576 | nr = (c & 0x3f) - 1; | ||||
8577 | nr = nr * 255 + (*p++ - 1); | ||||
8578 | } | ||||
8579 | else if ((c & 0xe0) == 0xc0) /* 3 bytes */ | ||||
8580 | { | ||||
8581 | nr = (c & 0x1f) - 1; | ||||
8582 | nr = nr * 255 + (*p++ - 1); | ||||
8583 | nr = nr * 255 + (*p++ - 1); | ||||
8584 | } | ||||
8585 | else /* 4 bytes */ | ||||
8586 | { | ||||
8587 | nr = (c & 0x0f) - 1; | ||||
8588 | nr = nr * 255 + (*p++ - 1); | ||||
8589 | nr = nr * 255 + (*p++ - 1); | ||||
8590 | nr = nr * 255 + (*p++ - 1); | ||||
8591 | } | ||||
8592 | |||||
8593 | *pp = p; | ||||
8594 | return nr; | ||||
8595 | } | ||||
8596 | |||||
8597 | /* | ||||
8598 | * Write the .sug file in "fname". | ||||
8599 | */ | ||||
8600 | static void | ||||
8601 | sug_write(spin, fname) | ||||
8602 | spellinfo_T *spin; | ||||
8603 | char_u *fname; | ||||
8604 | { | ||||
8605 | FILE *fd; | ||||
8606 | wordnode_T *tree; | ||||
8607 | int nodecount; | ||||
8608 | int wcount; | ||||
8609 | char_u *line; | ||||
8610 | linenr_T lnum; | ||||
8611 | int len; | ||||
8612 | |||||
8613 | /* Create the file. Note that an existing file is silently overwritten! */ | ||||
8614 | fd = mch_fopen((char *)fname, "w"); | ||||
8615 | if (fd == NULL) | ||||
8616 | { | ||||
8617 | EMSG2(_(e_notopen), fname); | ||||
8618 | return; | ||||
8619 | } | ||||
8620 | |||||
8621 | vim_snprintf((char *)IObuff, IOSIZE, | ||||
8622 | _("Writing suggestion file %s ..."), fname); | ||||
8623 | spell_message(spin, IObuff); | ||||
8624 | |||||
8625 | /* | ||||
8626 | * <SUGHEADER>: <fileID> <versionnr> <timestamp> | ||||
8627 | */ | ||||
8628 | if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */ | ||||
8629 | { | ||||
8630 | EMSG(_(e_write)); | ||||
8631 | goto theend; | ||||
8632 | } | ||||
8633 | putc(VIMSUGVERSION, fd); /* <versionnr> */ | ||||
8634 | |||||
8635 | /* Write si_sugtime to the file. */ | ||||
8636 | put_sugtime(spin, fd); /* <timestamp> */ | ||||
8637 | |||||
8638 | /* | ||||
8639 | * <SUGWORDTREE> | ||||
8640 | */ | ||||
8641 | spin->si_memtot = 0; | ||||
8642 | tree = spin->si_foldroot->wn_sibling; | ||||
8643 | |||||
8644 | /* Clear the index and wnode fields in the tree. */ | ||||
8645 | clear_node(tree); | ||||
8646 | |||||
8647 | /* Count the number of nodes. Needed to be able to allocate the | ||||
8648 | * memory when reading the nodes. Also fills in index for shared | ||||
8649 | * nodes. */ | ||||
8650 | nodecount = put_node(NULL, tree, 0, 0, FALSE); | ||||
8651 | |||||
8652 | /* number of nodes in 4 bytes */ | ||||
8653 | put_bytes(fd, (long_u)nodecount, 4); /* <nodecount> */ | ||||
8654 | spin->si_memtot += nodecount + nodecount * sizeof(int); | ||||
8655 | |||||
8656 | /* Write the nodes. */ | ||||
8657 | (void)put_node(fd, tree, 0, 0, FALSE); | ||||
8658 | |||||
8659 | /* | ||||
8660 | * <SUGTABLE>: <sugwcount> <sugline> ... | ||||
8661 | */ | ||||
8662 | wcount = spin->si_spellbuf->b_ml.ml_line_count; | ||||
8663 | put_bytes(fd, (long_u)wcount, 4); /* <sugwcount> */ | ||||
8664 | |||||
8665 | for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum) | ||||
8666 | { | ||||
8667 | /* <sugline>: <sugnr> ... NUL */ | ||||
8668 | line = ml_get_buf(spin->si_spellbuf, lnum, FALSE); | ||||
8669 | len = STRLEN(line) + 1; | ||||
8670 | if (fwrite(line, (size_t)len, (size_t)1, fd) == 0) | ||||
8671 | { | ||||
8672 | EMSG(_(e_write)); | ||||
8673 | goto theend; | ||||
8674 | } | ||||
8675 | spin->si_memtot += len; | ||||
8676 | } | ||||
8677 | |||||
8678 | /* Write another byte to check for errors. */ | ||||
8679 | if (putc(0, fd) == EOF) | ||||
8680 | EMSG(_(e_write)); | ||||
8681 | |||||
8682 | vim_snprintf((char *)IObuff, IOSIZE, | ||||
8683 | _("Estimated runtime memory use: %d bytes"), spin->si_memtot); | ||||
8684 | spell_message(spin, IObuff); | ||||
8685 | |||||
8686 | theend: | ||||
8687 | /* close the file */ | ||||
8688 | fclose(fd); | ||||
8689 | } | ||||
8690 | |||||
8691 | /* | ||||
8692 | * Open a spell buffer. This is a nameless buffer that is not in the buffer | ||||
8693 | * list and only contains text lines. Can use a swapfile to reduce memory | ||||
8694 | * use. | ||||
8695 | * Most other fields are invalid! Esp. watch out for string options being | ||||
8696 | * NULL and there is no undo info. | ||||
8697 | * Returns NULL when out of memory. | ||||
8698 | */ | ||||
8699 | static buf_T * | ||||
8700 | open_spellbuf() | ||||
8701 | { | ||||
8702 | buf_T *buf; | ||||
8703 | |||||
8704 | buf = (buf_T *)alloc_clear(sizeof(buf_T)); | ||||
8705 | if (buf != NULL) | ||||
8706 | { | ||||
8707 | buf->b_spell = TRUE; | ||||
8708 | buf->b_p_swf = TRUE; /* may create a swap file */ | ||||
8709 | ml_open(buf); | ||||
8710 | ml_open_file(buf); /* create swap file now */ | ||||
8711 | } | ||||
8712 | return buf; | ||||
8713 | } | ||||
8714 | |||||
8715 | /* | ||||
8716 | * Close the buffer used for spell info. | ||||
8717 | */ | ||||
8718 | static void | ||||
8719 | close_spellbuf(buf) | ||||
8720 | buf_T *buf; | ||||
8721 | { | ||||
8722 | if (buf != NULL) | ||||
8723 | { | ||||
8724 | ml_close(buf, TRUE); | ||||
8725 | vim_free(buf); | ||||
8726 | } | ||||
8727 | } | ||||
8728 | |||||
8729 | |||||
8730 | /* | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8731 | * Create a Vim spell file from one or more word lists. |
8732 | * "fnames[0]" is the output file name. | ||||
8733 | * "fnames[fcount - 1]" is the last input file name. | ||||
8734 | * Exception: when "fnames[0]" ends in ".add" it's used as the input file name | ||||
8735 | * and ".spl" is appended to make the output file name. | ||||
8736 | */ | ||||
8737 | static void | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8738 | mkspell(fcount, fnames, ascii, overwrite, added_word) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8739 | int fcount; |
8740 | char_u **fnames; | ||||
8741 | int ascii; /* -ascii argument given */ | ||||
8742 | int overwrite; /* overwrite existing output file */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8743 | int added_word; /* invoked through "zg" */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8744 | { |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8745 | char_u fname[MAXPATHL]; |
8746 | char_u wfname[MAXPATHL]; | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8747 | char_u **innames; |
8748 | int incount; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8749 | afffile_T *(afile[8]); |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8750 | int i; |
8751 | int len; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8752 | struct stat st; |
Bram Moolenaar | 8fef2ad | 2005-04-23 20:42:23 +0000 | [diff] [blame] | 8753 | int error = FALSE; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8754 | spellinfo_T spin; |
8755 | |||||
8756 | vim_memset(&spin, 0, sizeof(spin)); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8757 | spin.si_verbose = !added_word; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8758 | spin.si_ascii = ascii; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8759 | spin.si_followup = TRUE; |
8760 | spin.si_rem_accents = TRUE; | ||||
8761 | ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8762 | ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8763 | ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20); |
8764 | ga_init2(&spin.si_map, (int)sizeof(char_u), 100); | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 8765 | ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8766 | ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8767 | hash_init(&spin.si_commonwords); |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 8768 | spin.si_newcompID = 127; /* start compound ID at first maximum */ |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8769 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8770 | /* default: fnames[0] is output file, following are input files */ |
8771 | innames = &fnames[1]; | ||||
8772 | incount = fcount - 1; | ||||
8773 | |||||
8774 | if (fcount >= 1) | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 8775 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8776 | len = STRLEN(fnames[0]); |
8777 | if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0) | ||||
8778 | { | ||||
8779 | /* For ":mkspell path/en.latin1.add" output file is | ||||
8780 | * "path/en.latin1.add.spl". */ | ||||
8781 | innames = &fnames[0]; | ||||
8782 | incount = 1; | ||||
8783 | vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]); | ||||
8784 | } | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 8785 | else if (fcount == 1) |
8786 | { | ||||
8787 | /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */ | ||||
8788 | innames = &fnames[0]; | ||||
8789 | incount = 1; | ||||
8790 | vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], | ||||
8791 | spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | ||||
8792 | } | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8793 | else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0) |
8794 | { | ||||
8795 | /* Name ends in ".spl", use as the file name. */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8796 | vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8797 | } |
8798 | else | ||||
8799 | /* Name should be language, make the file name from it. */ | ||||
8800 | vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0], | ||||
8801 | spin.si_ascii ? (char_u *)"ascii" : spell_enc()); | ||||
8802 | |||||
8803 | /* Check for .ascii.spl. */ | ||||
8804 | if (strstr((char *)gettail(wfname), ".ascii.") != NULL) | ||||
8805 | spin.si_ascii = TRUE; | ||||
8806 | |||||
8807 | /* Check for .add.spl. */ | ||||
8808 | if (strstr((char *)gettail(wfname), ".add.") != NULL) | ||||
8809 | spin.si_add = TRUE; | ||||
Bram Moolenaar | 5482f33 | 2005-04-17 20:18:43 +0000 | [diff] [blame] | 8810 | } |
8811 | |||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8812 | if (incount <= 0) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8813 | EMSG(_(e_invarg)); /* need at least output and input names */ |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 8814 | else if (vim_strchr(gettail(wfname), '_') != NULL) |
8815 | EMSG(_("E751: Output file name must not have region name")); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8816 | else if (incount > 8) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8817 | EMSG(_("E754: Only up to 8 regions supported")); |
8818 | else | ||||
8819 | { | ||||
8820 | /* Check for overwriting before doing things that may take a lot of | ||||
8821 | * time. */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8822 | if (!overwrite && mch_stat((char *)wfname, &st) >= 0) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8823 | { |
8824 | EMSG(_(e_exists)); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8825 | return; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8826 | } |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8827 | if (mch_isdir(wfname)) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8828 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8829 | EMSG2(_(e_isadir2), wfname); |
8830 | return; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8831 | } |
8832 | |||||
8833 | /* | ||||
8834 | * Init the aff and dic pointers. | ||||
8835 | * Get the region names if there are more than 2 arguments. | ||||
8836 | */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8837 | for (i = 0; i < incount; ++i) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8838 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8839 | afile[i] = NULL; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8840 | |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 8841 | if (incount > 1) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8842 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8843 | len = STRLEN(innames[i]); |
8844 | if (STRLEN(gettail(innames[i])) < 5 | ||||
8845 | || innames[i][len - 3] != '_') | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8846 | { |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8847 | EMSG2(_("E755: Invalid region in %s"), innames[i]); |
8848 | return; | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8849 | } |
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 8850 | spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]); |
8851 | spin.si_region_name[i * 2 + 1] = | ||||
8852 | TOLOWER_ASC(innames[i][len - 1]); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8853 | } |
8854 | } | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 8855 | spin.si_region_count = incount; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8856 | |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 8857 | spin.si_foldroot = wordtree_alloc(&spin); |
8858 | spin.si_keeproot = wordtree_alloc(&spin); | ||||
8859 | spin.si_prefroot = wordtree_alloc(&spin); | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8860 | if (spin.si_foldroot == NULL |
8861 | || spin.si_keeproot == NULL | ||||
8862 | || spin.si_prefroot == NULL) | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8863 | { |
Bram Moolenaar | 329cc7e | 2005-08-10 07:51:35 +0000 | [diff] [blame] | 8864 | free_blocks(spin.si_blocks); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8865 | return; |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8866 | } |
8867 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 8868 | /* When not producing a .add.spl file clear the character table when |
8869 | * we encounter one in the .aff file. This means we dump the current | ||||
8870 | * one in the .spl file if the .aff file doesn't define one. That's | ||||
8871 | * better than guessing the contents, the table will match a | ||||
8872 | * previously loaded spell file. */ | ||||
8873 | if (!spin.si_add) | ||||
8874 | spin.si_clear_chartab = TRUE; | ||||
8875 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8876 | /* |
8877 | * Read all the .aff and .dic files. | ||||
8878 | * Text is converted to 'encoding'. | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8879 | * Words are stored in the case-folded and keep-case trees. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8880 | */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8881 | for (i = 0; i < incount && !error; ++i) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8882 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8883 | spin.si_conv.vc_type = CONV_NONE; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8884 | spin.si_region = 1 << i; |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8885 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8886 | vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8887 | if (mch_stat((char *)fname, &st) >= 0) |
8888 | { | ||||
8889 | /* Read the .aff file. Will init "spin->si_conv" based on the | ||||
8890 | * "SET" line. */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 8891 | afile[i] = spell_read_aff(&spin, fname); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8892 | if (afile[i] == NULL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8893 | error = TRUE; |
8894 | else | ||||
8895 | { | ||||
8896 | /* Read the .dic file and store the words in the trees. */ | ||||
8897 | vim_snprintf((char *)fname, sizeof(fname), "%s.dic", | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8898 | innames[i]); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 8899 | if (spell_read_dic(&spin, fname, afile[i]) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8900 | error = TRUE; |
8901 | } | ||||
8902 | } | ||||
8903 | else | ||||
8904 | { | ||||
8905 | /* No .aff file, try reading the file as a word list. Store | ||||
8906 | * the words in the trees. */ | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 8907 | if (spell_read_wordfile(&spin, innames[i]) == FAIL) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8908 | error = TRUE; |
8909 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8910 | |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8911 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8912 | /* Free any conversion stuff. */ |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8913 | convert_setup(&spin.si_conv, NULL, NULL); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8914 | #endif |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8915 | } |
8916 | |||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 8917 | if (spin.si_compflags != NULL && spin.si_nobreak) |
8918 | MSG(_("Warning: both compounding and NOBREAK specified")); | ||||
8919 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8920 | if (!error && !got_int) |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8921 | { |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8922 | /* |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8923 | * Combine tails in the tree. |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8924 | */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8925 | spell_message(&spin, (char_u *)_(msg_compressing)); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 8926 | wordtree_compress(&spin, spin.si_foldroot); |
8927 | wordtree_compress(&spin, spin.si_keeproot); | ||||
8928 | wordtree_compress(&spin, spin.si_prefroot); | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8929 | } |
8930 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8931 | if (!error && !got_int) |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8932 | { |
8933 | /* | ||||
8934 | * Write the info in the spell file. | ||||
8935 | */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8936 | vim_snprintf((char *)IObuff, IOSIZE, |
8937 | _("Writing spell file %s ..."), wfname); | ||||
8938 | spell_message(&spin, IObuff); | ||||
Bram Moolenaar | 50cde82 | 2005-06-05 21:54:54 +0000 | [diff] [blame] | 8939 | |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 8940 | error = write_vim_spell(&spin, wfname) == FAIL; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8941 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8942 | spell_message(&spin, (char_u *)_("Done!")); |
8943 | vim_snprintf((char *)IObuff, IOSIZE, | ||||
8944 | _("Estimated runtime memory use: %d bytes"), spin.si_memtot); | ||||
8945 | spell_message(&spin, IObuff); | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 8946 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8947 | /* |
8948 | * If the file is loaded need to reload it. | ||||
8949 | */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 8950 | if (!error) |
8951 | spell_reload_one(wfname, added_word); | ||||
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8952 | } |
8953 | |||||
8954 | /* Free the allocated memory. */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8955 | ga_clear(&spin.si_rep); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8956 | ga_clear(&spin.si_repsal); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 8957 | ga_clear(&spin.si_sal); |
8958 | ga_clear(&spin.si_map); | ||||
Bram Moolenaar | 899dddf | 2006-03-26 21:06:50 +0000 | [diff] [blame] | 8959 | ga_clear(&spin.si_comppat); |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8960 | ga_clear(&spin.si_prefcond); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8961 | hash_clear_all(&spin.si_commonwords, 0); |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 8962 | |
8963 | /* Free the .aff file structures. */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 8964 | for (i = 0; i < incount; ++i) |
8965 | if (afile[i] != NULL) | ||||
8966 | spell_free_aff(afile[i]); | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 8967 | |
8968 | /* Free all the bits and pieces at once. */ | ||||
8969 | free_blocks(spin.si_blocks); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8970 | |
8971 | /* | ||||
8972 | * If there is soundfolding info and no NOSUGFILE item create the | ||||
8973 | * .sug file with the soundfolded word trie. | ||||
8974 | */ | ||||
8975 | if (spin.si_sugtime != 0 && !error && !got_int) | ||||
8976 | spell_make_sugfile(&spin, wfname); | ||||
8977 | |||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8978 | } |
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 8979 | } |
8980 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 8981 | /* |
8982 | * Display a message for spell file processing when 'verbose' is set or using | ||||
8983 | * ":mkspell". "str" can be IObuff. | ||||
8984 | */ | ||||
8985 | static void | ||||
8986 | spell_message(spin, str) | ||||
8987 | spellinfo_T *spin; | ||||
8988 | char_u *str; | ||||
8989 | { | ||||
8990 | if (spin->si_verbose || p_verbose > 2) | ||||
8991 | { | ||||
8992 | if (!spin->si_verbose) | ||||
8993 | verbose_enter(); | ||||
8994 | MSG(str); | ||||
8995 | out_flush(); | ||||
8996 | if (!spin->si_verbose) | ||||
8997 | verbose_leave(); | ||||
8998 | } | ||||
8999 | } | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9000 | |
9001 | /* | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9002 | * ":[count]spellgood {word}" |
9003 | * ":[count]spellwrong {word}" | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9004 | * ":[count]spellundo {word}" |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9005 | */ |
9006 | void | ||||
9007 | ex_spell(eap) | ||||
9008 | exarg_T *eap; | ||||
9009 | { | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9010 | spell_add_word(eap->arg, STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong, |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9011 | eap->forceit ? 0 : (int)eap->line2, |
9012 | eap->cmdidx == CMD_spellundo); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9013 | } |
9014 | |||||
9015 | /* | ||||
9016 | * Add "word[len]" to 'spellfile' as a good or bad word. | ||||
9017 | */ | ||||
9018 | void | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9019 | spell_add_word(word, len, bad, index, undo) |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9020 | char_u *word; |
9021 | int len; | ||||
9022 | int bad; | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9023 | int index; /* "zG" and "zW": zero, otherwise index in |
9024 | 'spellfile' */ | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9025 | int undo; /* TRUE for "zug", "zuG", "zuw" and "zuW" */ |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9026 | { |
9027 | FILE *fd; | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9028 | buf_T *buf = NULL; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9029 | int new_spf = FALSE; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9030 | char_u *fname; |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9031 | char_u fnamebuf[MAXPATHL]; |
9032 | char_u line[MAXWLEN * 2]; | ||||
9033 | long fpos, fpos_next = 0; | ||||
9034 | int i; | ||||
9035 | char_u *spf; | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9036 | |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9037 | if (index == 0) /* use internal wordlist */ |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9038 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9039 | if (int_wordlist == NULL) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9040 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9041 | int_wordlist = vim_tempname('s'); |
9042 | if (int_wordlist == NULL) | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9043 | return; |
9044 | } | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9045 | fname = int_wordlist; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9046 | } |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9047 | else |
9048 | { | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9049 | /* If 'spellfile' isn't set figure out a good default value. */ |
9050 | if (*curbuf->b_p_spf == NUL) | ||||
9051 | { | ||||
9052 | init_spellfile(); | ||||
9053 | new_spf = TRUE; | ||||
9054 | } | ||||
9055 | |||||
9056 | if (*curbuf->b_p_spf == NUL) | ||||
9057 | { | ||||
Bram Moolenaar | f75a963 | 2005-09-13 21:20:47 +0000 | [diff] [blame] | 9058 | EMSG2(_(e_notset), "spellfile"); |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9059 | return; |
9060 | } | ||||
9061 | |||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9062 | for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i) |
9063 | { | ||||
9064 | copy_option_part(&spf, fnamebuf, MAXPATHL, ","); | ||||
9065 | if (i == index) | ||||
9066 | break; | ||||
9067 | if (*spf == NUL) | ||||
9068 | { | ||||
Bram Moolenaar | e344bea | 2005-09-01 20:46:49 +0000 | [diff] [blame] | 9069 | EMSGN(_("E765: 'spellfile' does not have %ld entries"), index); |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9070 | return; |
9071 | } | ||||
9072 | } | ||||
9073 | |||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9074 | /* Check that the user isn't editing the .add file somewhere. */ |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9075 | buf = buflist_findname_exp(fnamebuf); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9076 | if (buf != NULL && buf->b_ml.ml_mfp == NULL) |
9077 | buf = NULL; | ||||
9078 | if (buf != NULL && bufIsChanged(buf)) | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9079 | { |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9080 | EMSG(_(e_bufloaded)); |
9081 | return; | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9082 | } |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9083 | |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9084 | fname = fnamebuf; |
9085 | } | ||||
9086 | |||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9087 | if (bad || undo) |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9088 | { |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9089 | /* When the word appears as good word we need to remove that one, |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9090 | * since its flags sort before the one with WF_BANNED. */ |
9091 | fd = mch_fopen((char *)fname, "r"); | ||||
9092 | if (fd != NULL) | ||||
9093 | { | ||||
9094 | while (!vim_fgets(line, MAXWLEN * 2, fd)) | ||||
9095 | { | ||||
9096 | fpos = fpos_next; | ||||
9097 | fpos_next = ftell(fd); | ||||
9098 | if (STRNCMP(word, line, len) == 0 | ||||
9099 | && (line[len] == '/' || line[len] < ' ')) | ||||
9100 | { | ||||
9101 | /* Found duplicate word. Remove it by writing a '#' at | ||||
9102 | * the start of the line. Mixing reading and writing | ||||
9103 | * doesn't work for all systems, close the file first. */ | ||||
9104 | fclose(fd); | ||||
9105 | fd = mch_fopen((char *)fname, "r+"); | ||||
9106 | if (fd == NULL) | ||||
9107 | break; | ||||
9108 | if (fseek(fd, fpos, SEEK_SET) == 0) | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9109 | { |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9110 | fputc('#', fd); |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9111 | if (undo) |
9112 | smsg((char_u *)_("Word removed from %s"), NameBuff); | ||||
9113 | } | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 9114 | fseek(fd, fpos_next, SEEK_SET); |
9115 | } | ||||
9116 | } | ||||
9117 | fclose(fd); | ||||
9118 | } | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9119 | } |
9120 | |||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9121 | if (!undo) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9122 | { |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9123 | fd = mch_fopen((char *)fname, "a"); |
9124 | if (fd == NULL && new_spf) | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9125 | { |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9126 | /* We just initialized the 'spellfile' option and can't open the |
9127 | * file. We may need to create the "spell" directory first. We | ||||
9128 | * already checked the runtime directory is writable in | ||||
9129 | * init_spellfile(). */ | ||||
9130 | if (!dir_of_file_exists(fname)) | ||||
9131 | { | ||||
9132 | /* The directory doesn't exist. Try creating it and opening | ||||
9133 | * the file again. */ | ||||
9134 | vim_mkdir(NameBuff, 0755); | ||||
9135 | fd = mch_fopen((char *)fname, "a"); | ||||
9136 | } | ||||
9137 | } | ||||
9138 | |||||
9139 | if (fd == NULL) | ||||
9140 | EMSG2(_(e_notopen), fname); | ||||
9141 | else | ||||
9142 | { | ||||
9143 | if (bad) | ||||
9144 | fprintf(fd, "%.*s/!\n", len, word); | ||||
9145 | else | ||||
9146 | fprintf(fd, "%.*s\n", len, word); | ||||
9147 | fclose(fd); | ||||
9148 | |||||
9149 | home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE); | ||||
9150 | smsg((char_u *)_("Word added to %s"), NameBuff); | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9151 | } |
9152 | } | ||||
9153 | |||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 9154 | if (fd != NULL) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9155 | { |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9156 | /* Update the .add.spl file. */ |
9157 | mkspell(1, &fname, FALSE, TRUE, TRUE); | ||||
9158 | |||||
9159 | /* If the .add file is edited somewhere, reload it. */ | ||||
9160 | if (buf != NULL) | ||||
Bram Moolenaar | ea8bd73 | 2006-01-14 21:15:59 +0000 | [diff] [blame] | 9161 | buf_reload(buf, buf->b_orig_mode); |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 9162 | |
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 9163 | redraw_all_later(SOME_VALID); |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9164 | } |
9165 | } | ||||
9166 | |||||
9167 | /* | ||||
9168 | * Initialize 'spellfile' for the current buffer. | ||||
9169 | */ | ||||
9170 | static void | ||||
9171 | init_spellfile() | ||||
9172 | { | ||||
9173 | char_u buf[MAXPATHL]; | ||||
9174 | int l; | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 9175 | char_u *fname; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9176 | char_u *rtp; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9177 | char_u *lend; |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9178 | int aspath = FALSE; |
9179 | char_u *lstart = curbuf->b_p_spl; | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9180 | |
9181 | if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0) | ||||
9182 | { | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9183 | /* Find the end of the language name. Exclude the region. If there |
9184 | * is a path separator remember the start of the tail. */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9185 | for (lend = curbuf->b_p_spl; *lend != NUL |
9186 | && vim_strchr((char_u *)",._", *lend) == NULL; ++lend) | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9187 | if (vim_ispathsep(*lend)) |
9188 | { | ||||
9189 | aspath = TRUE; | ||||
9190 | lstart = lend + 1; | ||||
9191 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9192 | |
9193 | /* Loop over all entries in 'runtimepath'. Use the first one where we | ||||
9194 | * are allowed to write. */ | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9195 | rtp = p_rtp; |
9196 | while (*rtp != NUL) | ||||
9197 | { | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9198 | if (aspath) |
9199 | /* Use directory of an entry with path, e.g., for | ||||
9200 | * "/dir/lg.utf-8.spl" use "/dir". */ | ||||
9201 | vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1); | ||||
9202 | else | ||||
9203 | /* Copy the path from 'runtimepath' to buf[]. */ | ||||
9204 | copy_option_part(&rtp, buf, MAXPATHL, ","); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9205 | if (filewritable(buf) == 2) |
9206 | { | ||||
Bram Moolenaar | 3982c54 | 2005-06-08 21:56:31 +0000 | [diff] [blame] | 9207 | /* Use the first language name from 'spelllang' and the |
9208 | * encoding used in the first loaded .spl file. */ | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9209 | if (aspath) |
9210 | vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl); | ||||
9211 | else | ||||
9212 | { | ||||
9213 | l = STRLEN(buf); | ||||
9214 | vim_snprintf((char *)buf + l, MAXPATHL - l, | ||||
9215 | "/spell/%.*s", (int)(lend - lstart), lstart); | ||||
9216 | } | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9217 | l = STRLEN(buf); |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9218 | fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname; |
9219 | vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add", | ||||
9220 | fname != NULL | ||||
9221 | && strstr((char *)gettail(fname), ".ascii.") != NULL | ||||
9222 | ? (char_u *)"ascii" : spell_enc()); | ||||
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9223 | set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL); |
9224 | break; | ||||
9225 | } | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 9226 | aspath = FALSE; |
Bram Moolenaar | b765d63 | 2005-06-07 21:00:02 +0000 | [diff] [blame] | 9227 | } |
9228 | } | ||||
9229 | } | ||||
Bram Moolenaar | 402d2fe | 2005-04-15 21:00:38 +0000 | [diff] [blame] | 9230 | |
Bram Moolenaar | 51485f0 | 2005-06-04 21:55:20 +0000 | [diff] [blame] | 9231 | |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9232 | /* |
9233 | * Init the chartab used for spelling for ASCII. | ||||
9234 | * EBCDIC is not supported! | ||||
9235 | */ | ||||
9236 | static void | ||||
9237 | clear_spell_chartab(sp) | ||||
9238 | spelltab_T *sp; | ||||
9239 | { | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9240 | int i; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9241 | |
9242 | /* Init everything to FALSE. */ | ||||
9243 | vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw)); | ||||
9244 | vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu)); | ||||
9245 | for (i = 0; i < 256; ++i) | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9246 | { |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9247 | sp->st_fold[i] = i; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9248 | sp->st_upper[i] = i; |
9249 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9250 | |
9251 | /* We include digits. A word shouldn't start with a digit, but handling | ||||
9252 | * that is done separately. */ | ||||
9253 | for (i = '0'; i <= '9'; ++i) | ||||
9254 | sp->st_isw[i] = TRUE; | ||||
9255 | for (i = 'A'; i <= 'Z'; ++i) | ||||
9256 | { | ||||
9257 | sp->st_isw[i] = TRUE; | ||||
9258 | sp->st_isu[i] = TRUE; | ||||
9259 | sp->st_fold[i] = i + 0x20; | ||||
9260 | } | ||||
9261 | for (i = 'a'; i <= 'z'; ++i) | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9262 | { |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9263 | sp->st_isw[i] = TRUE; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9264 | sp->st_upper[i] = i - 0x20; |
9265 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9266 | } |
9267 | |||||
9268 | /* | ||||
9269 | * Init the chartab used for spelling. Only depends on 'encoding'. | ||||
9270 | * Called once while starting up and when 'encoding' changes. | ||||
9271 | * The default is to use isalpha(), but the spell file should define the word | ||||
9272 | * characters to make it possible that 'encoding' differs from the current | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9273 | * locale. For utf-8 we don't use isalpha() but our own functions. |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9274 | */ |
9275 | void | ||||
9276 | init_spell_chartab() | ||||
9277 | { | ||||
9278 | int i; | ||||
9279 | |||||
9280 | did_set_spelltab = FALSE; | ||||
9281 | clear_spell_chartab(&spelltab); | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9282 | #ifdef FEAT_MBYTE |
9283 | if (enc_dbcs) | ||||
9284 | { | ||||
9285 | /* DBCS: assume double-wide characters are word characters. */ | ||||
9286 | for (i = 128; i <= 255; ++i) | ||||
9287 | if (MB_BYTE2LEN(i) == 2) | ||||
9288 | spelltab.st_isw[i] = TRUE; | ||||
9289 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9290 | else if (enc_utf8) |
9291 | { | ||||
9292 | for (i = 128; i < 256; ++i) | ||||
9293 | { | ||||
9294 | spelltab.st_isu[i] = utf_isupper(i); | ||||
9295 | spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); | ||||
9296 | spelltab.st_fold[i] = utf_fold(i); | ||||
9297 | spelltab.st_upper[i] = utf_toupper(i); | ||||
9298 | } | ||||
9299 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9300 | else |
9301 | #endif | ||||
9302 | { | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9303 | /* Rough guess: use locale-dependent library functions. */ |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9304 | for (i = 128; i < 256; ++i) |
9305 | { | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9306 | if (MB_ISUPPER(i)) |
9307 | { | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9308 | spelltab.st_isw[i] = TRUE; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9309 | spelltab.st_isu[i] = TRUE; |
9310 | spelltab.st_fold[i] = MB_TOLOWER(i); | ||||
9311 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9312 | else if (MB_ISLOWER(i)) |
9313 | { | ||||
9314 | spelltab.st_isw[i] = TRUE; | ||||
9315 | spelltab.st_upper[i] = MB_TOUPPER(i); | ||||
9316 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9317 | } |
9318 | } | ||||
9319 | } | ||||
9320 | |||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9321 | /* |
9322 | * Set the spell character tables from strings in the affix file. | ||||
9323 | */ | ||||
9324 | static int | ||||
9325 | set_spell_chartab(fol, low, upp) | ||||
9326 | char_u *fol; | ||||
9327 | char_u *low; | ||||
9328 | char_u *upp; | ||||
9329 | { | ||||
9330 | /* We build the new tables here first, so that we can compare with the | ||||
9331 | * previous one. */ | ||||
9332 | spelltab_T new_st; | ||||
9333 | char_u *pf = fol, *pl = low, *pu = upp; | ||||
9334 | int f, l, u; | ||||
9335 | |||||
9336 | clear_spell_chartab(&new_st); | ||||
9337 | |||||
9338 | while (*pf != NUL) | ||||
9339 | { | ||||
9340 | if (*pl == NUL || *pu == NUL) | ||||
9341 | { | ||||
9342 | EMSG(_(e_affform)); | ||||
9343 | return FAIL; | ||||
9344 | } | ||||
9345 | #ifdef FEAT_MBYTE | ||||
9346 | f = mb_ptr2char_adv(&pf); | ||||
9347 | l = mb_ptr2char_adv(&pl); | ||||
9348 | u = mb_ptr2char_adv(&pu); | ||||
9349 | #else | ||||
9350 | f = *pf++; | ||||
9351 | l = *pl++; | ||||
9352 | u = *pu++; | ||||
9353 | #endif | ||||
9354 | /* Every character that appears is a word character. */ | ||||
9355 | if (f < 256) | ||||
9356 | new_st.st_isw[f] = TRUE; | ||||
9357 | if (l < 256) | ||||
9358 | new_st.st_isw[l] = TRUE; | ||||
9359 | if (u < 256) | ||||
9360 | new_st.st_isw[u] = TRUE; | ||||
9361 | |||||
9362 | /* if "LOW" and "FOL" are not the same the "LOW" char needs | ||||
9363 | * case-folding */ | ||||
9364 | if (l < 256 && l != f) | ||||
9365 | { | ||||
9366 | if (f >= 256) | ||||
9367 | { | ||||
9368 | EMSG(_(e_affrange)); | ||||
9369 | return FAIL; | ||||
9370 | } | ||||
9371 | new_st.st_fold[l] = f; | ||||
9372 | } | ||||
9373 | |||||
9374 | /* if "UPP" and "FOL" are not the same the "UPP" char needs | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9375 | * case-folding, it's upper case and the "UPP" is the upper case of |
9376 | * "FOL" . */ | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9377 | if (u < 256 && u != f) |
9378 | { | ||||
9379 | if (f >= 256) | ||||
9380 | { | ||||
9381 | EMSG(_(e_affrange)); | ||||
9382 | return FAIL; | ||||
9383 | } | ||||
9384 | new_st.st_fold[u] = f; | ||||
9385 | new_st.st_isu[u] = TRUE; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9386 | new_st.st_upper[f] = u; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9387 | } |
9388 | } | ||||
9389 | |||||
9390 | if (*pl != NUL || *pu != NUL) | ||||
9391 | { | ||||
9392 | EMSG(_(e_affform)); | ||||
9393 | return FAIL; | ||||
9394 | } | ||||
9395 | |||||
9396 | return set_spell_finish(&new_st); | ||||
9397 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9398 | |
9399 | /* | ||||
9400 | * Set the spell character tables from strings in the .spl file. | ||||
9401 | */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9402 | static void |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9403 | set_spell_charflags(flags, cnt, fol) |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9404 | char_u *flags; |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9405 | int cnt; /* length of "flags" */ |
9406 | char_u *fol; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9407 | { |
9408 | /* We build the new tables here first, so that we can compare with the | ||||
9409 | * previous one. */ | ||||
9410 | spelltab_T new_st; | ||||
9411 | int i; | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9412 | char_u *p = fol; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9413 | int c; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9414 | |
9415 | clear_spell_chartab(&new_st); | ||||
9416 | |||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9417 | for (i = 0; i < 128; ++i) |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9418 | { |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9419 | if (i < cnt) |
9420 | { | ||||
9421 | new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0; | ||||
9422 | new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0; | ||||
9423 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9424 | |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9425 | if (*p != NUL) |
9426 | { | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9427 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9428 | c = mb_ptr2char_adv(&p); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9429 | #else |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9430 | c = *p++; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9431 | #endif |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 9432 | new_st.st_fold[i + 128] = c; |
9433 | if (i + 128 != c && new_st.st_isu[i + 128] && c < 256) | ||||
9434 | new_st.st_upper[c] = i + 128; | ||||
9435 | } | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9436 | } |
9437 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9438 | (void)set_spell_finish(&new_st); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9439 | } |
9440 | |||||
9441 | static int | ||||
9442 | set_spell_finish(new_st) | ||||
9443 | spelltab_T *new_st; | ||||
9444 | { | ||||
9445 | int i; | ||||
9446 | |||||
9447 | if (did_set_spelltab) | ||||
9448 | { | ||||
9449 | /* check that it's the same table */ | ||||
9450 | for (i = 0; i < 256; ++i) | ||||
9451 | { | ||||
9452 | if (spelltab.st_isw[i] != new_st->st_isw[i] | ||||
9453 | || spelltab.st_isu[i] != new_st->st_isu[i] | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9454 | || spelltab.st_fold[i] != new_st->st_fold[i] |
9455 | || spelltab.st_upper[i] != new_st->st_upper[i]) | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9456 | { |
9457 | EMSG(_("E763: Word characters differ between spell files")); | ||||
9458 | return FAIL; | ||||
9459 | } | ||||
9460 | } | ||||
9461 | } | ||||
9462 | else | ||||
9463 | { | ||||
9464 | /* copy the new spelltab into the one being used */ | ||||
9465 | spelltab = *new_st; | ||||
9466 | did_set_spelltab = TRUE; | ||||
9467 | } | ||||
9468 | |||||
9469 | return OK; | ||||
9470 | } | ||||
9471 | |||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9472 | /* |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9473 | * Return TRUE if "p" points to a word character. |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9474 | * As a special case we see "midword" characters as word character when it is |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9475 | * followed by a word character. This finds they'there but not 'they there'. |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9476 | * Thus this only works properly when past the first character of the word. |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9477 | */ |
9478 | static int | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9479 | spell_iswordp(p, buf) |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9480 | char_u *p; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9481 | buf_T *buf; /* buffer used */ |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9482 | { |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9483 | #ifdef FEAT_MBYTE |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9484 | char_u *s; |
9485 | int l; | ||||
9486 | int c; | ||||
9487 | |||||
9488 | if (has_mbyte) | ||||
9489 | { | ||||
9490 | l = MB_BYTE2LEN(*p); | ||||
9491 | s = p; | ||||
9492 | if (l == 1) | ||||
9493 | { | ||||
9494 | /* be quick for ASCII */ | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9495 | if (buf->b_spell_ismw[*p]) |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9496 | { |
9497 | s = p + 1; /* skip a mid-word character */ | ||||
9498 | l = MB_BYTE2LEN(*s); | ||||
9499 | } | ||||
9500 | } | ||||
9501 | else | ||||
9502 | { | ||||
9503 | c = mb_ptr2char(p); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9504 | if (c < 256 ? buf->b_spell_ismw[c] |
9505 | : (buf->b_spell_ismw_mb != NULL | ||||
9506 | && vim_strchr(buf->b_spell_ismw_mb, c) != NULL)) | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9507 | { |
9508 | s = p + l; | ||||
9509 | l = MB_BYTE2LEN(*s); | ||||
9510 | } | ||||
9511 | } | ||||
9512 | |||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9513 | c = mb_ptr2char(s); |
9514 | if (c > 255) | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9515 | return mb_get_class(s) >= 2; |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9516 | return spelltab.st_isw[c]; |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9517 | } |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9518 | #endif |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 9519 | |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9520 | return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]]; |
9521 | } | ||||
9522 | |||||
9523 | /* | ||||
9524 | * Return TRUE if "p" points to a word character. | ||||
9525 | * Unlike spell_iswordp() this doesn't check for "midword" characters. | ||||
9526 | */ | ||||
9527 | static int | ||||
9528 | spell_iswordp_nmw(p) | ||||
9529 | char_u *p; | ||||
9530 | { | ||||
9531 | #ifdef FEAT_MBYTE | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9532 | int c; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9533 | |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9534 | if (has_mbyte) |
9535 | { | ||||
9536 | c = mb_ptr2char(p); | ||||
9537 | if (c > 255) | ||||
9538 | return mb_get_class(p) >= 2; | ||||
9539 | return spelltab.st_isw[c]; | ||||
9540 | } | ||||
9541 | #endif | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 9542 | return spelltab.st_isw[*p]; |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9543 | } |
9544 | |||||
#ifdef FEAT_MBYTE
/*
 * Return TRUE if "p" points to a word character.
 * Wide version of spell_iswordp(): "p" points into an array with one int per
 * character.  When "p" is at a "midword" character of buffer "buf" the
 * character after it is checked instead.
 */
static int
spell_iswordp_w(p, buf)
    int         *p;
    buf_T       *buf;
{
    int         is_midword;
    int         c;

    /* Mid-word characters below 256 are in the b_spell_ismw[] table, others
     * are in the b_spell_ismw_mb string, if there is one. */
    if (*p < 256)
        is_midword = buf->b_spell_ismw[*p];
    else
        is_midword = (buf->b_spell_ismw_mb != NULL
                      && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL);

    /* Skip over a mid-word character. */
    c = is_midword ? p[1] : p[0];

    if (c <= 255)
        return spelltab.st_isw[c];

    /* Characters above 255 are not in "spelltab", use the character class
     * of the encoding, if it has one. */
    if (enc_utf8)
        return utf_class(c) >= 2;
    if (enc_dbcs)
        return dbcs_class((unsigned)c >> 8, c & 0xff) >= 2;
    return 0;
}
#endif
9575 | |||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 9576 | /* |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9577 | * Write the table with prefix conditions to the .spl file. |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9578 | * When "fd" is NULL only count the length of what is written. |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9579 | */ |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9580 | static int |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9581 | write_spell_prefcond(fd, gap) |
9582 | FILE *fd; | ||||
9583 | garray_T *gap; | ||||
9584 | { | ||||
9585 | int i; | ||||
9586 | char_u *p; | ||||
9587 | int len; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9588 | int totlen; |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9589 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9590 | if (fd != NULL) |
9591 | put_bytes(fd, (long_u)gap->ga_len, 2); /* <prefcondcnt> */ | ||||
9592 | |||||
9593 | totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */ | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9594 | |
9595 | for (i = 0; i < gap->ga_len; ++i) | ||||
9596 | { | ||||
9597 | /* <prefcond> : <condlen> <condstr> */ | ||||
9598 | p = ((char_u **)gap->ga_data)[i]; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9599 | if (p != NULL) |
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9600 | { |
9601 | len = STRLEN(p); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9602 | if (fd != NULL) |
9603 | { | ||||
9604 | fputc(len, fd); | ||||
9605 | fwrite(p, (size_t)len, (size_t)1, fd); | ||||
9606 | } | ||||
9607 | totlen += len; | ||||
Bram Moolenaar | 1d73c88 | 2005-06-19 22:48:47 +0000 | [diff] [blame] | 9608 | } |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9609 | else if (fd != NULL) |
9610 | fputc(0, fd); | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9611 | } |
9612 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9613 | return totlen; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9614 | } |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9615 | |
9616 | /* | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9617 | * Case-fold "str[len]" into "buf[buflen]". The result is NUL terminated. |
9618 | * Uses the character definitions from the .spl file. | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9619 | * When using a multi-byte 'encoding' the length may change! |
9620 | * Returns FAIL when something wrong. | ||||
9621 | */ | ||||
9622 | static int | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9623 | spell_casefold(str, len, buf, buflen) |
9624 | char_u *str; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9625 | int len; |
9626 | char_u *buf; | ||||
9627 | int buflen; | ||||
9628 | { | ||||
9629 | int i; | ||||
9630 | |||||
9631 | if (len >= buflen) | ||||
9632 | { | ||||
9633 | buf[0] = NUL; | ||||
9634 | return FAIL; /* result will not fit */ | ||||
9635 | } | ||||
9636 | |||||
9637 | #ifdef FEAT_MBYTE | ||||
9638 | if (has_mbyte) | ||||
9639 | { | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9640 | int outi = 0; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9641 | char_u *p; |
9642 | int c; | ||||
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9643 | |
9644 | /* Fold one character at a time. */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9645 | for (p = str; p < str + len; ) |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9646 | { |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9647 | if (outi + MB_MAXBYTES > buflen) |
9648 | { | ||||
9649 | buf[outi] = NUL; | ||||
9650 | return FAIL; | ||||
9651 | } | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9652 | c = mb_cptr2char_adv(&p); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9653 | outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi); |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9654 | } |
9655 | buf[outi] = NUL; | ||||
9656 | } | ||||
9657 | else | ||||
9658 | #endif | ||||
9659 | { | ||||
9660 | /* Be quick for non-multibyte encodings. */ | ||||
9661 | for (i = 0; i < len; ++i) | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9662 | buf[i] = spelltab.st_fold[str[i]]; |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 9663 | buf[i] = NUL; |
9664 | } | ||||
9665 | |||||
9666 | return OK; | ||||
9667 | } | ||||
9668 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9669 | /* values for sps_flags */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9670 | #define SPS_BEST 1 |
9671 | #define SPS_FAST 2 | ||||
9672 | #define SPS_DOUBLE 4 | ||||
9673 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9674 | static int sps_flags = SPS_BEST; /* flags from 'spellsuggest' */ |
9675 | static int sps_limit = 9999; /* max nr of suggestions given */ | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9676 | |
9677 | /* | ||||
9678 | * Check the 'spellsuggest' option. Return FAIL if it's wrong. | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9679 | * Sets "sps_flags" and "sps_limit". |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9680 | */ |
9681 | int | ||||
9682 | spell_check_sps() | ||||
9683 | { | ||||
9684 | char_u *p; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9685 | char_u *s; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9686 | char_u buf[MAXPATHL]; |
9687 | int f; | ||||
9688 | |||||
9689 | sps_flags = 0; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9690 | sps_limit = 9999; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9691 | |
9692 | for (p = p_sps; *p != NUL; ) | ||||
9693 | { | ||||
9694 | copy_option_part(&p, buf, MAXPATHL, ","); | ||||
9695 | |||||
9696 | f = 0; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9697 | if (VIM_ISDIGIT(*buf)) |
9698 | { | ||||
9699 | s = buf; | ||||
9700 | sps_limit = getdigits(&s); | ||||
9701 | if (*s != NUL && !VIM_ISDIGIT(*s)) | ||||
9702 | f = -1; | ||||
9703 | } | ||||
9704 | else if (STRCMP(buf, "best") == 0) | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9705 | f = SPS_BEST; |
9706 | else if (STRCMP(buf, "fast") == 0) | ||||
9707 | f = SPS_FAST; | ||||
9708 | else if (STRCMP(buf, "double") == 0) | ||||
9709 | f = SPS_DOUBLE; | ||||
9710 | else if (STRNCMP(buf, "expr:", 5) != 0 | ||||
9711 | && STRNCMP(buf, "file:", 5) != 0) | ||||
9712 | f = -1; | ||||
9713 | |||||
9714 | if (f == -1 || (sps_flags != 0 && f != 0)) | ||||
9715 | { | ||||
9716 | sps_flags = SPS_BEST; | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9717 | sps_limit = 9999; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9718 | return FAIL; |
9719 | } | ||||
9720 | if (f != 0) | ||||
9721 | sps_flags = f; | ||||
9722 | } | ||||
9723 | |||||
9724 | if (sps_flags == 0) | ||||
9725 | sps_flags = SPS_BEST; | ||||
9726 | |||||
9727 | return OK; | ||||
9728 | } | ||||
9729 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9730 | /* |
9731 | * "z?": Find badly spelled word under or after the cursor. | ||||
9732 | * Give suggestions for the properly spelled word. | ||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 9733 | * In Visual mode use the highlighted word as the bad word. |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9734 | * When "count" is non-zero use that suggestion. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9735 | */ |
9736 | void | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9737 | spell_suggest(count) |
9738 | int count; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9739 | { |
9740 | char_u *line; | ||||
9741 | pos_T prev_cursor = curwin->w_cursor; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9742 | char_u wcopy[MAXWLEN + 2]; |
9743 | char_u *p; | ||||
9744 | int i; | ||||
9745 | int c; | ||||
9746 | suginfo_T sug; | ||||
9747 | suggest_T *stp; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9748 | int mouse_used; |
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 9749 | int need_cap; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9750 | int limit; |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9751 | int selected = count; |
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 9752 | int badlen = 0; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9753 | |
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 9754 | if (no_spell_checking(curwin)) |
9755 | return; | ||||
9756 | |||||
9757 | #ifdef FEAT_VISUAL | ||||
9758 | if (VIsual_active) | ||||
9759 | { | ||||
9760 | /* Use the Visually selected text as the bad word. But reject | ||||
9761 | * a multi-line selection. */ | ||||
9762 | if (curwin->w_cursor.lnum != VIsual.lnum) | ||||
9763 | { | ||||
9764 | vim_beep(); | ||||
9765 | return; | ||||
9766 | } | ||||
9767 | badlen = (int)curwin->w_cursor.col - (int)VIsual.col; | ||||
9768 | if (badlen < 0) | ||||
9769 | badlen = -badlen; | ||||
9770 | else | ||||
9771 | curwin->w_cursor.col = VIsual.col; | ||||
9772 | ++badlen; | ||||
9773 | end_visual_mode(); | ||||
9774 | } | ||||
9775 | else | ||||
9776 | #endif | ||||
9777 | /* Find the start of the badly spelled word. */ | ||||
9778 | if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0 | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9779 | || curwin->w_cursor.col > prev_cursor.col) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9780 | { |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9781 | /* No bad word or it starts after the cursor: use the word under the |
9782 | * cursor. */ | ||||
9783 | curwin->w_cursor = prev_cursor; | ||||
9784 | line = ml_get_curline(); | ||||
9785 | p = line + curwin->w_cursor.col; | ||||
9786 | /* Backup to before start of word. */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9787 | while (p > line && spell_iswordp_nmw(p)) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9788 | mb_ptr_back(line, p); |
9789 | /* Forward to start of word. */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9790 | while (*p != NUL && !spell_iswordp_nmw(p)) |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9791 | mb_ptr_adv(p); |
9792 | |||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 9793 | if (!spell_iswordp_nmw(p)) /* No word found. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9794 | { |
9795 | beep_flush(); | ||||
9796 | return; | ||||
9797 | } | ||||
9798 | curwin->w_cursor.col = p - line; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9799 | } |
9800 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9801 | /* Get the word and its length. */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9802 | |
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 9803 | /* Figure out if the word should be capitalised. */ |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 9804 | need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col); |
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 9805 | |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 9806 | line = ml_get_curline(); |
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 9807 | |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 9808 | /* Get the list of suggestions. Limit to 'lines' - 2 or the number in |
9809 | * 'spellsuggest', whatever is smaller. */ | ||||
9810 | if (sps_limit > (int)Rows - 2) | ||||
9811 | limit = (int)Rows - 2; | ||||
9812 | else | ||||
9813 | limit = sps_limit; | ||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 9814 | spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit, |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9815 | TRUE, need_cap, TRUE); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9816 | |
9817 | if (sug.su_ga.ga_len == 0) | ||||
9818 | MSG(_("Sorry, no suggestions")); | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9819 | else if (count > 0) |
9820 | { | ||||
9821 | if (count > sug.su_ga.ga_len) | ||||
9822 | smsg((char_u *)_("Sorry, only %ld suggestions"), | ||||
9823 | (long)sug.su_ga.ga_len); | ||||
9824 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9825 | else |
9826 | { | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9827 | vim_free(repl_from); |
9828 | repl_from = NULL; | ||||
9829 | vim_free(repl_to); | ||||
9830 | repl_to = NULL; | ||||
9831 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9832 | #ifdef FEAT_RIGHTLEFT |
9833 | /* When 'rightleft' is set the list is drawn right-left. */ | ||||
9834 | cmdmsg_rl = curwin->w_p_rl; | ||||
9835 | if (cmdmsg_rl) | ||||
9836 | msg_col = Columns - 1; | ||||
9837 | #endif | ||||
9838 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9839 | /* List the suggestions. */ |
9840 | msg_start(); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9841 | lines_left = Rows; /* avoid more prompt */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9842 | vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"), |
9843 | sug.su_badlen, sug.su_badptr); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9844 | #ifdef FEAT_RIGHTLEFT |
9845 | if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0) | ||||
9846 | { | ||||
9847 | /* And now the rabbit from the high hat: Avoid showing the | ||||
9848 | * untranslated message rightleft. */ | ||||
9849 | vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC", | ||||
9850 | sug.su_badlen, sug.su_badptr); | ||||
9851 | } | ||||
9852 | #endif | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9853 | msg_puts(IObuff); |
9854 | msg_clr_eos(); | ||||
9855 | msg_putchar('\n'); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9856 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9857 | msg_scroll = TRUE; |
9858 | for (i = 0; i < sug.su_ga.ga_len; ++i) | ||||
9859 | { | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9860 | stp = &SUG(sug.su_ga, i); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9861 | |
9862 | /* The suggested word may replace only part of the bad word, add | ||||
9863 | * the not replaced part. */ | ||||
9864 | STRCPY(wcopy, stp->st_word); | ||||
9865 | if (sug.su_badlen > stp->st_orglen) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9866 | vim_strncpy(wcopy + stp->st_wordlen, |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9867 | sug.su_badptr + stp->st_orglen, |
9868 | sug.su_badlen - stp->st_orglen); | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9869 | vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1); |
9870 | #ifdef FEAT_RIGHTLEFT | ||||
9871 | if (cmdmsg_rl) | ||||
9872 | rl_mirror(IObuff); | ||||
9873 | #endif | ||||
9874 | msg_puts(IObuff); | ||||
9875 | |||||
9876 | vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9877 | msg_puts(IObuff); |
9878 | |||||
9879 | /* The word may replace more than "su_badlen". */ | ||||
9880 | if (sug.su_badlen < stp->st_orglen) | ||||
9881 | { | ||||
9882 | vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""), | ||||
9883 | stp->st_orglen, sug.su_badptr); | ||||
9884 | msg_puts(IObuff); | ||||
9885 | } | ||||
9886 | |||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 9887 | if (p_verbose > 0) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9888 | { |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9889 | /* Add the score. */ |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 9890 | if (sps_flags & (SPS_DOUBLE | SPS_BEST)) |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9891 | vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)", |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9892 | stp->st_salscore ? "s " : "", |
9893 | stp->st_score, stp->st_altscore); | ||||
9894 | else | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9895 | vim_snprintf((char *)IObuff, IOSIZE, " (%d)", |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9896 | stp->st_score); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9897 | #ifdef FEAT_RIGHTLEFT |
9898 | if (cmdmsg_rl) | ||||
9899 | /* Mirror the numbers, but keep the leading space. */ | ||||
9900 | rl_mirror(IObuff + 1); | ||||
9901 | #endif | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 9902 | msg_advance(30); |
9903 | msg_puts(IObuff); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9904 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9905 | msg_putchar('\n'); |
9906 | } | ||||
9907 | |||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 9908 | #ifdef FEAT_RIGHTLEFT |
9909 | cmdmsg_rl = FALSE; | ||||
9910 | msg_col = 0; | ||||
9911 | #endif | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9912 | /* Ask for choice. */ |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9913 | selected = prompt_for_number(&mouse_used); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 9914 | if (mouse_used) |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9915 | selected -= lines_left; |
Bram Moolenaar | 0fd9289 | 2006-03-09 22:27:48 +0000 | [diff] [blame] | 9916 | lines_left = Rows; /* avoid more prompt */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 9917 | } |
9918 | |||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9919 | if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK) |
9920 | { | ||||
9921 | /* Save the from and to text for :spellrepall. */ | ||||
9922 | stp = &SUG(sug.su_ga, selected - 1); | ||||
Bram Moolenaar | d5cdbeb | 2005-10-10 20:59:28 +0000 | [diff] [blame] | 9923 | if (sug.su_badlen > stp->st_orglen) |
9924 | { | ||||
9925 | /* Replacing less than "su_badlen", append the remainder to | ||||
9926 | * repl_to. */ | ||||
9927 | repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen); | ||||
9928 | vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word, | ||||
9929 | sug.su_badlen - stp->st_orglen, | ||||
9930 | sug.su_badptr + stp->st_orglen); | ||||
9931 | repl_to = vim_strsave(IObuff); | ||||
9932 | } | ||||
9933 | else | ||||
9934 | { | ||||
9935 | /* Replacing su_badlen or more, use the whole word. */ | ||||
9936 | repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen); | ||||
9937 | repl_to = vim_strsave(stp->st_word); | ||||
9938 | } | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9939 | |
9940 | /* Replace the word. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9941 | p = alloc(STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1); |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9942 | if (p != NULL) |
9943 | { | ||||
9944 | c = sug.su_badptr - line; | ||||
9945 | mch_memmove(p, line, c); | ||||
9946 | STRCPY(p + c, stp->st_word); | ||||
9947 | STRCAT(p, sug.su_badptr + stp->st_orglen); | ||||
9948 | ml_replace(curwin->w_cursor.lnum, p, FALSE); | ||||
9949 | curwin->w_cursor.col = c; | ||||
9950 | changed_bytes(curwin->w_cursor.lnum, c); | ||||
9951 | |||||
9952 | /* For redo we use a change-word command. */ | ||||
9953 | ResetRedobuff(); | ||||
9954 | AppendToRedobuff((char_u *)"ciw"); | ||||
Bram Moolenaar | ebefac6 | 2005-12-28 22:39:57 +0000 | [diff] [blame] | 9955 | AppendToRedobuffLit(p + c, |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 9956 | stp->st_wordlen + sug.su_badlen - stp->st_orglen); |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 9957 | AppendCharToRedobuff(ESC); |
9958 | } | ||||
9959 | } | ||||
9960 | else | ||||
9961 | curwin->w_cursor = prev_cursor; | ||||
9962 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 9963 | spell_find_cleanup(&sug); |
9964 | } | ||||
9965 | |||||
9966 | /* | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 9967 | * Check if the word at line "lnum" column "col" is required to start with a |
9968 | * capital. This uses 'spellcapcheck' of the current buffer. | ||||
9969 | */ | ||||
9970 | static int | ||||
9971 | check_need_cap(lnum, col) | ||||
9972 | linenr_T lnum; | ||||
9973 | colnr_T col; | ||||
9974 | { | ||||
9975 | int need_cap = FALSE; | ||||
9976 | char_u *line; | ||||
9977 | char_u *line_copy = NULL; | ||||
9978 | char_u *p; | ||||
9979 | colnr_T endcol; | ||||
9980 | regmatch_T regmatch; | ||||
9981 | |||||
9982 | if (curbuf->b_cap_prog == NULL) | ||||
9983 | return FALSE; | ||||
9984 | |||||
9985 | line = ml_get_curline(); | ||||
9986 | endcol = 0; | ||||
9987 | if ((int)(skipwhite(line) - line) >= (int)col) | ||||
9988 | { | ||||
9989 | /* At start of line, check if previous line is empty or sentence | ||||
9990 | * ends there. */ | ||||
9991 | if (lnum == 1) | ||||
9992 | need_cap = TRUE; | ||||
9993 | else | ||||
9994 | { | ||||
9995 | line = ml_get(lnum - 1); | ||||
9996 | if (*skipwhite(line) == NUL) | ||||
9997 | need_cap = TRUE; | ||||
9998 | else | ||||
9999 | { | ||||
10000 | /* Append a space in place of the line break. */ | ||||
10001 | line_copy = concat_str(line, (char_u *)" "); | ||||
10002 | line = line_copy; | ||||
10003 | endcol = STRLEN(line); | ||||
10004 | } | ||||
10005 | } | ||||
10006 | } | ||||
10007 | else | ||||
10008 | endcol = col; | ||||
10009 | |||||
10010 | if (endcol > 0) | ||||
10011 | { | ||||
10012 | /* Check if sentence ends before the bad word. */ | ||||
10013 | regmatch.regprog = curbuf->b_cap_prog; | ||||
10014 | regmatch.rm_ic = FALSE; | ||||
10015 | p = line + endcol; | ||||
10016 | for (;;) | ||||
10017 | { | ||||
10018 | mb_ptr_back(line, p); | ||||
10019 | if (p == line || spell_iswordp_nmw(p)) | ||||
10020 | break; | ||||
10021 | if (vim_regexec(®match, p, 0) | ||||
10022 | && regmatch.endp[0] == line + endcol) | ||||
10023 | { | ||||
10024 | need_cap = TRUE; | ||||
10025 | break; | ||||
10026 | } | ||||
10027 | } | ||||
10028 | } | ||||
10029 | |||||
10030 | vim_free(line_copy); | ||||
10031 | |||||
10032 | return need_cap; | ||||
10033 | } | ||||
10034 | |||||
10035 | |||||
10036 | /* | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10037 | * ":spellrepall" |
10038 | */ | ||||
10039 | /*ARGSUSED*/ | ||||
10040 | void | ||||
10041 | ex_spellrepall(eap) | ||||
10042 | exarg_T *eap; | ||||
10043 | { | ||||
10044 | pos_T pos = curwin->w_cursor; | ||||
10045 | char_u *frompat; | ||||
10046 | int addlen; | ||||
10047 | char_u *line; | ||||
10048 | char_u *p; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10049 | int save_ws = p_ws; |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 10050 | linenr_T prev_lnum = 0; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10051 | |
10052 | if (repl_from == NULL || repl_to == NULL) | ||||
10053 | { | ||||
10054 | EMSG(_("E752: No previous spell replacement")); | ||||
10055 | return; | ||||
10056 | } | ||||
10057 | addlen = STRLEN(repl_to) - STRLEN(repl_from); | ||||
10058 | |||||
10059 | frompat = alloc(STRLEN(repl_from) + 7); | ||||
10060 | if (frompat == NULL) | ||||
10061 | return; | ||||
10062 | sprintf((char *)frompat, "\\V\\<%s\\>", repl_from); | ||||
10063 | p_ws = FALSE; | ||||
10064 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 10065 | sub_nsubs = 0; |
10066 | sub_nlines = 0; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10067 | curwin->w_cursor.lnum = 0; |
10068 | while (!got_int) | ||||
10069 | { | ||||
10070 | if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP) == 0 | ||||
10071 | || u_save_cursor() == FAIL) | ||||
10072 | break; | ||||
10073 | |||||
10074 | /* Only replace when the right word isn't there yet. This happens | ||||
10075 | * when changing "etc" to "etc.". */ | ||||
10076 | line = ml_get_curline(); | ||||
10077 | if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col, | ||||
10078 | repl_to, STRLEN(repl_to)) != 0) | ||||
10079 | { | ||||
10080 | p = alloc(STRLEN(line) + addlen + 1); | ||||
10081 | if (p == NULL) | ||||
10082 | break; | ||||
10083 | mch_memmove(p, line, curwin->w_cursor.col); | ||||
10084 | STRCPY(p + curwin->w_cursor.col, repl_to); | ||||
10085 | STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from)); | ||||
10086 | ml_replace(curwin->w_cursor.lnum, p, FALSE); | ||||
10087 | changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 10088 | |
10089 | if (curwin->w_cursor.lnum != prev_lnum) | ||||
10090 | { | ||||
10091 | ++sub_nlines; | ||||
10092 | prev_lnum = curwin->w_cursor.lnum; | ||||
10093 | } | ||||
10094 | ++sub_nsubs; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10095 | } |
10096 | curwin->w_cursor.col += STRLEN(repl_to); | ||||
10097 | } | ||||
10098 | |||||
10099 | p_ws = save_ws; | ||||
10100 | curwin->w_cursor = pos; | ||||
10101 | vim_free(frompat); | ||||
10102 | |||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 10103 | if (sub_nsubs == 0) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10104 | EMSG2(_("E753: Not found: %s"), repl_from); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 10105 | else |
10106 | do_sub_msg(FALSE); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10107 | } |
10108 | |||||
10109 | /* | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10110 | * Find spell suggestions for "word". Return them in the growarray "*gap" as |
10111 | * a list of allocated strings. | ||||
10112 | */ | ||||
10113 | void | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10114 | spell_suggest_list(gap, word, maxcount, need_cap, interactive) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10115 | garray_T *gap; |
10116 | char_u *word; | ||||
10117 | int maxcount; /* maximum nr of suggestions */ | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 10118 | int need_cap; /* 'spellcapcheck' matched */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10119 | int interactive; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10120 | { |
10121 | suginfo_T sug; | ||||
10122 | int i; | ||||
10123 | suggest_T *stp; | ||||
10124 | char_u *wcopy; | ||||
10125 | |||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 10126 | spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10127 | |
10128 | /* Make room in "gap". */ | ||||
10129 | ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1); | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10130 | if (ga_grow(gap, sug.su_ga.ga_len) == OK) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10131 | { |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10132 | for (i = 0; i < sug.su_ga.ga_len; ++i) |
10133 | { | ||||
10134 | stp = &SUG(sug.su_ga, i); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10135 | |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10136 | /* The suggested word may replace only part of "word", add the not |
10137 | * replaced part. */ | ||||
10138 | wcopy = alloc(stp->st_wordlen | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10139 | + STRLEN(sug.su_badptr + stp->st_orglen) + 1); |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10140 | if (wcopy == NULL) |
10141 | break; | ||||
10142 | STRCPY(wcopy, stp->st_word); | ||||
10143 | STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen); | ||||
10144 | ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy; | ||||
10145 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10146 | } |
10147 | |||||
10148 | spell_find_cleanup(&sug); | ||||
10149 | } | ||||
10150 | |||||
10151 | /* | ||||
10152 | * Find spell suggestions for the word at the start of "badptr". | ||||
10153 | * Return the suggestions in "su->su_ga". | ||||
10154 | * The maximum number of suggestions is "maxcount". | ||||
10155 | * Note: does use info for the current window. | ||||
10156 | * This is based on the mechanisms of Aspell, but completely reimplemented. | ||||
10157 | */ | ||||
10158 | static void | ||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 10159 | spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10160 | char_u *badptr; |
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 10161 | int badlen; /* length of bad word or 0 if unknown */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10162 | suginfo_T *su; |
10163 | int maxcount; | ||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 10164 | int banbadword; /* don't include badword in suggestions */ |
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 10165 | int need_cap; /* word should start with capital */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10166 | int interactive; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10167 | { |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 10168 | hlf_T attr = HLF_COUNT; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10169 | char_u buf[MAXPATHL]; |
10170 | char_u *p; | ||||
10171 | int do_combine = FALSE; | ||||
10172 | char_u *sps_copy; | ||||
10173 | #ifdef FEAT_EVAL | ||||
10174 | static int expr_busy = FALSE; | ||||
10175 | #endif | ||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 10176 | int c; |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 10177 | int i; |
10178 | langp_T *lp; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10179 | |
10180 | /* | ||||
10181 | * Set the info in "*su". | ||||
10182 | */ | ||||
10183 | vim_memset(su, 0, sizeof(suginfo_T)); | ||||
10184 | ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10); | ||||
10185 | ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10); | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 10186 | if (*badptr == NUL) |
10187 | return; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10188 | hash_init(&su->su_banned); |
10189 | |||||
10190 | su->su_badptr = badptr; | ||||
Bram Moolenaar | 66fa271 | 2006-01-22 23:22:22 +0000 | [diff] [blame] | 10191 | if (badlen != 0) |
10192 | su->su_badlen = badlen; | ||||
10193 | else | ||||
10194 | su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10195 | su->su_maxcount = maxcount; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10196 | su->su_maxscore = SCORE_MAXINIT; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10197 | |
10198 | if (su->su_badlen >= MAXWLEN) | ||||
10199 | su->su_badlen = MAXWLEN - 1; /* just in case */ | ||||
10200 | vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen); | ||||
10201 | (void)spell_casefold(su->su_badptr, su->su_badlen, | ||||
10202 | su->su_fbadword, MAXWLEN); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10203 | /* get caps flags for bad word */ |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 10204 | su->su_badflags = badword_captype(su->su_badptr, |
10205 | su->su_badptr + su->su_badlen); | ||||
Bram Moolenaar | 7d1f5db | 2005-07-03 21:39:27 +0000 | [diff] [blame] | 10206 | if (need_cap) |
10207 | su->su_badflags |= WF_ONECAP; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10208 | |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 10209 | /* Find the default language for sound folding. We simply use the first |
10210 | * one in 'spelllang' that supports sound folding. That's good for when | ||||
10211 | * using multiple files for one language, it's not that bad when mixing | ||||
10212 | * languages (e.g., "pl,en"). */ | ||||
10213 | for (i = 0; i < curbuf->b_langp.ga_len; ++i) | ||||
10214 | { | ||||
10215 | lp = LANGP_ENTRY(curbuf->b_langp, i); | ||||
10216 | if (lp->lp_sallang != NULL) | ||||
10217 | { | ||||
10218 | su->su_sallang = lp->lp_sallang; | ||||
10219 | break; | ||||
10220 | } | ||||
10221 | } | ||||
10222 | |||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 10223 | /* Soundfold the bad word with the default sound folding, so that we don't |
10224 | * have to do this many times. */ | ||||
10225 | if (su->su_sallang != NULL) | ||||
10226 | spell_soundfold(su->su_sallang, su->su_fbadword, TRUE, | ||||
10227 | su->su_sal_badword); | ||||
10228 | |||||
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 10229 | /* If the word is not capitalised and spell_check() doesn't consider the |
10230 | * word to be bad then it might need to be capitalised. Add a suggestion | ||||
10231 | * for that. */ | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 10232 | c = PTR2CHAR(su->su_badptr); |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 10233 | if (!SPELL_ISUPPER(c) && attr == HLF_COUNT) |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 10234 | { |
10235 | make_case_word(su->su_badword, buf, WF_ONECAP); | ||||
10236 | add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE, | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10237 | 0, TRUE, su->su_sallang, FALSE); |
Bram Moolenaar | f9184a1 | 2005-07-02 23:10:47 +0000 | [diff] [blame] | 10238 | } |
10239 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10240 | /* Ban the bad word itself. It may appear in another region. */ |
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 10241 | if (banbadword) |
10242 | add_banned(su, su->su_badword); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10243 | |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10244 | /* Make a copy of 'spellsuggest', because the expression may change it. */ |
10245 | sps_copy = vim_strsave(p_sps); | ||||
10246 | if (sps_copy == NULL) | ||||
10247 | return; | ||||
10248 | |||||
10249 | /* Loop over the items in 'spellsuggest'. */ | ||||
10250 | for (p = sps_copy; *p != NUL; ) | ||||
10251 | { | ||||
10252 | copy_option_part(&p, buf, MAXPATHL, ","); | ||||
10253 | |||||
10254 | if (STRNCMP(buf, "expr:", 5) == 0) | ||||
10255 | { | ||||
10256 | #ifdef FEAT_EVAL | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10257 | /* Evaluate an expression. Skip this when called recursively, |
10258 | * when using spellsuggest() in the expression. */ | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10259 | if (!expr_busy) |
10260 | { | ||||
10261 | expr_busy = TRUE; | ||||
10262 | spell_suggest_expr(su, buf + 5); | ||||
10263 | expr_busy = FALSE; | ||||
10264 | } | ||||
10265 | #endif | ||||
10266 | } | ||||
10267 | else if (STRNCMP(buf, "file:", 5) == 0) | ||||
10268 | /* Use list of suggestions in a file. */ | ||||
10269 | spell_suggest_file(su, buf + 5); | ||||
10270 | else | ||||
10271 | { | ||||
10272 | /* Use internal method. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10273 | spell_suggest_intern(su, interactive); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10274 | if (sps_flags & SPS_DOUBLE) |
10275 | do_combine = TRUE; | ||||
10276 | } | ||||
10277 | } | ||||
10278 | |||||
10279 | vim_free(sps_copy); | ||||
10280 | |||||
10281 | if (do_combine) | ||||
10282 | /* Combine the two list of suggestions. This must be done last, | ||||
10283 | * because sorting changes the order again. */ | ||||
10284 | score_combine(su); | ||||
10285 | } | ||||
10286 | |||||
#ifdef FEAT_EVAL
/*
 * Find suggestions by evaluating expression "expr".
 * The expression is evaluated for the bad word in "su"; every item of the
 * resulting list that is itself a list supplies a word and a score.  Items
 * with a negative score or a score above "su->su_maxscore" are ignored.
 */
static void
spell_suggest_expr(su, expr)
    suginfo_T   *su;
    char_u      *expr;
{
    list_T      *result;
    listitem_T  *item;
    char_u      *word;
    int         score;

    /* The work is split up in a few parts to avoid having to export
     * suginfo_T.
     * First evaluate the expression and get the resulting list. */
    result = eval_spell_expr(su->su_badword, expr);
    if (result != NULL)
    {
        /* Loop over the items in the list. */
        item = result->lv_first;
        while (item != NULL)
        {
            if (item->li_tv.v_type == VAR_LIST)
            {
                /* Get the word and the score from the items. */
                score = get_spellword(item->li_tv.vval.v_list, &word);
                if (score >= 0 && score <= su->su_maxscore)
                    add_suggestion(su, &su->su_ga, word, su->su_badlen,
                                       score, 0, TRUE, su->su_sallang, FALSE);
            }
            item = item->li_next;
        }
        list_unref(result);
    }

    /* Remove bogus suggestions, sort and truncate at "maxcount". */
    check_suggestions(su, &su->su_ga);
    (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
}
#endif
10325 | |||||
10326 | /* | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 10327 | * Find suggestions in file "fname". Used for "file:" in 'spellsuggest'. |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10328 | */ |
10329 | static void | ||||
10330 | spell_suggest_file(su, fname) | ||||
10331 | suginfo_T *su; | ||||
10332 | char_u *fname; | ||||
10333 | { | ||||
10334 | FILE *fd; | ||||
10335 | char_u line[MAXWLEN * 2]; | ||||
10336 | char_u *p; | ||||
10337 | int len; | ||||
10338 | char_u cword[MAXWLEN]; | ||||
10339 | |||||
10340 | /* Open the file. */ | ||||
10341 | fd = mch_fopen((char *)fname, "r"); | ||||
10342 | if (fd == NULL) | ||||
10343 | { | ||||
10344 | EMSG2(_(e_notopen), fname); | ||||
10345 | return; | ||||
10346 | } | ||||
10347 | |||||
10348 | /* Read it line by line. */ | ||||
10349 | while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int) | ||||
10350 | { | ||||
10351 | line_breakcheck(); | ||||
10352 | |||||
10353 | p = vim_strchr(line, '/'); | ||||
10354 | if (p == NULL) | ||||
10355 | continue; /* No Tab found, just skip the line. */ | ||||
10356 | *p++ = NUL; | ||||
10357 | if (STRICMP(su->su_badword, line) == 0) | ||||
10358 | { | ||||
10359 | /* Match! Isolate the good word, until CR or NL. */ | ||||
10360 | for (len = 0; p[len] >= ' '; ++len) | ||||
10361 | ; | ||||
10362 | p[len] = NUL; | ||||
10363 | |||||
10364 | /* If the suggestion doesn't have specific case duplicate the case | ||||
10365 | * of the bad word. */ | ||||
10366 | if (captype(p, NULL) == 0) | ||||
10367 | { | ||||
10368 | make_case_word(p, cword, su->su_badflags); | ||||
10369 | p = cword; | ||||
10370 | } | ||||
10371 | |||||
10372 | add_suggestion(su, &su->su_ga, p, su->su_badlen, | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10373 | SCORE_FILE, 0, TRUE, su->su_sallang, FALSE); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10374 | } |
10375 | } | ||||
10376 | |||||
10377 | fclose(fd); | ||||
10378 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10379 | /* Remove bogus suggestions, sort and truncate at "maxcount". */ |
10380 | check_suggestions(su, &su->su_ga); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10381 | (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); |
10382 | } | ||||
10383 | |||||
10384 | /* | ||||
10385 | * Find suggestions for the internal method indicated by "sps_flags". | ||||
10386 | */ | ||||
10387 | static void | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10388 | spell_suggest_intern(su, interactive) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10389 | suginfo_T *su; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10390 | int interactive; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10391 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10392 | /* |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10393 | * Load the .sug file(s) that are available and not done yet. |
10394 | */ | ||||
10395 | suggest_load_files(); | ||||
10396 | |||||
10397 | /* | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10398 | * 1. Try special cases, such as repeating a word: "the the" -> "the". |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10399 | * |
10400 | * Set a maximum score to limit the combination of operations that is | ||||
10401 | * tried. | ||||
10402 | */ | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10403 | suggest_try_special(su); |
10404 | |||||
10405 | /* | ||||
10406 | * 2. Try inserting/deleting/swapping/changing a letter, use REP entries | ||||
10407 | * from the .aff file and inserting a space (split the word). | ||||
10408 | */ | ||||
10409 | suggest_try_change(su); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10410 | |
10411 | /* For the resulting top-scorers compute the sound-a-like score. */ | ||||
10412 | if (sps_flags & SPS_DOUBLE) | ||||
10413 | score_comp_sal(su); | ||||
10414 | |||||
10415 | /* | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10416 | * 3. Try finding sound-a-like words. |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10417 | */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10418 | if ((sps_flags & SPS_FAST) == 0) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10419 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10420 | if (sps_flags & SPS_BEST) |
10421 | /* Adjust the word score for the suggestions found so far for how | ||||
10422 | * they sounds like. */ | ||||
10423 | rescore_suggestions(su); | ||||
10424 | |||||
10425 | /* | ||||
10426 | * While going throught the soundfold tree "su_maxscore" is the score | ||||
10427 | * for the soundfold word, limits the changes that are being tried, | ||||
10428 | * and "su_sfmaxscore" the rescored score, which is set by | ||||
10429 | * cleanup_suggestions(). | ||||
10430 | * First find words with a small edit distance, because this is much | ||||
10431 | * faster and often already finds the top-N suggestions. If we didn't | ||||
10432 | * find many suggestions try again with a higher edit distance. | ||||
10433 | * "sl_sounddone" is used to avoid doing the same word twice. | ||||
10434 | */ | ||||
10435 | suggest_try_soundalike_prep(); | ||||
10436 | su->su_maxscore = SCORE_SFMAX1; | ||||
10437 | su->su_sfmaxscore = SCORE_MAXINIT * 3; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10438 | suggest_try_soundalike(su); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10439 | if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) |
10440 | { | ||||
10441 | /* We didn't find enough matches, try again, allowing more | ||||
10442 | * changes to the soundfold word. */ | ||||
10443 | su->su_maxscore = SCORE_SFMAX2; | ||||
10444 | suggest_try_soundalike(su); | ||||
10445 | if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su)) | ||||
10446 | { | ||||
10447 | /* Still didn't find enough matches, try again, allowing even | ||||
10448 | * more changes to the soundfold word. */ | ||||
10449 | su->su_maxscore = SCORE_SFMAX3; | ||||
10450 | suggest_try_soundalike(su); | ||||
10451 | } | ||||
10452 | } | ||||
10453 | su->su_maxscore = su->su_sfmaxscore; | ||||
10454 | suggest_try_soundalike_finish(); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10455 | } |
10456 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10457 | /* When CTRL-C was hit while searching do show the results. Only clear |
10458 | * got_int when using a command, not for spellsuggest(). */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10459 | ui_breakcheck(); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10460 | if (interactive && got_int) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10461 | { |
10462 | (void)vgetc(); | ||||
10463 | got_int = FALSE; | ||||
10464 | } | ||||
10465 | |||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10466 | if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10467 | { |
10468 | if (sps_flags & SPS_BEST) | ||||
10469 | /* Adjust the word score for how it sounds like. */ | ||||
10470 | rescore_suggestions(su); | ||||
10471 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10472 | /* Remove bogus suggestions, sort and truncate at "maxcount". */ |
10473 | check_suggestions(su, &su->su_ga); | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 10474 | (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10475 | } |
10476 | } | ||||
10477 | |||||
10478 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10479 | * Load the .sug files for languages that have one and weren't loaded yet. |
10480 | */ | ||||
10481 | static void | ||||
10482 | suggest_load_files() | ||||
10483 | { | ||||
10484 | langp_T *lp; | ||||
10485 | int lpi; | ||||
10486 | slang_T *slang; | ||||
10487 | char_u *dotp; | ||||
10488 | FILE *fd; | ||||
10489 | char_u buf[MAXWLEN]; | ||||
10490 | int i; | ||||
10491 | time_t timestamp; | ||||
10492 | int wcount; | ||||
10493 | int wordnr; | ||||
10494 | garray_T ga; | ||||
10495 | int c; | ||||
10496 | |||||
10497 | /* Do this for all languages that support sound folding. */ | ||||
10498 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) | ||||
10499 | { | ||||
10500 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); | ||||
10501 | slang = lp->lp_slang; | ||||
10502 | if (slang->sl_sugtime != 0 && !slang->sl_sugloaded) | ||||
10503 | { | ||||
10504 | /* Change ".spl" to ".sug" and open the file. When the file isn't | ||||
10505 | * found silently skip it. Do set "sl_sugloaded" so that we | ||||
10506 | * don't try again and again. */ | ||||
10507 | slang->sl_sugloaded = TRUE; | ||||
10508 | |||||
10509 | dotp = vim_strrchr(slang->sl_fname, '.'); | ||||
10510 | if (dotp == NULL || fnamecmp(dotp, ".spl") != 0) | ||||
10511 | continue; | ||||
10512 | STRCPY(dotp, ".sug"); | ||||
10513 | fd = fopen((char *)slang->sl_fname, "r"); | ||||
10514 | if (fd == NULL) | ||||
10515 | goto nextone; | ||||
10516 | |||||
10517 | /* | ||||
10518 | * <SUGHEADER>: <fileID> <versionnr> <timestamp> | ||||
10519 | */ | ||||
10520 | for (i = 0; i < VIMSUGMAGICL; ++i) | ||||
10521 | buf[i] = getc(fd); /* <fileID> */ | ||||
10522 | if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0) | ||||
10523 | { | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10524 | EMSG2(_("E778: This does not look like a .sug file: %s"), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10525 | slang->sl_fname); |
10526 | goto nextone; | ||||
10527 | } | ||||
10528 | c = getc(fd); /* <versionnr> */ | ||||
10529 | if (c < VIMSUGVERSION) | ||||
10530 | { | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10531 | EMSG2(_("E779: Old .sug file, needs to be updated: %s"), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10532 | slang->sl_fname); |
10533 | goto nextone; | ||||
10534 | } | ||||
10535 | else if (c > VIMSUGVERSION) | ||||
10536 | { | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10537 | EMSG2(_("E780: .sug file is for newer version of Vim: %s"), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10538 | slang->sl_fname); |
10539 | goto nextone; | ||||
10540 | } | ||||
10541 | |||||
10542 | /* Check the timestamp, it must be exactly the same as the one in | ||||
10543 | * the .spl file. Otherwise the word numbers won't match. */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 10544 | timestamp = get8c(fd); /* <timestamp> */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10545 | if (timestamp != slang->sl_sugtime) |
10546 | { | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10547 | EMSG2(_("E781: .sug file doesn't match .spl file: %s"), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10548 | slang->sl_fname); |
10549 | goto nextone; | ||||
10550 | } | ||||
10551 | |||||
10552 | /* | ||||
10553 | * <SUGWORDTREE>: <wordtree> | ||||
10554 | * Read the trie with the soundfolded words. | ||||
10555 | */ | ||||
10556 | if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs, | ||||
10557 | FALSE, 0) != 0) | ||||
10558 | { | ||||
10559 | someerror: | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 10560 | EMSG2(_("E782: error while reading .sug file: %s"), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10561 | slang->sl_fname); |
10562 | slang_clear_sug(slang); | ||||
10563 | goto nextone; | ||||
10564 | } | ||||
10565 | |||||
10566 | /* | ||||
10567 | * <SUGTABLE>: <sugwcount> <sugline> ... | ||||
10568 | * | ||||
10569 | * Read the table with word numbers. We use a file buffer for | ||||
10570 | * this, because it's so much like a file with lines. Makes it | ||||
10571 | * possible to swap the info and save on memory use. | ||||
10572 | */ | ||||
10573 | slang->sl_sugbuf = open_spellbuf(); | ||||
10574 | if (slang->sl_sugbuf == NULL) | ||||
10575 | goto someerror; | ||||
10576 | /* <sugwcount> */ | ||||
Bram Moolenaar | b388adb | 2006-02-28 23:50:17 +0000 | [diff] [blame] | 10577 | wcount = get4c(fd); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10578 | if (wcount < 0) |
10579 | goto someerror; | ||||
10580 | |||||
10581 | /* Read all the wordnr lists into the buffer, one NUL terminated | ||||
10582 | * list per line. */ | ||||
10583 | ga_init2(&ga, 1, 100); | ||||
10584 | for (wordnr = 0; wordnr < wcount; ++wordnr) | ||||
10585 | { | ||||
10586 | ga.ga_len = 0; | ||||
10587 | for (;;) | ||||
10588 | { | ||||
10589 | c = getc(fd); /* <sugline> */ | ||||
10590 | if (c < 0 || ga_grow(&ga, 1) == FAIL) | ||||
10591 | goto someerror; | ||||
10592 | ((char_u *)ga.ga_data)[ga.ga_len++] = c; | ||||
10593 | if (c == NUL) | ||||
10594 | break; | ||||
10595 | } | ||||
10596 | if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr, | ||||
10597 | ga.ga_data, ga.ga_len, TRUE) == FAIL) | ||||
10598 | goto someerror; | ||||
10599 | } | ||||
10600 | ga_clear(&ga); | ||||
10601 | |||||
10602 | /* | ||||
10603 | * Need to put word counts in the word tries, so that we can find | ||||
10604 | * a word by its number. | ||||
10605 | */ | ||||
10606 | tree_count_words(slang->sl_fbyts, slang->sl_fidxs); | ||||
10607 | tree_count_words(slang->sl_sbyts, slang->sl_sidxs); | ||||
10608 | |||||
10609 | nextone: | ||||
10610 | if (fd != NULL) | ||||
10611 | fclose(fd); | ||||
10612 | STRCPY(dotp, ".spl"); | ||||
10613 | } | ||||
10614 | } | ||||
10615 | } | ||||
10616 | |||||
10617 | |||||
10618 | /* | ||||
10619 | * Fill in the wordcount fields for a trie. | ||||
10620 | * Returns the total number of words. | ||||
10621 | */ | ||||
10622 | static void | ||||
10623 | tree_count_words(byts, idxs) | ||||
10624 | char_u *byts; | ||||
10625 | idx_T *idxs; | ||||
10626 | { | ||||
10627 | int depth; | ||||
10628 | idx_T arridx[MAXWLEN]; | ||||
10629 | int curi[MAXWLEN]; | ||||
10630 | int c; | ||||
10631 | idx_T n; | ||||
10632 | int wordcount[MAXWLEN]; | ||||
10633 | |||||
10634 | arridx[0] = 0; | ||||
10635 | curi[0] = 1; | ||||
10636 | wordcount[0] = 0; | ||||
10637 | depth = 0; | ||||
10638 | while (depth >= 0 && !got_int) | ||||
10639 | { | ||||
10640 | if (curi[depth] > byts[arridx[depth]]) | ||||
10641 | { | ||||
10642 | /* Done all bytes at this node, go up one level. */ | ||||
10643 | idxs[arridx[depth]] = wordcount[depth]; | ||||
10644 | if (depth > 0) | ||||
10645 | wordcount[depth - 1] += wordcount[depth]; | ||||
10646 | |||||
10647 | --depth; | ||||
10648 | fast_breakcheck(); | ||||
10649 | } | ||||
10650 | else | ||||
10651 | { | ||||
10652 | /* Do one more byte at this node. */ | ||||
10653 | n = arridx[depth] + curi[depth]; | ||||
10654 | ++curi[depth]; | ||||
10655 | |||||
10656 | c = byts[n]; | ||||
10657 | if (c == 0) | ||||
10658 | { | ||||
10659 | /* End of word, count it. */ | ||||
10660 | ++wordcount[depth]; | ||||
10661 | |||||
10662 | /* Skip over any other NUL bytes (same word with different | ||||
10663 | * flags). */ | ||||
10664 | while (byts[n + 1] == 0) | ||||
10665 | { | ||||
10666 | ++n; | ||||
10667 | ++curi[depth]; | ||||
10668 | } | ||||
10669 | } | ||||
10670 | else | ||||
10671 | { | ||||
10672 | /* Normal char, go one level deeper to count the words. */ | ||||
10673 | ++depth; | ||||
10674 | arridx[depth] = idxs[n]; | ||||
10675 | curi[depth] = 1; | ||||
10676 | wordcount[depth] = 0; | ||||
10677 | } | ||||
10678 | } | ||||
10679 | } | ||||
10680 | } | ||||
10681 | |||||
10682 | /* | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 10683 | * Free the info put in "*su" by spell_find_suggest(). |
10684 | */ | ||||
10685 | static void | ||||
10686 | spell_find_cleanup(su) | ||||
10687 | suginfo_T *su; | ||||
10688 | { | ||||
10689 | int i; | ||||
10690 | |||||
10691 | /* Free the suggestions. */ | ||||
10692 | for (i = 0; i < su->su_ga.ga_len; ++i) | ||||
10693 | vim_free(SUG(su->su_ga, i).st_word); | ||||
10694 | ga_clear(&su->su_ga); | ||||
10695 | for (i = 0; i < su->su_sga.ga_len; ++i) | ||||
10696 | vim_free(SUG(su->su_sga, i).st_word); | ||||
10697 | ga_clear(&su->su_sga); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10698 | |
10699 | /* Free the banned words. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10700 | hash_clear_all(&su->su_banned, 0); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10701 | } |
10702 | |||||
10703 | /* | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 10704 | * Make a copy of "word", with the first letter upper or lower cased, to |
10705 | * "wcopy[MAXWLEN]". "word" must not be empty. | ||||
10706 | * The result is NUL terminated. | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10707 | */ |
10708 | static void | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 10709 | onecap_copy(word, wcopy, upper) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10710 | char_u *word; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10711 | char_u *wcopy; |
10712 | int upper; /* TRUE: first letter made upper case */ | ||||
10713 | { | ||||
10714 | char_u *p; | ||||
10715 | int c; | ||||
10716 | int l; | ||||
10717 | |||||
10718 | p = word; | ||||
10719 | #ifdef FEAT_MBYTE | ||||
10720 | if (has_mbyte) | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 10721 | c = mb_cptr2char_adv(&p); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10722 | else |
10723 | #endif | ||||
10724 | c = *p++; | ||||
10725 | if (upper) | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 10726 | c = SPELL_TOUPPER(c); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10727 | else |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 10728 | c = SPELL_TOFOLD(c); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10729 | #ifdef FEAT_MBYTE |
10730 | if (has_mbyte) | ||||
10731 | l = mb_char2bytes(c, wcopy); | ||||
10732 | else | ||||
10733 | #endif | ||||
10734 | { | ||||
10735 | l = 1; | ||||
10736 | wcopy[0] = c; | ||||
10737 | } | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 10738 | vim_strncpy(wcopy + l, p, MAXWLEN - l - 1); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10739 | } |
10740 | |||||
10741 | /* | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 10742 | * Make a copy of "word" with all the letters upper cased into |
10743 | * "wcopy[MAXWLEN]". The result is NUL terminated. | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10744 | */ |
10745 | static void | ||||
10746 | allcap_copy(word, wcopy) | ||||
10747 | char_u *word; | ||||
10748 | char_u *wcopy; | ||||
10749 | { | ||||
10750 | char_u *s; | ||||
10751 | char_u *d; | ||||
10752 | int c; | ||||
10753 | |||||
10754 | d = wcopy; | ||||
10755 | for (s = word; *s != NUL; ) | ||||
10756 | { | ||||
10757 | #ifdef FEAT_MBYTE | ||||
10758 | if (has_mbyte) | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 10759 | c = mb_cptr2char_adv(&s); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10760 | else |
10761 | #endif | ||||
10762 | c = *s++; | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 10763 | |
10764 | #ifdef FEAT_MBYTE | ||||
10765 | /* We only change ß to SS when we are certain latin1 is used. It | ||||
10766 | * would cause weird errors in other 8-bit encodings. */ | ||||
10767 | if (enc_latin1like && c == 0xdf) | ||||
10768 | { | ||||
10769 | c = 'S'; | ||||
10770 | if (d - wcopy >= MAXWLEN - 1) | ||||
10771 | break; | ||||
10772 | *d++ = c; | ||||
10773 | } | ||||
10774 | else | ||||
10775 | #endif | ||||
10776 | c = SPELL_TOUPPER(c); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10777 | |
10778 | #ifdef FEAT_MBYTE | ||||
10779 | if (has_mbyte) | ||||
10780 | { | ||||
10781 | if (d - wcopy >= MAXWLEN - MB_MAXBYTES) | ||||
10782 | break; | ||||
10783 | d += mb_char2bytes(c, d); | ||||
10784 | } | ||||
10785 | else | ||||
10786 | #endif | ||||
10787 | { | ||||
10788 | if (d - wcopy >= MAXWLEN - 1) | ||||
10789 | break; | ||||
10790 | *d++ = c; | ||||
10791 | } | ||||
10792 | } | ||||
10793 | *d = NUL; | ||||
10794 | } | ||||
10795 | |||||
10796 | /* | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10797 | * Try finding suggestions by recognizing specific situations. |
10798 | */ | ||||
10799 | static void | ||||
10800 | suggest_try_special(su) | ||||
10801 | suginfo_T *su; | ||||
10802 | { | ||||
10803 | char_u *p; | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 10804 | size_t len; |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10805 | int c; |
10806 | char_u word[MAXWLEN]; | ||||
10807 | |||||
10808 | /* | ||||
10809 | * Recognize a word that is repeated: "the the". | ||||
10810 | */ | ||||
10811 | p = skiptowhite(su->su_fbadword); | ||||
10812 | len = p - su->su_fbadword; | ||||
10813 | p = skipwhite(p); | ||||
10814 | if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0) | ||||
10815 | { | ||||
10816 | /* Include badflags: if the badword is onecap or allcap | ||||
10817 | * use that for the goodword too: "The the" -> "The". */ | ||||
10818 | c = su->su_fbadword[len]; | ||||
10819 | su->su_fbadword[len] = NUL; | ||||
10820 | make_case_word(su->su_fbadword, word, su->su_badflags); | ||||
10821 | su->su_fbadword[len] = c; | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 10822 | |
10823 | /* Give a soundalike score of 0, compute the score as if deleting one | ||||
10824 | * character. */ | ||||
10825 | add_suggestion(su, &su->su_ga, word, su->su_badlen, | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10826 | RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10827 | } |
10828 | } | ||||
10829 | |||||
10830 | /* | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10831 | * Try finding suggestions by adding/removing/swapping letters. |
10832 | */ | ||||
10833 | static void | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10834 | suggest_try_change(su) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10835 | suginfo_T *su; |
10836 | { | ||||
10837 | char_u fword[MAXWLEN]; /* copy of the bad word, case-folded */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 10838 | int n; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10839 | char_u *p; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 10840 | int lpi; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10841 | langp_T *lp; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10842 | |
10843 | /* We make a copy of the case-folded bad word, so that we can modify it | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10844 | * to find matches (esp. REP items). Append some more text, changing |
10845 | * chars after the bad word may help. */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10846 | STRCPY(fword, su->su_fbadword); |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 10847 | n = STRLEN(fword); |
10848 | p = su->su_badptr + su->su_badlen; | ||||
10849 | (void)spell_casefold(p, STRLEN(p), fword + n, MAXWLEN - n); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10850 | |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 10851 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10852 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 10853 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 10854 | |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 10855 | /* If reloading a spell file fails it's still in the list but |
10856 | * everything has been cleared. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10857 | if (lp->lp_slang->sl_fbyts == NULL) |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 10858 | continue; |
10859 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10860 | /* Try it for this language. Will add possible suggestions. */ |
10861 | suggest_trie_walk(su, lp, fword, FALSE); | ||||
10862 | } | ||||
10863 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10864 | |
/*
 * Check the maximum score, if we go over it we won't try this change.
 * Evaluates to non-zero when the score of "stack[depth]" plus "add" is
 * still below "su->su_maxscore".
 * All arguments are parenthesized, so that expressions such as "&info" or
 * "sp + 1" can be passed safely.
 */
#define TRY_DEEPER(su, stack, depth, add) \
                (((stack)[(depth)].ts_score + (add)) < (su)->su_maxscore)
10868 | |||||
10869 | /* | ||||
10870 | * Try finding suggestions by adding/removing/swapping letters. | ||||
10871 | * | ||||
10872 | * This uses a state machine. At each node in the tree we try various | ||||
10873 | * operations. When trying if an operation works "depth" is increased and the | ||||
10874 | * stack[] is used to store info. This allows combinations, thus insert one | ||||
10875 | * character, replace one and delete another. The number of changes is | ||||
10876 | * limited by su->su_maxscore. | ||||
10877 | * | ||||
10878 | * After implementing this I noticed an article by Kemal Oflazer that | ||||
10879 | * describes something similar: "Error-tolerant Finite State Recognition with | ||||
10880 | * Applications to Morphological Analysis and Spelling Correction" (1996). | ||||
10881 | * The implementation in the article is simplified and requires a stack of | ||||
10882 | * unknown depth. The implementation here only needs a stack depth equal to | ||||
10883 | * the length of the word. | ||||
10884 | * | ||||
10885 | * This is also used for the sound-folded word, "soundfold" is TRUE then. | ||||
10886 | * The mechanism is the same, but we find a match with a sound-folded word | ||||
10887 | * that comes from one or more original words. Each of these words may be | ||||
10888 | * added, this is done by add_sound_suggest(). | ||||
10889 | * Don't use: | ||||
10890 | * the prefix tree or the keep-case tree | ||||
10891 | * "su->su_badlen" | ||||
10892 | * anything to do with upper and lower case | ||||
10893 | * anything to do with word or non-word characters ("spell_iswordp()") | ||||
10894 | * banned words | ||||
10895 | * word flags (rare, region, compounding) | ||||
10896 | * word splitting for now | ||||
10897 | * "similar_chars()" | ||||
10898 | * use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep" | ||||
10899 | */ | ||||
10900 | static void | ||||
10901 | suggest_trie_walk(su, lp, fword, soundfold) | ||||
10902 | suginfo_T *su; | ||||
10903 | langp_T *lp; | ||||
10904 | char_u *fword; | ||||
10905 | int soundfold; | ||||
10906 | { | ||||
10907 | char_u tword[MAXWLEN]; /* good word collected so far */ | ||||
10908 | trystate_T stack[MAXWLEN]; | ||||
10909 | char_u preword[MAXWLEN * 3]; /* word found with proper case; | ||||
10910 | * concatenation of prefix compound | ||||
10911 | * words and split word. NUL terminated | ||||
10912 | * when going deeper but not when coming | ||||
10913 | * back. */ | ||||
10914 | char_u compflags[MAXWLEN]; /* compound flags, one for each word */ | ||||
10915 | trystate_T *sp; | ||||
10916 | int newscore; | ||||
10917 | int score; | ||||
10918 | char_u *byts, *fbyts, *pbyts; | ||||
10919 | idx_T *idxs, *fidxs, *pidxs; | ||||
10920 | int depth; | ||||
10921 | int c, c2, c3; | ||||
10922 | int n = 0; | ||||
10923 | int flags; | ||||
10924 | garray_T *gap; | ||||
10925 | idx_T arridx; | ||||
10926 | int len; | ||||
10927 | char_u *p; | ||||
10928 | fromto_T *ftp; | ||||
10929 | int fl = 0, tl; | ||||
10930 | int repextra = 0; /* extra bytes in fword[] from REP item */ | ||||
10931 | slang_T *slang = lp->lp_slang; | ||||
10932 | int fword_ends; | ||||
10933 | int goodword_ends; | ||||
10934 | #ifdef DEBUG_TRIEWALK | ||||
10935 | /* Stores the name of the change made at each level. */ | ||||
10936 | char_u changename[MAXWLEN][80]; | ||||
10937 | #endif | ||||
10938 | int breakcheckcount = 1000; | ||||
10939 | int compound_ok; | ||||
10940 | |||||
10941 | /* | ||||
10942 | * Go through the whole case-fold tree, try changes at each node. | ||||
10943 | * "tword[]" contains the word collected from nodes in the tree. | ||||
10944 | * "fword[]" the word we are trying to match with (initially the bad | ||||
10945 | * word). | ||||
10946 | */ | ||||
10947 | depth = 0; | ||||
10948 | sp = &stack[0]; | ||||
10949 | vim_memset(sp, 0, sizeof(trystate_T)); | ||||
10950 | sp->ts_curi = 1; | ||||
10951 | |||||
10952 | if (soundfold) | ||||
10953 | { | ||||
10954 | /* Going through the soundfold tree. */ | ||||
10955 | byts = fbyts = slang->sl_sbyts; | ||||
10956 | idxs = fidxs = slang->sl_sidxs; | ||||
10957 | pbyts = NULL; | ||||
10958 | pidxs = NULL; | ||||
10959 | sp->ts_prefixdepth = PFD_NOPREFIX; | ||||
10960 | sp->ts_state = STATE_START; | ||||
10961 | } | ||||
10962 | else | ||||
10963 | { | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 10964 | /* |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10965 | * When there are postponed prefixes we need to use these first. At |
10966 | * the end of the prefix we continue in the case-fold tree. | ||||
10967 | */ | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 10968 | fbyts = slang->sl_fbyts; |
10969 | fidxs = slang->sl_fidxs; | ||||
10970 | pbyts = slang->sl_pbyts; | ||||
10971 | pidxs = slang->sl_pidxs; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10972 | if (pbyts != NULL) |
10973 | { | ||||
10974 | byts = pbyts; | ||||
10975 | idxs = pidxs; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 10976 | sp->ts_prefixdepth = PFD_PREFIXTREE; |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10977 | sp->ts_state = STATE_NOPREFIX; /* try without prefix first */ |
10978 | } | ||||
10979 | else | ||||
10980 | { | ||||
10981 | byts = fbyts; | ||||
10982 | idxs = fidxs; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 10983 | sp->ts_prefixdepth = PFD_NOPREFIX; |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 10984 | sp->ts_state = STATE_START; |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10985 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10986 | } |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 10987 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 10988 | /* |
10989 | * Loop to find all suggestions. At each round we either: | ||||
10990 | * - For the current state try one operation, advance "ts_curi", | ||||
10991 | * increase "depth". | ||||
10992 | * - When a state is done go to the next, set "ts_state". | ||||
10993 | * - When all states are tried decrease "depth". | ||||
10994 | */ | ||||
10995 | while (depth >= 0 && !got_int) | ||||
10996 | { | ||||
10997 | sp = &stack[depth]; | ||||
10998 | switch (sp->ts_state) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 10999 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11000 | case STATE_START: |
11001 | case STATE_NOPREFIX: | ||||
11002 | /* | ||||
11003 | * Start of node: Deal with NUL bytes, which means | ||||
11004 | * tword[] may end here. | ||||
11005 | */ | ||||
11006 | arridx = sp->ts_arridx; /* current node in the tree */ | ||||
11007 | len = byts[arridx]; /* bytes in this node */ | ||||
11008 | arridx += sp->ts_curi; /* index of current byte */ | ||||
11009 | |||||
11010 | if (sp->ts_prefixdepth == PFD_PREFIXTREE) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11011 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11012 | /* Skip over the NUL bytes, we use them later. */ |
11013 | for (n = 0; n < len && byts[arridx + n] == 0; ++n) | ||||
11014 | ; | ||||
11015 | sp->ts_curi += n; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11016 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11017 | /* Always past NUL bytes now. */ |
11018 | n = (int)sp->ts_state; | ||||
11019 | sp->ts_state = STATE_ENDNUL; | ||||
11020 | sp->ts_save_badflags = su->su_badflags; | ||||
11021 | |||||
11022 | /* At end of a prefix or at start of prefixtree: check for | ||||
11023 | * following word. */ | ||||
11024 | if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11025 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11026 | /* Set su->su_badflags to the caps type at this position. |
11027 | * Use the caps type until here for the prefix itself. */ | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11028 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11029 | if (has_mbyte) |
11030 | n = nofold_len(fword, sp->ts_fidx, su->su_badptr); | ||||
11031 | else | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11032 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11033 | n = sp->ts_fidx; |
11034 | flags = badword_captype(su->su_badptr, su->su_badptr + n); | ||||
11035 | su->su_badflags = badword_captype(su->su_badptr + n, | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11036 | su->su_badptr + su->su_badlen); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11037 | #ifdef DEBUG_TRIEWALK |
11038 | sprintf(changename[depth], "prefix"); | ||||
11039 | #endif | ||||
11040 | go_deeper(stack, depth, 0); | ||||
11041 | ++depth; | ||||
11042 | sp = &stack[depth]; | ||||
11043 | sp->ts_prefixdepth = depth - 1; | ||||
11044 | byts = fbyts; | ||||
11045 | idxs = fidxs; | ||||
11046 | sp->ts_arridx = 0; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11047 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11048 | /* Move the prefix to preword[] with the right case |
11049 | * and make find_keepcap_word() work. */ | ||||
11050 | tword[sp->ts_twordlen] = NUL; | ||||
11051 | make_case_word(tword + sp->ts_splitoff, | ||||
11052 | preword + sp->ts_prewordlen, flags); | ||||
11053 | sp->ts_prewordlen = STRLEN(preword); | ||||
11054 | sp->ts_splitoff = sp->ts_twordlen; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11055 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11056 | break; |
11057 | } | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11058 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11059 | if (sp->ts_curi > len || byts[arridx] != 0) |
11060 | { | ||||
11061 | /* Past bytes in node and/or past NUL bytes. */ | ||||
11062 | sp->ts_state = STATE_ENDNUL; | ||||
11063 | sp->ts_save_badflags = su->su_badflags; | ||||
11064 | break; | ||||
11065 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11066 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11067 | /* |
11068 | * End of word in tree. | ||||
11069 | */ | ||||
11070 | ++sp->ts_curi; /* eat one NUL byte */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11071 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11072 | flags = (int)idxs[arridx]; |
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 11073 | |
11074 | /* Skip words with the NOSUGGEST flag. */ | ||||
11075 | if (flags & WF_NOSUGGEST) | ||||
11076 | break; | ||||
11077 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11078 | fword_ends = (fword[sp->ts_fidx] == NUL |
11079 | || (soundfold | ||||
11080 | ? vim_iswhite(fword[sp->ts_fidx]) | ||||
11081 | : !spell_iswordp(fword + sp->ts_fidx, curbuf))); | ||||
11082 | tword[sp->ts_twordlen] = NUL; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11083 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11084 | if (sp->ts_prefixdepth <= PFD_NOTSPECIAL |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11085 | && (sp->ts_flags & TSF_PREFIXOK) == 0) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11086 | { |
11087 | /* There was a prefix before the word. Check that the prefix | ||||
11088 | * can be used with this word. */ | ||||
11089 | /* Count the length of the NULs in the prefix. If there are | ||||
11090 | * none this must be the first try without a prefix. */ | ||||
11091 | n = stack[sp->ts_prefixdepth].ts_arridx; | ||||
11092 | len = pbyts[n++]; | ||||
11093 | for (c = 0; c < len && pbyts[n + c] == 0; ++c) | ||||
11094 | ; | ||||
11095 | if (c > 0) | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11096 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11097 | c = valid_word_prefix(c, n, flags, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11098 | tword + sp->ts_splitoff, slang, FALSE); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11099 | if (c == 0) |
11100 | break; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11101 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11102 | /* Use the WF_RARE flag for a rare prefix. */ |
11103 | if (c & WF_RAREPFX) | ||||
11104 | flags |= WF_RARE; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11105 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11106 | /* Tricky: when checking for both prefix and compounding |
11107 | * we run into the prefix flag first. | ||||
11108 | * Remember that it's OK, so that we accept the prefix | ||||
11109 | * when arriving at a compound flag. */ | ||||
11110 | sp->ts_flags |= TSF_PREFIXOK; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11111 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11112 | } |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 11113 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11114 | /* Check NEEDCOMPOUND: can't use word without compounding. Do try |
11115 | * appending another compound word below. */ | ||||
11116 | if (sp->ts_complen == sp->ts_compsplit && fword_ends | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 11117 | && (flags & WF_NEEDCOMP)) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11118 | goodword_ends = FALSE; |
11119 | else | ||||
11120 | goodword_ends = TRUE; | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 11121 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11122 | p = NULL; |
11123 | compound_ok = TRUE; | ||||
11124 | if (sp->ts_complen > sp->ts_compsplit) | ||||
11125 | { | ||||
11126 | if (slang->sl_nobreak) | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11127 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11128 | /* There was a word before this word. When there was no |
11129 | * change in this word (it was correct) add the first word | ||||
11130 | * as a suggestion. If this word was corrected too, we | ||||
11131 | * need to check if a correct word follows. */ | ||||
11132 | if (sp->ts_fidx - sp->ts_splitfidx | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11133 | == sp->ts_twordlen - sp->ts_splitoff |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11134 | && STRNCMP(fword + sp->ts_splitfidx, |
11135 | tword + sp->ts_splitoff, | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11136 | sp->ts_fidx - sp->ts_splitfidx) == 0) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11137 | { |
11138 | preword[sp->ts_prewordlen] = NUL; | ||||
11139 | newscore = score_wordcount_adj(slang, sp->ts_score, | ||||
11140 | preword + sp->ts_prewordlen, | ||||
11141 | sp->ts_prewordlen > 0); | ||||
11142 | /* Add the suggestion if the score isn't too bad. */ | ||||
11143 | if (newscore <= su->su_maxscore) | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11144 | add_suggestion(su, &su->su_ga, preword, |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 11145 | sp->ts_splitfidx - repextra, |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11146 | newscore, 0, FALSE, |
11147 | lp->lp_sallang, FALSE); | ||||
11148 | break; | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11149 | } |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11150 | } |
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 11151 | else |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11152 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11153 | /* There was a compound word before this word. If this |
11154 | * word does not support compounding then give up | ||||
11155 | * (splitting is tried for the word without compound | ||||
11156 | * flag). */ | ||||
11157 | if (((unsigned)flags >> 24) == 0 | ||||
11158 | || sp->ts_twordlen - sp->ts_splitoff | ||||
11159 | < slang->sl_compminlen) | ||||
11160 | break; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11161 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11162 | /* For multi-byte chars check character length against |
11163 | * COMPOUNDMIN. */ | ||||
11164 | if (has_mbyte | ||||
11165 | && slang->sl_compminlen > 0 | ||||
11166 | && mb_charlen(tword + sp->ts_splitoff) | ||||
11167 | < slang->sl_compminlen) | ||||
11168 | break; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11169 | #endif |
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 11170 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11171 | compflags[sp->ts_complen] = ((unsigned)flags >> 24); |
11172 | compflags[sp->ts_complen + 1] = NUL; | ||||
11173 | vim_strncpy(preword + sp->ts_prewordlen, | ||||
11174 | tword + sp->ts_splitoff, | ||||
11175 | sp->ts_twordlen - sp->ts_splitoff); | ||||
11176 | p = preword; | ||||
11177 | while (*skiptowhite(p) != NUL) | ||||
11178 | p = skipwhite(skiptowhite(p)); | ||||
11179 | if (fword_ends && !can_compound(slang, p, | ||||
11180 | compflags + sp->ts_compsplit)) | ||||
11181 | /* Compound is not allowed. But it may still be | ||||
11182 | * possible if we add another (short) word. */ | ||||
11183 | compound_ok = FALSE; | ||||
11184 | |||||
11185 | /* Get pointer to last char of previous word. */ | ||||
11186 | p = preword + sp->ts_prewordlen; | ||||
11187 | mb_ptr_back(preword, p); | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11188 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11189 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11190 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11191 | /* |
11192 | * Form the word with proper case in preword. | ||||
11193 | * If there is a word from a previous split, append. | ||||
11194 | * For the soundfold tree don't change the case, simply append. | ||||
11195 | */ | ||||
11196 | if (soundfold) | ||||
11197 | STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff); | ||||
11198 | else if (flags & WF_KEEPCAP) | ||||
11199 | /* Must find the word in the keep-case tree. */ | ||||
11200 | find_keepcap_word(slang, tword + sp->ts_splitoff, | ||||
11201 | preword + sp->ts_prewordlen); | ||||
11202 | else | ||||
11203 | { | ||||
11204 | /* Include badflags: If the badword is onecap or allcap | ||||
11205 | * use that for the goodword too. But if the badword is | ||||
11206 | * allcap and it's only one char long use onecap. */ | ||||
11207 | c = su->su_badflags; | ||||
11208 | if ((c & WF_ALLCAP) | ||||
11209 | #ifdef FEAT_MBYTE | ||||
11210 | && su->su_badlen == (*mb_ptr2len)(su->su_badptr) | ||||
11211 | #else | ||||
11212 | && su->su_badlen == 1 | ||||
11213 | #endif | ||||
11214 | ) | ||||
11215 | c = WF_ONECAP; | ||||
11216 | c |= flags; | ||||
11217 | |||||
11218 | /* When appending a compound word after a word character don't | ||||
11219 | * use Onecap. */ | ||||
11220 | if (p != NULL && spell_iswordp_nmw(p)) | ||||
11221 | c &= ~WF_ONECAP; | ||||
11222 | make_case_word(tword + sp->ts_splitoff, | ||||
11223 | preword + sp->ts_prewordlen, c); | ||||
11224 | } | ||||
11225 | |||||
11226 | if (!soundfold) | ||||
11227 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11228 | /* Don't use a banned word. It may appear again as a good |
11229 | * word, thus remember it. */ | ||||
11230 | if (flags & WF_BANNED) | ||||
11231 | { | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11232 | add_banned(su, preword + sp->ts_prewordlen); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11233 | break; |
11234 | } | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11235 | if ((sp->ts_complen == sp->ts_compsplit |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11236 | && WAS_BANNED(su, preword + sp->ts_prewordlen)) |
11237 | || WAS_BANNED(su, preword)) | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11238 | { |
11239 | if (slang->sl_compprog == NULL) | ||||
11240 | break; | ||||
11241 | /* the word so far was banned but we may try compounding */ | ||||
11242 | goodword_ends = FALSE; | ||||
11243 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11244 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11245 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11246 | newscore = 0; |
11247 | if (!soundfold) /* soundfold words don't have flags */ | ||||
11248 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11249 | if ((flags & WF_REGION) |
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 11250 | && (((unsigned)flags >> 16) & lp->lp_region) == 0) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11251 | newscore += SCORE_REGION; |
11252 | if (flags & WF_RARE) | ||||
11253 | newscore += SCORE_RARE; | ||||
11254 | |||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11255 | if (!spell_valid_case(su->su_badflags, |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11256 | captype(preword + sp->ts_prewordlen, NULL))) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11257 | newscore += SCORE_ICASE; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11258 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11259 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11260 | /* TODO: how about splitting in the soundfold tree? */ |
11261 | if (fword_ends | ||||
11262 | && goodword_ends | ||||
11263 | && sp->ts_fidx >= sp->ts_fidxtry | ||||
11264 | && compound_ok) | ||||
11265 | { | ||||
11266 | /* The badword also ends: add suggestions. */ | ||||
11267 | #ifdef DEBUG_TRIEWALK | ||||
11268 | if (soundfold && STRCMP(preword, "smwrd") == 0) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11269 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11270 | int j; |
11271 | |||||
11272 | /* print the stack of changes that brought us here */ | ||||
11273 | smsg("------ %s -------", fword); | ||||
11274 | for (j = 0; j < depth; ++j) | ||||
11275 | smsg("%s", changename[j]); | ||||
11276 | } | ||||
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 11277 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11278 | if (soundfold) |
11279 | { | ||||
11280 | /* For soundfolded words we need to find the original | ||||
11281 | * words, the edit distance and then add them. */ | ||||
11282 | add_sound_suggest(su, preword, sp->ts_score, lp); | ||||
11283 | } | ||||
11284 | else | ||||
11285 | { | ||||
11286 | /* Give a penalty when changing non-word char to word | ||||
11287 | * char, e.g., "thes," -> "these". */ | ||||
11288 | p = fword + sp->ts_fidx; | ||||
11289 | mb_ptr_back(fword, p); | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11290 | if (!spell_iswordp(p, curbuf)) |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 11291 | { |
11292 | p = preword + STRLEN(preword); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11293 | mb_ptr_back(preword, p); |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11294 | if (spell_iswordp(p, curbuf)) |
Bram Moolenaar | cf6bf39 | 2005-06-27 22:27:46 +0000 | [diff] [blame] | 11295 | newscore += SCORE_NONWORD; |
11296 | } | ||||
11297 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11298 | /* Give a bonus to words seen before. */ |
11299 | score = score_wordcount_adj(slang, | ||||
11300 | sp->ts_score + newscore, | ||||
11301 | preword + sp->ts_prewordlen, | ||||
11302 | sp->ts_prewordlen > 0); | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11303 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11304 | /* Add the suggestion if the score isn't too bad. */ |
11305 | if (score <= su->su_maxscore) | ||||
Bram Moolenaar | 2d3f489 | 2006-01-20 23:02:51 +0000 | [diff] [blame] | 11306 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11307 | add_suggestion(su, &su->su_ga, preword, |
11308 | sp->ts_fidx - repextra, | ||||
11309 | score, 0, FALSE, lp->lp_sallang, FALSE); | ||||
Bram Moolenaar | 2d3f489 | 2006-01-20 23:02:51 +0000 | [diff] [blame] | 11310 | |
11311 | if (su->su_badflags & WF_MIXCAP) | ||||
11312 | { | ||||
11313 | /* We really don't know if the word should be | ||||
11314 | * upper or lower case, add both. */ | ||||
11315 | c = captype(preword, NULL); | ||||
11316 | if (c == 0 || c == WF_ALLCAP) | ||||
11317 | { | ||||
11318 | make_case_word(tword + sp->ts_splitoff, | ||||
11319 | preword + sp->ts_prewordlen, | ||||
11320 | c == 0 ? WF_ALLCAP : 0); | ||||
11321 | |||||
11322 | add_suggestion(su, &su->su_ga, preword, | ||||
11323 | sp->ts_fidx - repextra, | ||||
11324 | score + SCORE_ICASE, 0, FALSE, | ||||
11325 | lp->lp_sallang, FALSE); | ||||
11326 | } | ||||
11327 | } | ||||
11328 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11329 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11330 | } |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11331 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11332 | /* |
11333 | * Try word split and/or compounding. | ||||
11334 | */ | ||||
11335 | if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11336 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11337 | /* Don't split halfway a character. */ |
11338 | && (!has_mbyte || sp->ts_tcharlen == 0) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11339 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11340 | ) |
11341 | { | ||||
11342 | int try_compound; | ||||
11343 | int try_split; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11344 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11345 | /* If past the end of the bad word don't try a split. |
11346 | * Otherwise try changing the next word. E.g., find | ||||
11347 | * suggestions for "the the" where the second "the" is | ||||
11348 | * different. It's done like a split. | ||||
11349 | * TODO: word split for soundfold words */ | ||||
11350 | try_split = (sp->ts_fidx - repextra < su->su_badlen) | ||||
11351 | && !soundfold; | ||||
11352 | |||||
11353 | /* Get here in several situations: | ||||
11354 | * 1. The word in the tree ends: | ||||
11355 | * If the word allows compounding try that. Otherwise try | ||||
11356 | * a split by inserting a space. For both check that a | ||||
11357 | * valid word starts at fword[sp->ts_fidx]. | ||||
11358 | * For NOBREAK do like compounding to be able to check if | ||||
11359 | * the next word is valid. | ||||
11360 | * 2. The badword does end, but it was due to a change (e.g., | ||||
11361 | * a swap). No need to split, but do check that the | ||||
11362 | * following word is valid. | ||||
11363 | * 3. The badword and the word in the tree end. It may still | ||||
11364 | * be possible to compound another (short) word. | ||||
11365 | */ | ||||
11366 | try_compound = FALSE; | ||||
11367 | if (!soundfold | ||||
11368 | && slang->sl_compprog != NULL | ||||
11369 | && ((unsigned)flags >> 24) != 0 | ||||
11370 | && sp->ts_twordlen - sp->ts_splitoff | ||||
11371 | >= slang->sl_compminlen | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 11372 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11373 | && (!has_mbyte |
11374 | || slang->sl_compminlen == 0 | ||||
11375 | || mb_charlen(tword + sp->ts_splitoff) | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 11376 | >= slang->sl_compminlen) |
11377 | #endif | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11378 | && (slang->sl_compsylmax < MAXWLEN |
11379 | || sp->ts_complen + 1 - sp->ts_compsplit | ||||
11380 | < slang->sl_compmax) | ||||
11381 | && (byte_in_str(sp->ts_complen == sp->ts_compsplit | ||||
11382 | ? slang->sl_compstartflags | ||||
11383 | : slang->sl_compallflags, | ||||
Bram Moolenaar | 6de6853 | 2005-08-24 22:08:48 +0000 | [diff] [blame] | 11384 | ((unsigned)flags >> 24)))) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11385 | { |
11386 | try_compound = TRUE; | ||||
11387 | compflags[sp->ts_complen] = ((unsigned)flags >> 24); | ||||
11388 | compflags[sp->ts_complen + 1] = NUL; | ||||
11389 | } | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11390 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11391 | /* For NOBREAK we never try splitting, it won't make any word |
11392 | * valid. */ | ||||
11393 | if (slang->sl_nobreak) | ||||
11394 | try_compound = TRUE; | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11395 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11396 | /* If we could add a compound word, and it's also possible to |
11397 | * split at this point, do the split first and set | ||||
11398 | * TSF_DIDSPLIT to avoid doing it again. */ | ||||
11399 | else if (!fword_ends | ||||
11400 | && try_compound | ||||
11401 | && (sp->ts_flags & TSF_DIDSPLIT) == 0) | ||||
11402 | { | ||||
11403 | try_compound = FALSE; | ||||
11404 | sp->ts_flags |= TSF_DIDSPLIT; | ||||
11405 | --sp->ts_curi; /* do the same NUL again */ | ||||
11406 | compflags[sp->ts_complen] = NUL; | ||||
11407 | } | ||||
11408 | else | ||||
11409 | sp->ts_flags &= ~TSF_DIDSPLIT; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11410 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11411 | if (try_split || try_compound) |
11412 | { | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11413 | if (!try_compound && (!fword_ends || !goodword_ends)) |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11414 | { |
11415 | /* If we're going to split need to check that the | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 11416 | * words so far are valid for compounding. If there |
11417 | * is only one word it must not have the NEEDCOMPOUND | ||||
11418 | * flag. */ | ||||
11419 | if (sp->ts_complen == sp->ts_compsplit | ||||
11420 | && (flags & WF_NEEDCOMP)) | ||||
11421 | break; | ||||
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 11422 | p = preword; |
11423 | while (*skiptowhite(p) != NUL) | ||||
11424 | p = skipwhite(skiptowhite(p)); | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11425 | if (sp->ts_complen > sp->ts_compsplit |
Bram Moolenaar | e52325c | 2005-08-22 22:54:29 +0000 | [diff] [blame] | 11426 | && !can_compound(slang, p, |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11427 | compflags + sp->ts_compsplit)) |
11428 | break; | ||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 11429 | |
11430 | if (slang->sl_nosplitsugs) | ||||
11431 | newscore += SCORE_SPLIT_NO; | ||||
11432 | else | ||||
11433 | newscore += SCORE_SPLIT; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11434 | |
11435 | /* Give a bonus to words seen before. */ | ||||
11436 | newscore = score_wordcount_adj(slang, newscore, | ||||
11437 | preword + sp->ts_prewordlen, TRUE); | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11438 | } |
11439 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11440 | if (TRY_DEEPER(su, stack, depth, newscore)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11441 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11442 | go_deeper(stack, depth, newscore); |
11443 | #ifdef DEBUG_TRIEWALK | ||||
11444 | if (!try_compound && !fword_ends) | ||||
11445 | sprintf(changename[depth], "%.*s-%s: split", | ||||
11446 | sp->ts_twordlen, tword, fword + sp->ts_fidx); | ||||
11447 | else | ||||
11448 | sprintf(changename[depth], "%.*s-%s: compound", | ||||
11449 | sp->ts_twordlen, tword, fword + sp->ts_fidx); | ||||
11450 | #endif | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11451 | /* Save things to be restored at STATE_SPLITUNDO. */ |
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 11452 | sp->ts_save_badflags = su->su_badflags; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11453 | sp->ts_state = STATE_SPLITUNDO; |
11454 | |||||
11455 | ++depth; | ||||
11456 | sp = &stack[depth]; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11457 | |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11458 | /* Append a space to preword when splitting. */ |
11459 | if (!try_compound && !fword_ends) | ||||
11460 | STRCAT(preword, " "); | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11461 | sp->ts_prewordlen = STRLEN(preword); |
11462 | sp->ts_splitoff = sp->ts_twordlen; | ||||
Bram Moolenaar | 7862282 | 2005-08-23 21:00:13 +0000 | [diff] [blame] | 11463 | sp->ts_splitfidx = sp->ts_fidx; |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11464 | |
11465 | /* If the badword has a non-word character at this | ||||
11466 | * position skip it. That means replacing the | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11467 | * non-word character with a space. Always skip a |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 11468 | * character when the word ends. But only when the |
11469 | * good word can end. */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11470 | if (((!try_compound && !spell_iswordp_nmw(fword |
11471 | + sp->ts_fidx)) | ||||
11472 | || fword_ends) | ||||
11473 | && fword[sp->ts_fidx] != NUL | ||||
11474 | && goodword_ends) | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11475 | { |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11476 | int l; |
11477 | |||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11478 | #ifdef FEAT_MBYTE |
11479 | if (has_mbyte) | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11480 | l = MB_BYTE2LEN(fword[sp->ts_fidx]); |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11481 | else |
11482 | #endif | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11483 | l = 1; |
11484 | if (fword_ends) | ||||
11485 | { | ||||
11486 | /* Copy the skipped character to preword. */ | ||||
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11487 | mch_memmove(preword + sp->ts_prewordlen, |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11488 | fword + sp->ts_fidx, l); |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11489 | sp->ts_prewordlen += l; |
11490 | preword[sp->ts_prewordlen] = NUL; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11491 | } |
11492 | else | ||||
11493 | sp->ts_score -= SCORE_SPLIT - SCORE_SUBST; | ||||
11494 | sp->ts_fidx += l; | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11495 | } |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11496 | |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11497 | /* When compounding include compound flag in |
11498 | * compflags[] (already set above). When splitting we | ||||
11499 | * may start compounding over again. */ | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11500 | if (try_compound) |
Bram Moolenaar | 5195e45 | 2005-08-19 20:32:47 +0000 | [diff] [blame] | 11501 | ++sp->ts_complen; |
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11502 | else |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11503 | sp->ts_compsplit = sp->ts_complen; |
11504 | sp->ts_prefixdepth = PFD_NOPREFIX; | ||||
Bram Moolenaar | 5b8d8fd | 2005-08-16 23:01:50 +0000 | [diff] [blame] | 11505 | |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11506 | /* set su->su_badflags to the caps type at this |
11507 | * position */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 11508 | #ifdef FEAT_MBYTE |
11509 | if (has_mbyte) | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11510 | n = nofold_len(fword, sp->ts_fidx, su->su_badptr); |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 11511 | else |
11512 | #endif | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11513 | n = sp->ts_fidx; |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 11514 | su->su_badflags = badword_captype(su->su_badptr + n, |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 11515 | su->su_badptr + su->su_badlen); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11516 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11517 | /* Restart at top of the tree. */ |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 11518 | sp->ts_arridx = 0; |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11519 | |
11520 | /* If there are postponed prefixes, try these too. */ | ||||
11521 | if (pbyts != NULL) | ||||
11522 | { | ||||
11523 | byts = pbyts; | ||||
11524 | idxs = pidxs; | ||||
11525 | sp->ts_prefixdepth = PFD_PREFIXTREE; | ||||
11526 | sp->ts_state = STATE_NOPREFIX; | ||||
11527 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11528 | } |
11529 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11530 | } |
11531 | break; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11532 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11533 | case STATE_SPLITUNDO: |
11534 | /* Undo the changes done for word split or compound word. */ | ||||
11535 | su->su_badflags = sp->ts_save_badflags; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11536 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11537 | /* Continue looking for NUL bytes. */ |
11538 | sp->ts_state = STATE_START; | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 11539 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11540 | /* In case we went into the prefix tree. */ |
11541 | byts = fbyts; | ||||
11542 | idxs = fidxs; | ||||
11543 | break; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11544 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11545 | case STATE_ENDNUL: |
11546 | /* Past the NUL bytes in the node. */ | ||||
11547 | su->su_badflags = sp->ts_save_badflags; | ||||
11548 | if (fword[sp->ts_fidx] == NUL | ||||
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 11549 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11550 | && sp->ts_tcharlen == 0 |
Bram Moolenaar | da2303d | 2005-08-30 21:55:26 +0000 | [diff] [blame] | 11551 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11552 | ) |
11553 | { | ||||
11554 | /* The badword ends, can't use STATE_PLAIN. */ | ||||
11555 | sp->ts_state = STATE_DEL; | ||||
11556 | break; | ||||
11557 | } | ||||
11558 | sp->ts_state = STATE_PLAIN; | ||||
11559 | /*FALLTHROUGH*/ | ||||
11560 | |||||
11561 | case STATE_PLAIN: | ||||
11562 | /* | ||||
11563 | * Go over all possible bytes at this node, add each to tword[] | ||||
11564 | * and use child node. "ts_curi" is the index. | ||||
11565 | */ | ||||
11566 | arridx = sp->ts_arridx; | ||||
11567 | if (sp->ts_curi > byts[arridx]) | ||||
11568 | { | ||||
11569 | /* Done all bytes at this node, do next state. When still at | ||||
11570 | * already changed bytes skip the other tricks. */ | ||||
11571 | if (sp->ts_fidx >= sp->ts_fidxtry) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11572 | sp->ts_state = STATE_DEL; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11573 | else |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11574 | sp->ts_state = STATE_FINAL; |
11575 | } | ||||
11576 | else | ||||
11577 | { | ||||
11578 | arridx += sp->ts_curi++; | ||||
11579 | c = byts[arridx]; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11580 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11581 | /* Normal byte, go one level deeper. If it's not equal to the |
11582 | * byte in the bad word adjust the score. But don't even try | ||||
11583 | * when the byte was already changed. And don't try when we | ||||
11584 | * just deleted this byte, accepting it is always cheaper than | | | | |
11585 | * delete + substitute. */ | ||||
11586 | if (c == fword[sp->ts_fidx] | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11587 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11588 | || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE) |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 11589 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11590 | ) |
11591 | newscore = 0; | ||||
11592 | else | ||||
11593 | newscore = SCORE_SUBST; | ||||
11594 | if ((newscore == 0 | ||||
11595 | || (sp->ts_fidx >= sp->ts_fidxtry | ||||
11596 | && ((sp->ts_flags & TSF_DIDDEL) == 0 | ||||
11597 | || c != fword[sp->ts_delidx]))) | ||||
11598 | && TRY_DEEPER(su, stack, depth, newscore)) | ||||
11599 | { | ||||
11600 | go_deeper(stack, depth, newscore); | ||||
11601 | #ifdef DEBUG_TRIEWALK | ||||
11602 | if (newscore > 0) | ||||
11603 | sprintf(changename[depth], "%.*s-%s: subst %c to %c", | ||||
11604 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
11605 | fword[sp->ts_fidx], c); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11606 | else |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11607 | sprintf(changename[depth], "%.*s-%s: accept %c", |
11608 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
11609 | fword[sp->ts_fidx]); | ||||
11610 | #endif | ||||
11611 | ++depth; | ||||
11612 | sp = &stack[depth]; | ||||
11613 | ++sp->ts_fidx; | ||||
11614 | tword[sp->ts_twordlen++] = c; | ||||
11615 | sp->ts_arridx = idxs[arridx]; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11616 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11617 | if (newscore == SCORE_SUBST) |
11618 | sp->ts_isdiff = DIFF_YES; | ||||
11619 | if (has_mbyte) | ||||
11620 | { | ||||
11621 | /* Multi-byte characters are a bit complicated to | ||||
11622 | * handle: They differ when any of the bytes differ | ||||
11623 | * and then their length may also differ. */ | ||||
11624 | if (sp->ts_tcharlen == 0) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11625 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11626 | /* First byte. */ |
11627 | sp->ts_tcharidx = 0; | ||||
11628 | sp->ts_tcharlen = MB_BYTE2LEN(c); | ||||
11629 | sp->ts_fcharstart = sp->ts_fidx - 1; | ||||
11630 | sp->ts_isdiff = (newscore != 0) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11631 | ? DIFF_YES : DIFF_NONE; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11632 | } |
11633 | else if (sp->ts_isdiff == DIFF_INSERT) | ||||
11634 | /* When inserting trail bytes don't advance in the | ||||
11635 | * bad word. */ | ||||
11636 | --sp->ts_fidx; | ||||
11637 | if (++sp->ts_tcharidx == sp->ts_tcharlen) | ||||
11638 | { | ||||
11639 | /* Last byte of character. */ | ||||
11640 | if (sp->ts_isdiff == DIFF_YES) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11641 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11642 | /* Correct ts_fidx for the byte length of the |
11643 | * character (we didn't check that before). */ | ||||
11644 | sp->ts_fidx = sp->ts_fcharstart | ||||
11645 | + MB_BYTE2LEN( | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11646 | fword[sp->ts_fcharstart]); |
11647 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11648 | /* For changing a composing character adjust |
11649 | * the score from SCORE_SUBST to | ||||
11650 | * SCORE_SUBCOMP. */ | ||||
11651 | if (enc_utf8 | ||||
11652 | && utf_iscomposing( | ||||
11653 | mb_ptr2char(tword | ||||
11654 | + sp->ts_twordlen | ||||
Bram Moolenaar | e5b8e3d | 2005-08-12 19:48:49 +0000 | [diff] [blame] | 11655 | - sp->ts_tcharlen)) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11656 | && utf_iscomposing( |
11657 | mb_ptr2char(fword | ||||
Bram Moolenaar | e5b8e3d | 2005-08-12 19:48:49 +0000 | [diff] [blame] | 11658 | + sp->ts_fcharstart))) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11659 | sp->ts_score -= |
Bram Moolenaar | e5b8e3d | 2005-08-12 19:48:49 +0000 | [diff] [blame] | 11660 | SCORE_SUBST - SCORE_SUBCOMP; |
11661 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11662 | /* For a similar character adjust score from |
11663 | * SCORE_SUBST to SCORE_SIMILAR. */ | ||||
11664 | else if (!soundfold | ||||
11665 | && slang->sl_has_map | ||||
11666 | && similar_chars(slang, | ||||
11667 | mb_ptr2char(tword | ||||
11668 | + sp->ts_twordlen | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11669 | - sp->ts_tcharlen), |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11670 | mb_ptr2char(fword |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11671 | + sp->ts_fcharstart))) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11672 | sp->ts_score -= |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11673 | SCORE_SUBST - SCORE_SIMILAR; |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11674 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11675 | else if (sp->ts_isdiff == DIFF_INSERT |
11676 | && sp->ts_twordlen > sp->ts_tcharlen) | ||||
11677 | { | ||||
11678 | p = tword + sp->ts_twordlen - sp->ts_tcharlen; | ||||
11679 | c = mb_ptr2char(p); | ||||
11680 | if (enc_utf8 && utf_iscomposing(c)) | ||||
11681 | { | ||||
11682 | /* Inserting a composing char doesn't | ||||
11683 | * count that much. */ | ||||
11684 | sp->ts_score -= SCORE_INS - SCORE_INSCOMP; | ||||
11685 | } | ||||
11686 | else | ||||
11687 | { | ||||
11688 | /* If the previous character was the same, | ||||
11689 | * thus doubling a character, give a bonus | ||||
11690 | * to the score. Also for the soundfold | ||||
11691 | * tree (might seem illogical but does | ||||
11692 | * give better scores). */ | ||||
11693 | mb_ptr_back(tword, p); | ||||
11694 | if (c == mb_ptr2char(p)) | ||||
11695 | sp->ts_score -= SCORE_INS | ||||
11696 | - SCORE_INSDUP; | ||||
11697 | } | ||||
11698 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11699 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11700 | /* Starting a new char, reset the length. */ |
11701 | sp->ts_tcharlen = 0; | ||||
11702 | } | ||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 11703 | } |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11704 | else |
11705 | #endif | ||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 11706 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11707 | /* If we found a similar char adjust the score. |
11708 | * We do this after calling go_deeper() because | ||||
11709 | * it's slow. */ | ||||
11710 | if (newscore != 0 | ||||
11711 | && !soundfold | ||||
11712 | && slang->sl_has_map | ||||
11713 | && similar_chars(slang, | ||||
11714 | c, fword[sp->ts_fidx - 1])) | ||||
11715 | sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; | ||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 11716 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11717 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11718 | } |
11719 | break; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11720 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11721 | case STATE_DEL: |
11722 | #ifdef FEAT_MBYTE | ||||
11723 | /* When past the first byte of a multi-byte char don't try | ||||
11724 | * delete/insert/swap a character. */ | ||||
11725 | if (has_mbyte && sp->ts_tcharlen > 0) | ||||
11726 | { | ||||
11727 | sp->ts_state = STATE_FINAL; | ||||
11728 | break; | ||||
11729 | } | ||||
11730 | #endif | ||||
11731 | /* | ||||
11732 | * Try skipping one character in the bad word (delete it). | ||||
11733 | */ | ||||
11734 | sp->ts_state = STATE_INS_PREP; | ||||
11735 | sp->ts_curi = 1; | ||||
11736 | if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*') | ||||
11737 | /* Deleting a vowel at the start of a word counts less, see | ||||
11738 | * soundalike_score(). */ | ||||
11739 | newscore = 2 * SCORE_DEL / 3; | ||||
11740 | else | ||||
11741 | newscore = SCORE_DEL; | ||||
11742 | if (fword[sp->ts_fidx] != NUL | ||||
11743 | && TRY_DEEPER(su, stack, depth, newscore)) | ||||
11744 | { | ||||
11745 | go_deeper(stack, depth, newscore); | ||||
11746 | #ifdef DEBUG_TRIEWALK | ||||
11747 | sprintf(changename[depth], "%.*s-%s: delete %c", | ||||
11748 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
11749 | fword[sp->ts_fidx]); | ||||
11750 | #endif | ||||
11751 | ++depth; | ||||
11752 | |||||
11753 | /* Remember what character we deleted, so that we can avoid | ||||
11754 | * inserting it again. */ | ||||
11755 | stack[depth].ts_flags |= TSF_DIDDEL; | ||||
11756 | stack[depth].ts_delidx = sp->ts_fidx; | ||||
11757 | |||||
11758 | /* Advance over the character in fword[]. Give a bonus to the | ||||
11759 | * score if the same character is following "nn" -> "n". It's | ||||
11760 | * a bit illogical for soundfold tree but it does give better | ||||
11761 | * results. */ | ||||
11762 | #ifdef FEAT_MBYTE | ||||
11763 | if (has_mbyte) | ||||
11764 | { | ||||
11765 | c = mb_ptr2char(fword + sp->ts_fidx); | ||||
11766 | stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]); | ||||
11767 | if (enc_utf8 && utf_iscomposing(c)) | ||||
11768 | stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP; | ||||
11769 | else if (c == mb_ptr2char(fword + stack[depth].ts_fidx)) | ||||
11770 | stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; | ||||
11771 | } | ||||
11772 | else | ||||
11773 | #endif | ||||
11774 | { | ||||
11775 | ++stack[depth].ts_fidx; | ||||
11776 | if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1]) | ||||
11777 | stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP; | ||||
11778 | } | ||||
11779 | break; | ||||
11780 | } | ||||
11781 | /*FALLTHROUGH*/ | ||||
11782 | |||||
11783 | case STATE_INS_PREP: | ||||
11784 | if (sp->ts_flags & TSF_DIDDEL) | ||||
11785 | { | ||||
11786 | /* If we just deleted a byte then inserting won't make sense, | ||||
11787 | * a substitute is always cheaper. */ | ||||
11788 | sp->ts_state = STATE_SWAP; | ||||
11789 | break; | ||||
11790 | } | ||||
11791 | |||||
11792 | /* skip over NUL bytes */ | ||||
11793 | n = sp->ts_arridx; | ||||
11794 | for (;;) | ||||
11795 | { | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11796 | if (sp->ts_curi > byts[n]) |
11797 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11798 | /* Only NUL bytes at this node, go to next state. */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11799 | sp->ts_state = STATE_SWAP; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11800 | break; |
11801 | } | ||||
11802 | if (byts[n + sp->ts_curi] != NUL) | ||||
11803 | { | ||||
11804 | /* Found a byte to insert. */ | ||||
11805 | sp->ts_state = STATE_INS; | ||||
11806 | break; | ||||
11807 | } | ||||
11808 | ++sp->ts_curi; | ||||
11809 | } | ||||
11810 | break; | ||||
11811 | |||||
11812 | /*FALLTHROUGH*/ | ||||
11813 | |||||
11814 | case STATE_INS: | ||||
11815 | /* Insert one byte. Repeat this for each possible byte at this | ||||
11816 | * node. */ | ||||
11817 | n = sp->ts_arridx; | ||||
11818 | if (sp->ts_curi > byts[n]) | ||||
11819 | { | ||||
11820 | /* Done all bytes at this node, go to next state. */ | ||||
11821 | sp->ts_state = STATE_SWAP; | ||||
11822 | break; | ||||
11823 | } | ||||
11824 | |||||
11825 | /* Do one more byte at this node, but: | ||||
11826 | * - Skip NUL bytes. | ||||
11827 | * - Skip the byte if it's equal to the byte in the word, | ||||
11828 | * accepting that byte is always better. | ||||
11829 | */ | ||||
11830 | n += sp->ts_curi++; | ||||
11831 | c = byts[n]; | ||||
11832 | if (soundfold && sp->ts_twordlen == 0 && c == '*') | ||||
11833 | /* Inserting a vowel at the start of a word counts less, | ||||
11834 | * see soundalike_score(). */ | ||||
11835 | newscore = 2 * SCORE_INS / 3; | ||||
11836 | else | ||||
11837 | newscore = SCORE_INS; | ||||
11838 | if (c != fword[sp->ts_fidx] | ||||
11839 | && TRY_DEEPER(su, stack, depth, newscore)) | ||||
11840 | { | ||||
11841 | go_deeper(stack, depth, newscore); | ||||
11842 | #ifdef DEBUG_TRIEWALK | ||||
11843 | sprintf(changename[depth], "%.*s-%s: insert %c", | ||||
11844 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
11845 | c); | ||||
11846 | #endif | ||||
11847 | ++depth; | ||||
11848 | sp = &stack[depth]; | ||||
11849 | tword[sp->ts_twordlen++] = c; | ||||
11850 | sp->ts_arridx = idxs[n]; | ||||
11851 | #ifdef FEAT_MBYTE | ||||
11852 | if (has_mbyte) | ||||
11853 | { | ||||
11854 | fl = MB_BYTE2LEN(c); | ||||
11855 | if (fl > 1) | ||||
11856 | { | ||||
11857 | /* There are following bytes for the same character. | ||||
11858 | * We must find all bytes before trying | ||||
11859 | * delete/insert/swap/etc. */ | ||||
11860 | sp->ts_tcharlen = fl; | ||||
11861 | sp->ts_tcharidx = 1; | ||||
11862 | sp->ts_isdiff = DIFF_INSERT; | ||||
11863 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11864 | } |
11865 | else | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11866 | fl = 1; |
11867 | if (fl == 1) | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11868 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11869 | { |
11870 | /* If the previous character was the same, thus doubling a | ||||
11871 | * character, give a bonus to the score. Also for | ||||
11872 | * soundfold words (illogical but does give a better | ||||
11873 | * score). */ | ||||
11874 | if (sp->ts_twordlen >= 2 | ||||
Bram Moolenaar | ea40885 | 2005-06-25 22:49:46 +0000 | [diff] [blame] | 11875 | && tword[sp->ts_twordlen - 2] == c) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11876 | sp->ts_score -= SCORE_INS - SCORE_INSDUP; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11877 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11878 | } |
11879 | break; | ||||
11880 | |||||
11881 | case STATE_SWAP: | ||||
11882 | /* | ||||
11883 | * Swap two bytes in the bad word: "12" -> "21". | ||||
11884 | * We change "fword" here, it's changed back afterwards at | ||||
11885 | * STATE_UNSWAP. | ||||
11886 | */ | ||||
11887 | p = fword + sp->ts_fidx; | ||||
11888 | c = *p; | ||||
11889 | if (c == NUL) | ||||
11890 | { | ||||
11891 | /* End of word, can't swap or replace. */ | ||||
11892 | sp->ts_state = STATE_FINAL; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11893 | break; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11894 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 11895 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11896 | /* Don't swap if the first character is not a word character. |
11897 | * SWAP3 etc. also don't make sense then. */ | ||||
11898 | if (!soundfold && !spell_iswordp(p, curbuf)) | ||||
11899 | { | ||||
11900 | sp->ts_state = STATE_REP_INI; | ||||
11901 | break; | ||||
11902 | } | ||||
Bram Moolenaar | bb15b65 | 2005-10-03 21:52:09 +0000 | [diff] [blame] | 11903 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11904 | #ifdef FEAT_MBYTE |
11905 | if (has_mbyte) | ||||
11906 | { | ||||
11907 | n = mb_cptr2len(p); | ||||
11908 | c = mb_ptr2char(p); | ||||
11909 | if (!soundfold && !spell_iswordp(p + n, curbuf)) | ||||
11910 | c2 = c; /* don't swap non-word char */ | ||||
11911 | else | ||||
11912 | c2 = mb_ptr2char(p + n); | ||||
11913 | } | ||||
11914 | else | ||||
11915 | #endif | ||||
11916 | { | ||||
11917 | if (!soundfold && !spell_iswordp(p + 1, curbuf)) | ||||
11918 | c2 = c; /* don't swap non-word char */ | ||||
11919 | else | ||||
11920 | c2 = p[1]; | ||||
11921 | } | ||||
Bram Moolenaar | bb15b65 | 2005-10-03 21:52:09 +0000 | [diff] [blame] | 11922 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11923 | /* When characters are identical, swap won't do anything. |
11924 | * Also get here if the second char is not a word character. */ | ||||
11925 | if (c == c2) | ||||
11926 | { | ||||
11927 | sp->ts_state = STATE_SWAP3; | ||||
11928 | break; | ||||
11929 | } | ||||
11930 | if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP)) | ||||
11931 | { | ||||
11932 | go_deeper(stack, depth, SCORE_SWAP); | ||||
11933 | #ifdef DEBUG_TRIEWALK | ||||
11934 | sprintf(changename[depth], "%.*s-%s: swap %c and %c", | ||||
11935 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
11936 | c, c2); | ||||
11937 | #endif | ||||
11938 | sp->ts_state = STATE_UNSWAP; | ||||
11939 | ++depth; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11940 | #ifdef FEAT_MBYTE |
11941 | if (has_mbyte) | ||||
11942 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11943 | fl = mb_char2len(c2); |
11944 | mch_memmove(p, p + n, fl); | ||||
11945 | mb_char2bytes(c, p + fl); | ||||
11946 | stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11947 | } |
11948 | else | ||||
11949 | #endif | ||||
Bram Moolenaar | bb15b65 | 2005-10-03 21:52:09 +0000 | [diff] [blame] | 11950 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11951 | p[0] = c2; |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11952 | p[1] = c; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11953 | stack[depth].ts_fidxtry = sp->ts_fidx + 2; |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11954 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11955 | } |
11956 | else | ||||
11957 | /* If this swap doesn't work then SWAP3 won't either. */ | ||||
11958 | sp->ts_state = STATE_REP_INI; | ||||
11959 | break; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 11960 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 11961 | case STATE_UNSWAP: |
11962 | /* Undo the STATE_SWAP swap: "21" -> "12". */ | ||||
11963 | p = fword + sp->ts_fidx; | ||||
11964 | #ifdef FEAT_MBYTE | ||||
11965 | if (has_mbyte) | ||||
11966 | { | ||||
11967 | n = MB_BYTE2LEN(*p); | ||||
11968 | c = mb_ptr2char(p + n); | ||||
11969 | mch_memmove(p + MB_BYTE2LEN(p[n]), p, n); | ||||
11970 | mb_char2bytes(c, p); | ||||
11971 | } | ||||
11972 | else | ||||
11973 | #endif | ||||
11974 | { | ||||
11975 | c = *p; | ||||
11976 | *p = p[1]; | ||||
11977 | p[1] = c; | ||||
11978 | } | ||||
11979 | /*FALLTHROUGH*/ | ||||
11980 | |||||
11981 | case STATE_SWAP3: | ||||
11982 | /* Swap two bytes, skipping one: "123" -> "321". We change | ||||
11983 | * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */ | ||||
11984 | p = fword + sp->ts_fidx; | ||||
11985 | #ifdef FEAT_MBYTE | ||||
11986 | if (has_mbyte) | ||||
11987 | { | ||||
11988 | n = mb_cptr2len(p); | ||||
11989 | c = mb_ptr2char(p); | ||||
11990 | fl = mb_cptr2len(p + n); | ||||
11991 | c2 = mb_ptr2char(p + n); | ||||
11992 | if (!soundfold && !spell_iswordp(p + n + fl, curbuf)) | ||||
11993 | c3 = c; /* don't swap non-word char */ | ||||
11994 | else | ||||
11995 | c3 = mb_ptr2char(p + n + fl); | ||||
11996 | } | ||||
11997 | else | ||||
11998 | #endif | ||||
11999 | { | ||||
12000 | c = *p; | ||||
12001 | c2 = p[1]; | ||||
12002 | if (!soundfold && !spell_iswordp(p + 2, curbuf)) | ||||
12003 | c3 = c; /* don't swap non-word char */ | ||||
12004 | else | ||||
12005 | c3 = p[2]; | ||||
12006 | } | ||||
12007 | |||||
12008 | /* When characters are identical: "121" then SWAP3 result is | ||||
12009 | * identical, ROT3L result is same as SWAP: "211", ROT3R result is | | | | |
12010 | * same as SWAP on next char: "112". Thus skip all swapping. | ||||
12011 | * Also skip when c3 is NUL. | ||||
12012 | * Also get here when the third character is not a word character. | ||||
12013 | * Second character may be any char: "a.b" -> "b.a" */ | | | | |
12014 | if (c == c3 || c3 == NUL) | ||||
12015 | { | ||||
12016 | sp->ts_state = STATE_REP_INI; | ||||
12017 | break; | ||||
12018 | } | ||||
12019 | if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) | ||||
12020 | { | ||||
12021 | go_deeper(stack, depth, SCORE_SWAP3); | ||||
12022 | #ifdef DEBUG_TRIEWALK | ||||
12023 | sprintf(changename[depth], "%.*s-%s: swap3 %c and %c", | ||||
12024 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
12025 | c, c3); | ||||
12026 | #endif | ||||
12027 | sp->ts_state = STATE_UNSWAP3; | ||||
12028 | ++depth; | ||||
12029 | #ifdef FEAT_MBYTE | ||||
12030 | if (has_mbyte) | ||||
12031 | { | ||||
12032 | tl = mb_char2len(c3); | ||||
12033 | mch_memmove(p, p + n + fl, tl); | ||||
12034 | mb_char2bytes(c2, p + tl); | ||||
12035 | mb_char2bytes(c, p + fl + tl); | ||||
12036 | stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl; | ||||
12037 | } | ||||
12038 | else | ||||
12039 | #endif | ||||
12040 | { | ||||
12041 | p[0] = p[2]; | ||||
12042 | p[2] = c; | ||||
12043 | stack[depth].ts_fidxtry = sp->ts_fidx + 3; | ||||
12044 | } | ||||
12045 | } | ||||
12046 | else | ||||
12047 | sp->ts_state = STATE_REP_INI; | ||||
12048 | break; | ||||
12049 | |||||
12050 | case STATE_UNSWAP3: | ||||
12051 | /* Undo STATE_SWAP3: "321" -> "123" */ | ||||
12052 | p = fword + sp->ts_fidx; | ||||
12053 | #ifdef FEAT_MBYTE | ||||
12054 | if (has_mbyte) | ||||
12055 | { | ||||
12056 | n = MB_BYTE2LEN(*p); | ||||
12057 | c2 = mb_ptr2char(p + n); | ||||
12058 | fl = MB_BYTE2LEN(p[n]); | ||||
12059 | c = mb_ptr2char(p + n + fl); | ||||
12060 | tl = MB_BYTE2LEN(p[n + fl]); | ||||
12061 | mch_memmove(p + fl + tl, p, n); | ||||
12062 | mb_char2bytes(c, p); | ||||
12063 | mb_char2bytes(c2, p + tl); | ||||
12064 | p = p + tl; | ||||
12065 | } | ||||
12066 | else | ||||
12067 | #endif | ||||
12068 | { | ||||
12069 | c = *p; | ||||
12070 | *p = p[2]; | ||||
12071 | p[2] = c; | ||||
12072 | ++p; | ||||
12073 | } | ||||
12074 | |||||
12075 | if (!soundfold && !spell_iswordp(p, curbuf)) | ||||
12076 | { | ||||
12077 | /* Middle char is not a word char, skip the rotate. First and | ||||
12078 | * third char were already checked at swap and swap3. */ | ||||
12079 | sp->ts_state = STATE_REP_INI; | ||||
12080 | break; | ||||
12081 | } | ||||
12082 | |||||
12083 | /* Rotate three characters left: "123" -> "231". We change | ||||
12084 | * "fword" here, it's changed back afterwards at STATE_UNROT3L. */ | ||||
12085 | if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) | ||||
12086 | { | ||||
12087 | go_deeper(stack, depth, SCORE_SWAP3); | ||||
12088 | #ifdef DEBUG_TRIEWALK | ||||
12089 | p = fword + sp->ts_fidx; | ||||
12090 | sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c", | ||||
12091 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
12092 | p[0], p[1], p[2]); | ||||
12093 | #endif | ||||
12094 | sp->ts_state = STATE_UNROT3L; | ||||
12095 | ++depth; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12096 | p = fword + sp->ts_fidx; |
12097 | #ifdef FEAT_MBYTE | ||||
12098 | if (has_mbyte) | ||||
12099 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 12100 | n = mb_cptr2len(p); |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12101 | c = mb_ptr2char(p); |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 12102 | fl = mb_cptr2len(p + n); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12103 | fl += mb_cptr2len(p + n + fl); |
12104 | mch_memmove(p, p + n, fl); | ||||
12105 | mb_char2bytes(c, p + fl); | ||||
12106 | stack[depth].ts_fidxtry = sp->ts_fidx + n + fl; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12107 | } |
12108 | else | ||||
12109 | #endif | ||||
12110 | { | ||||
12111 | c = *p; | ||||
12112 | *p = p[1]; | ||||
12113 | p[1] = p[2]; | ||||
12114 | p[2] = c; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12115 | stack[depth].ts_fidxtry = sp->ts_fidx + 3; |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12116 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12117 | } |
12118 | else | ||||
12119 | sp->ts_state = STATE_REP_INI; | ||||
12120 | break; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12121 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12122 | case STATE_UNROT3L: |
12123 | /* Undo ROT3L: "231" -> "123" */ | ||||
12124 | p = fword + sp->ts_fidx; | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12125 | #ifdef FEAT_MBYTE |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12126 | if (has_mbyte) |
12127 | { | ||||
12128 | n = MB_BYTE2LEN(*p); | ||||
12129 | n += MB_BYTE2LEN(p[n]); | ||||
12130 | c = mb_ptr2char(p + n); | ||||
12131 | tl = MB_BYTE2LEN(p[n]); | ||||
12132 | mch_memmove(p + tl, p, n); | ||||
12133 | mb_char2bytes(c, p); | ||||
12134 | } | ||||
12135 | else | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12136 | #endif |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12137 | { |
12138 | c = p[2]; | ||||
12139 | p[2] = p[1]; | ||||
12140 | p[1] = *p; | ||||
12141 | *p = c; | ||||
12142 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12143 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12144 | /* Rotate three bytes right: "123" -> "312". We change "fword" |
12145 | * here, it's changed back afterwards at STATE_UNROT3R. */ | ||||
12146 | if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3)) | ||||
12147 | { | ||||
12148 | go_deeper(stack, depth, SCORE_SWAP3); | ||||
12149 | #ifdef DEBUG_TRIEWALK | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12150 | p = fword + sp->ts_fidx; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12151 | sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c", |
12152 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
12153 | p[0], p[1], p[2]); | ||||
12154 | #endif | ||||
12155 | sp->ts_state = STATE_UNROT3R; | ||||
12156 | ++depth; | ||||
12157 | p = fword + sp->ts_fidx; | ||||
12158 | #ifdef FEAT_MBYTE | ||||
12159 | if (has_mbyte) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 12160 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12161 | n = mb_cptr2len(p); |
12162 | n += mb_cptr2len(p + n); | ||||
12163 | c = mb_ptr2char(p + n); | ||||
12164 | tl = mb_cptr2len(p + n); | ||||
12165 | mch_memmove(p + tl, p, n); | ||||
12166 | mb_char2bytes(c, p); | ||||
12167 | stack[depth].ts_fidxtry = sp->ts_fidx + n + tl; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 12168 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12169 | else |
12170 | #endif | ||||
12171 | { | ||||
12172 | c = p[2]; | ||||
12173 | p[2] = p[1]; | ||||
12174 | p[1] = *p; | ||||
12175 | *p = c; | ||||
12176 | stack[depth].ts_fidxtry = sp->ts_fidx + 3; | ||||
12177 | } | ||||
12178 | } | ||||
12179 | else | ||||
12180 | sp->ts_state = STATE_REP_INI; | ||||
12181 | break; | ||||
12182 | |||||
12183 | case STATE_UNROT3R: | ||||
12184 | /* Undo ROT3R: "312" -> "123" */ | ||||
12185 | p = fword + sp->ts_fidx; | ||||
12186 | #ifdef FEAT_MBYTE | ||||
12187 | if (has_mbyte) | ||||
12188 | { | ||||
12189 | c = mb_ptr2char(p); | ||||
12190 | tl = MB_BYTE2LEN(*p); | ||||
12191 | n = MB_BYTE2LEN(p[tl]); | ||||
12192 | n += MB_BYTE2LEN(p[tl + n]); | ||||
12193 | mch_memmove(p, p + tl, n); | ||||
12194 | mb_char2bytes(c, p + n); | ||||
12195 | } | ||||
12196 | else | ||||
12197 | #endif | ||||
12198 | { | ||||
12199 | c = *p; | ||||
12200 | *p = p[1]; | ||||
12201 | p[1] = p[2]; | ||||
12202 | p[2] = c; | ||||
12203 | } | ||||
12204 | /*FALLTHROUGH*/ | ||||
12205 | |||||
12206 | case STATE_REP_INI: | ||||
12207 | /* Check if matching with REP items from the .aff file would work. | ||||
12208 | * Quickly skip if: | ||||
12209 | * - there are no REP items and we are not in the soundfold trie | ||||
12210 | * - the score is going to be too high anyway | ||||
12211 | * - already applied a REP item or swapped here */ | ||||
12212 | if ((lp->lp_replang == NULL && !soundfold) | ||||
12213 | || sp->ts_score + SCORE_REP >= su->su_maxscore | ||||
12214 | || sp->ts_fidx < sp->ts_fidxtry) | ||||
12215 | { | ||||
12216 | sp->ts_state = STATE_FINAL; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12217 | break; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12218 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12219 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12220 | /* Use the first byte to quickly find the first entry that may |
12221 | * match. If the index is -1 there is none. */ | ||||
12222 | if (soundfold) | ||||
12223 | sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]]; | ||||
12224 | else | ||||
12225 | sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]]; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12226 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12227 | if (sp->ts_curi < 0) |
12228 | { | ||||
12229 | sp->ts_state = STATE_FINAL; | ||||
12230 | break; | ||||
12231 | } | ||||
12232 | |||||
12233 | sp->ts_state = STATE_REP; | ||||
12234 | /*FALLTHROUGH*/ | ||||
12235 | |||||
12236 | case STATE_REP: | ||||
12237 | /* Try matching with REP items from the .aff file. For each match | ||||
12238 | * replace the characters and check if the resulting word is | ||||
12239 | * valid. */ | ||||
12240 | p = fword + sp->ts_fidx; | ||||
12241 | |||||
12242 | if (soundfold) | ||||
12243 | gap = &slang->sl_repsal; | ||||
12244 | else | ||||
12245 | gap = &lp->lp_replang->sl_rep; | ||||
12246 | while (sp->ts_curi < gap->ga_len) | ||||
12247 | { | ||||
12248 | ftp = (fromto_T *)gap->ga_data + sp->ts_curi++; | ||||
12249 | if (*ftp->ft_from != *p) | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 12250 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12251 | /* past possible matching entries */ |
12252 | sp->ts_curi = gap->ga_len; | ||||
12253 | break; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 12254 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12255 | if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0 |
12256 | && TRY_DEEPER(su, stack, depth, SCORE_REP)) | ||||
12257 | { | ||||
12258 | go_deeper(stack, depth, SCORE_REP); | ||||
12259 | #ifdef DEBUG_TRIEWALK | ||||
12260 | sprintf(changename[depth], "%.*s-%s: replace %s with %s", | ||||
12261 | sp->ts_twordlen, tword, fword + sp->ts_fidx, | ||||
12262 | ftp->ft_from, ftp->ft_to); | ||||
12263 | #endif | ||||
12264 | /* Need to undo this afterwards. */ | ||||
12265 | sp->ts_state = STATE_REP_UNDO; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 12266 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12267 | /* Change the "from" to the "to" string. */ |
12268 | ++depth; | ||||
12269 | fl = STRLEN(ftp->ft_from); | ||||
12270 | tl = STRLEN(ftp->ft_to); | ||||
12271 | if (fl != tl) | ||||
12272 | { | ||||
12273 | mch_memmove(p + tl, p + fl, STRLEN(p + fl) + 1); | ||||
12274 | repextra += tl - fl; | ||||
12275 | } | ||||
12276 | mch_memmove(p, ftp->ft_to, tl); | ||||
12277 | stack[depth].ts_fidxtry = sp->ts_fidx + tl; | ||||
12278 | #ifdef FEAT_MBYTE | ||||
12279 | stack[depth].ts_tcharlen = 0; | ||||
12280 | #endif | ||||
12281 | break; | ||||
12282 | } | ||||
12283 | } | ||||
12284 | |||||
12285 | if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP) | ||||
12286 | /* No (more) matches. */ | ||||
12287 | sp->ts_state = STATE_FINAL; | ||||
12288 | |||||
12289 | break; | ||||
12290 | |||||
12291 | case STATE_REP_UNDO: | ||||
12292 | /* Undo a REP replacement and continue with the next one. */ | ||||
12293 | if (soundfold) | ||||
12294 | gap = &slang->sl_repsal; | ||||
12295 | else | ||||
12296 | gap = &lp->lp_replang->sl_rep; | ||||
12297 | ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1; | ||||
12298 | fl = STRLEN(ftp->ft_from); | ||||
12299 | tl = STRLEN(ftp->ft_to); | ||||
12300 | p = fword + sp->ts_fidx; | ||||
12301 | if (fl != tl) | ||||
12302 | { | ||||
12303 | mch_memmove(p + fl, p + tl, STRLEN(p + tl) + 1); | ||||
12304 | repextra -= tl - fl; | ||||
12305 | } | ||||
12306 | mch_memmove(p, ftp->ft_from, fl); | ||||
12307 | sp->ts_state = STATE_REP; | ||||
12308 | break; | ||||
12309 | |||||
12310 | default: | ||||
12311 | /* Did all possible states at this level, go up one level. */ | ||||
12312 | --depth; | ||||
12313 | |||||
12314 | if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE) | ||||
12315 | { | ||||
12316 | /* Continue in or go back to the prefix tree. */ | ||||
12317 | byts = pbyts; | ||||
12318 | idxs = pidxs; | ||||
12319 | } | ||||
12320 | |||||
12321 | /* Don't check for CTRL-C too often, it takes time. */ | ||||
12322 | if (--breakcheckcount == 0) | ||||
12323 | { | ||||
12324 | ui_breakcheck(); | ||||
12325 | breakcheckcount = 1000; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12326 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12327 | } |
12328 | } | ||||
12329 | } | ||||
12330 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12331 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12332 | /* |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12333 | * Go one level deeper in the tree. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12334 | */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12335 | static void |
12336 | go_deeper(stack, depth, score_add) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12337 | trystate_T *stack; |
12338 | int depth; | ||||
12339 | int score_add; | ||||
12340 | { | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 12341 | stack[depth + 1] = stack[depth]; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12342 | stack[depth + 1].ts_state = STATE_START; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12343 | stack[depth + 1].ts_score = stack[depth].ts_score + score_add; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12344 | stack[depth + 1].ts_curi = 1; /* start just after length byte */ |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 12345 | stack[depth + 1].ts_flags = 0; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12346 | } |
12347 | |||||
#ifdef FEAT_MBYTE
/*
 * Case-folding may change the number of bytes a character takes.
 * Count the characters in the first "flen" bytes of "fword" and return the
 * number of bytes that the same number of characters occupies in "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u      *fword;
    int         flen;
    char_u      *word;
{
    char_u      *fp = fword;
    char_u      *fend = fword + flen;
    char_u      *wp = word;
    int         nchars = 0;

    /* First pass: how many characters are there in fword[0 .. flen - 1]? */
    while (fp < fend)
    {
        ++nchars;
        mb_ptr_adv(fp);
    }

    /* Second pass: step over that many characters in "word". */
    while (nchars > 0)
    {
        --nchars;
        mb_ptr_adv(wp);
    }

    return (int)(wp - word);
}
#endif
12369 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12370 | /* |
12371 | * "fword" is a good word with case folded. Find the matching keep-case | ||||
12372 | * words and put it in "kword". | ||||
12373 | * Theoretically there could be several keep-case words that result in the | ||||
12374 | * same case-folded word, but we only find one... | ||||
12375 | */ | ||||
12376 | static void | ||||
12377 | find_keepcap_word(slang, fword, kword) | ||||
12378 | slang_T *slang; | ||||
12379 | char_u *fword; | ||||
12380 | char_u *kword; | ||||
12381 | { | ||||
12382 | char_u uword[MAXWLEN]; /* "fword" in upper-case */ | ||||
12383 | int depth; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 12384 | idx_T tryidx; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12385 | |
12386 | /* The following arrays are used at each depth in the tree. */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 12387 | idx_T arridx[MAXWLEN]; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12388 | int round[MAXWLEN]; |
12389 | int fwordidx[MAXWLEN]; | ||||
12390 | int uwordidx[MAXWLEN]; | ||||
12391 | int kwordlen[MAXWLEN]; | ||||
12392 | |||||
12393 | int flen, ulen; | ||||
12394 | int l; | ||||
12395 | int len; | ||||
12396 | int c; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 12397 | idx_T lo, hi, m; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12398 | char_u *p; |
12399 | char_u *byts = slang->sl_kbyts; /* array with bytes of the words */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 12400 | idx_T *idxs = slang->sl_kidxs; /* array with indexes */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12401 | |
12402 | if (byts == NULL) | ||||
12403 | { | ||||
12404 | /* array is empty: "cannot happen" */ | ||||
12405 | *kword = NUL; | ||||
12406 | return; | ||||
12407 | } | ||||
12408 | |||||
12409 | /* Make an all-cap version of "fword". */ | ||||
12410 | allcap_copy(fword, uword); | ||||
12411 | |||||
12412 | /* | ||||
12413 | * Each character needs to be tried both case-folded and upper-case. | ||||
12414 | * All this gets very complicated if we keep in mind that changing case | ||||
12415 | * may change the byte length of a multi-byte character... | ||||
12416 | */ | ||||
12417 | depth = 0; | ||||
12418 | arridx[0] = 0; | ||||
12419 | round[0] = 0; | ||||
12420 | fwordidx[0] = 0; | ||||
12421 | uwordidx[0] = 0; | ||||
12422 | kwordlen[0] = 0; | ||||
12423 | while (depth >= 0) | ||||
12424 | { | ||||
12425 | if (fword[fwordidx[depth]] == NUL) | ||||
12426 | { | ||||
12427 | /* We are at the end of "fword". If the tree allows a word to end | ||||
12428 | * here we have found a match. */ | ||||
12429 | if (byts[arridx[depth] + 1] == 0) | ||||
12430 | { | ||||
12431 | kword[kwordlen[depth]] = NUL; | ||||
12432 | return; | ||||
12433 | } | ||||
12434 | |||||
12435 | /* kword is getting too long, continue one level up */ | ||||
12436 | --depth; | ||||
12437 | } | ||||
12438 | else if (++round[depth] > 2) | ||||
12439 | { | ||||
12440 | /* tried both fold-case and upper-case character, continue one | ||||
12441 | * level up */ | ||||
12442 | --depth; | ||||
12443 | } | ||||
12444 | else | ||||
12445 | { | ||||
12446 | /* | ||||
12447 | * round[depth] == 1: Try using the folded-case character. | ||||
12448 | * round[depth] == 2: Try using the upper-case character. | ||||
12449 | */ | ||||
12450 | #ifdef FEAT_MBYTE | ||||
12451 | if (has_mbyte) | ||||
12452 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 12453 | flen = mb_cptr2len(fword + fwordidx[depth]); |
12454 | ulen = mb_cptr2len(uword + uwordidx[depth]); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12455 | } |
12456 | else | ||||
12457 | #endif | ||||
12458 | ulen = flen = 1; | ||||
12459 | if (round[depth] == 1) | ||||
12460 | { | ||||
12461 | p = fword + fwordidx[depth]; | ||||
12462 | l = flen; | ||||
12463 | } | ||||
12464 | else | ||||
12465 | { | ||||
12466 | p = uword + uwordidx[depth]; | ||||
12467 | l = ulen; | ||||
12468 | } | ||||
12469 | |||||
12470 | for (tryidx = arridx[depth]; l > 0; --l) | ||||
12471 | { | ||||
12472 | /* Perform a binary search in the list of accepted bytes. */ | ||||
12473 | len = byts[tryidx++]; | ||||
12474 | c = *p++; | ||||
12475 | lo = tryidx; | ||||
12476 | hi = tryidx + len - 1; | ||||
12477 | while (lo < hi) | ||||
12478 | { | ||||
12479 | m = (lo + hi) / 2; | ||||
12480 | if (byts[m] > c) | ||||
12481 | hi = m - 1; | ||||
12482 | else if (byts[m] < c) | ||||
12483 | lo = m + 1; | ||||
12484 | else | ||||
12485 | { | ||||
12486 | lo = hi = m; | ||||
12487 | break; | ||||
12488 | } | ||||
12489 | } | ||||
12490 | |||||
12491 | /* Stop if there is no matching byte. */ | ||||
12492 | if (hi < lo || byts[lo] != c) | ||||
12493 | break; | ||||
12494 | |||||
12495 | /* Continue at the child (if there is one). */ | ||||
12496 | tryidx = idxs[lo]; | ||||
12497 | } | ||||
12498 | |||||
12499 | if (l == 0) | ||||
12500 | { | ||||
12501 | /* | ||||
12502 | * Found the matching char. Copy it to "kword" and go a | ||||
12503 | * level deeper. | ||||
12504 | */ | ||||
12505 | if (round[depth] == 1) | ||||
12506 | { | ||||
12507 | STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth], | ||||
12508 | flen); | ||||
12509 | kwordlen[depth + 1] = kwordlen[depth] + flen; | ||||
12510 | } | ||||
12511 | else | ||||
12512 | { | ||||
12513 | STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth], | ||||
12514 | ulen); | ||||
12515 | kwordlen[depth + 1] = kwordlen[depth] + ulen; | ||||
12516 | } | ||||
12517 | fwordidx[depth + 1] = fwordidx[depth] + flen; | ||||
12518 | uwordidx[depth + 1] = uwordidx[depth] + ulen; | ||||
12519 | |||||
12520 | ++depth; | ||||
12521 | arridx[depth] = tryidx; | ||||
12522 | round[depth] = 0; | ||||
12523 | } | ||||
12524 | } | ||||
12525 | } | ||||
12526 | |||||
12527 | /* Didn't find it: "cannot happen". */ | ||||
12528 | *kword = NUL; | ||||
12529 | } | ||||
12530 | |||||
12531 | /* | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12532 | * Compute the sound-a-like score for suggestions in su->su_ga and add them to |
12533 | * su->su_sga. | ||||
12534 | */ | ||||
12535 | static void | ||||
12536 | score_comp_sal(su) | ||||
12537 | suginfo_T *su; | ||||
12538 | { | ||||
12539 | langp_T *lp; | ||||
12540 | char_u badsound[MAXWLEN]; | ||||
12541 | int i; | ||||
12542 | suggest_T *stp; | ||||
12543 | suggest_T *sstp; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12544 | int score; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 12545 | int lpi; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12546 | |
12547 | if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL) | ||||
12548 | return; | ||||
12549 | |||||
12550 | /* Use the sound-folding of the first language that supports it. */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12551 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 12552 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12553 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12554 | if (lp->lp_slang->sl_sal.ga_len > 0) |
12555 | { | ||||
12556 | /* soundfold the bad word */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 12557 | spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12558 | |
12559 | for (i = 0; i < su->su_ga.ga_len; ++i) | ||||
12560 | { | ||||
12561 | stp = &SUG(su->su_ga, i); | ||||
12562 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12563 | /* Case-fold the suggested word, sound-fold it and compute the |
12564 | * sound-a-like score. */ | ||||
12565 | score = stp_sal_score(stp, su, lp->lp_slang, badsound); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12566 | if (score < SCORE_MAXMAX) |
12567 | { | ||||
12568 | /* Add the suggestion. */ | ||||
12569 | sstp = &SUG(su->su_sga, su->su_sga.ga_len); | ||||
12570 | sstp->st_word = vim_strsave(stp->st_word); | ||||
12571 | if (sstp->st_word != NULL) | ||||
12572 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12573 | sstp->st_wordlen = stp->st_wordlen; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12574 | sstp->st_score = score; |
12575 | sstp->st_altscore = 0; | ||||
12576 | sstp->st_orglen = stp->st_orglen; | ||||
12577 | ++su->su_sga.ga_len; | ||||
12578 | } | ||||
12579 | } | ||||
12580 | } | ||||
12581 | break; | ||||
12582 | } | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 12583 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12584 | } |
12585 | |||||
12586 | /* | ||||
12587 | * Combine the list of suggestions in su->su_ga and su->su_sga. | ||||
12588 | * They are intwined. | ||||
12589 | */ | ||||
12590 | static void | ||||
12591 | score_combine(su) | ||||
12592 | suginfo_T *su; | ||||
12593 | { | ||||
12594 | int i; | ||||
12595 | int j; | ||||
12596 | garray_T ga; | ||||
12597 | garray_T *gap; | ||||
12598 | langp_T *lp; | ||||
12599 | suggest_T *stp; | ||||
12600 | char_u *p; | ||||
12601 | char_u badsound[MAXWLEN]; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12602 | int round; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 12603 | int lpi; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12604 | slang_T *slang = NULL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12605 | |
12606 | /* Add the alternate score to su_ga. */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12607 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12608 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12609 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12610 | if (lp->lp_slang->sl_sal.ga_len > 0) |
12611 | { | ||||
12612 | /* soundfold the bad word */ | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12613 | slang = lp->lp_slang; |
12614 | spell_soundfold(slang, su->su_fbadword, TRUE, badsound); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12615 | |
12616 | for (i = 0; i < su->su_ga.ga_len; ++i) | ||||
12617 | { | ||||
12618 | stp = &SUG(su->su_ga, i); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12619 | stp->st_altscore = stp_sal_score(stp, su, slang, badsound); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12620 | if (stp->st_altscore == SCORE_MAXMAX) |
12621 | stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4; | ||||
12622 | else | ||||
12623 | stp->st_score = (stp->st_score * 3 | ||||
12624 | + stp->st_altscore) / 4; | ||||
12625 | stp->st_salscore = FALSE; | ||||
12626 | } | ||||
12627 | break; | ||||
12628 | } | ||||
12629 | } | ||||
12630 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12631 | if (slang == NULL) /* just in case */ |
12632 | return; | ||||
12633 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12634 | /* Add the alternate score to su_sga. */ |
12635 | for (i = 0; i < su->su_sga.ga_len; ++i) | ||||
12636 | { | ||||
12637 | stp = &SUG(su->su_sga, i); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12638 | stp->st_altscore = spell_edit_score(slang, |
12639 | su->su_badword, stp->st_word); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12640 | if (stp->st_score == SCORE_MAXMAX) |
12641 | stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8; | ||||
12642 | else | ||||
12643 | stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8; | ||||
12644 | stp->st_salscore = TRUE; | ||||
12645 | } | ||||
12646 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12647 | /* Remove bad suggestions, sort the suggestions and truncate at "maxcount" |
12648 | * for both lists. */ | ||||
12649 | check_suggestions(su, &su->su_ga); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12650 | (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount); |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12651 | check_suggestions(su, &su->su_sga); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 12652 | (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount); |
12653 | |||||
12654 | ga_init2(&ga, (int)sizeof(suginfo_T), 1); | ||||
12655 | if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL) | ||||
12656 | return; | ||||
12657 | |||||
12658 | stp = &SUG(ga, 0); | ||||
12659 | for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i) | ||||
12660 | { | ||||
12661 | /* round 1: get a suggestion from su_ga | ||||
12662 | * round 2: get a suggestion from su_sga */ | ||||
12663 | for (round = 1; round <= 2; ++round) | ||||
12664 | { | ||||
12665 | gap = round == 1 ? &su->su_ga : &su->su_sga; | ||||
12666 | if (i < gap->ga_len) | ||||
12667 | { | ||||
12668 | /* Don't add a word if it's already there. */ | ||||
12669 | p = SUG(*gap, i).st_word; | ||||
12670 | for (j = 0; j < ga.ga_len; ++j) | ||||
12671 | if (STRCMP(stp[j].st_word, p) == 0) | ||||
12672 | break; | ||||
12673 | if (j == ga.ga_len) | ||||
12674 | stp[ga.ga_len++] = SUG(*gap, i); | ||||
12675 | else | ||||
12676 | vim_free(p); | ||||
12677 | } | ||||
12678 | } | ||||
12679 | } | ||||
12680 | |||||
12681 | ga_clear(&su->su_ga); | ||||
12682 | ga_clear(&su->su_sga); | ||||
12683 | |||||
12684 | /* Truncate the list to the number of suggestions that will be displayed. */ | ||||
12685 | if (ga.ga_len > su->su_maxcount) | ||||
12686 | { | ||||
12687 | for (i = su->su_maxcount; i < ga.ga_len; ++i) | ||||
12688 | vim_free(stp[i].st_word); | ||||
12689 | ga.ga_len = su->su_maxcount; | ||||
12690 | } | ||||
12691 | |||||
12692 | su->su_ga = ga; | ||||
12693 | } | ||||
12694 | |||||
12695 | /* | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12696 | * For the goodword in "stp" compute the soundalike score compared to the |
12697 | * badword. | ||||
12698 | */ | ||||
12699 | static int | ||||
12700 | stp_sal_score(stp, su, slang, badsound) | ||||
12701 | suggest_T *stp; | ||||
12702 | suginfo_T *su; | ||||
12703 | slang_T *slang; | ||||
12704 | char_u *badsound; /* sound-folded badword */ | ||||
12705 | { | ||||
12706 | char_u *p; | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12707 | char_u *pbad; |
12708 | char_u *pgood; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12709 | char_u badsound2[MAXWLEN]; |
12710 | char_u fword[MAXWLEN]; | ||||
12711 | char_u goodsound[MAXWLEN]; | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12712 | char_u goodword[MAXWLEN]; |
12713 | int lendiff; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12714 | |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12715 | lendiff = (int)(su->su_badlen - stp->st_orglen); |
12716 | if (lendiff >= 0) | ||||
12717 | pbad = badsound; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12718 | else |
12719 | { | ||||
12720 | /* soundfold the bad word with more characters following */ | ||||
12721 | (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN); | ||||
12722 | |||||
12723 | /* When joining two words the sound often changes a lot. E.g., "t he" | ||||
12724 | * sounds like "t h" while "the" sounds like "@". Avoid that by | ||||
12725 | * removing the space. Don't do it when the good word also contains a | ||||
12726 | * space. */ | ||||
12727 | if (vim_iswhite(su->su_badptr[su->su_badlen]) | ||||
12728 | && *skiptowhite(stp->st_word) == NUL) | ||||
12729 | for (p = fword; *(p = skiptowhite(p)) != NUL; ) | ||||
12730 | mch_memmove(p, p + 1, STRLEN(p)); | ||||
12731 | |||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 12732 | spell_soundfold(slang, fword, TRUE, badsound2); |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12733 | pbad = badsound2; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12734 | } |
12735 | |||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12736 | if (lendiff > 0) |
12737 | { | ||||
12738 | /* Add part of the bad word to the good word, so that we soundfold | ||||
12739 | * what replaces the bad word. */ | ||||
12740 | STRCPY(goodword, stp->st_word); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12741 | vim_strncpy(goodword + stp->st_wordlen, |
12742 | su->su_badptr + su->su_badlen - lendiff, lendiff); | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12743 | pgood = goodword; |
12744 | } | ||||
12745 | else | ||||
12746 | pgood = stp->st_word; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12747 | |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 12748 | /* Sound-fold the word and compute the score for the difference. */ |
12749 | spell_soundfold(slang, pgood, FALSE, goodsound); | ||||
12750 | |||||
12751 | return soundalike_score(goodsound, pbad); | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12752 | } |
12753 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12754 | /* structure used to store soundfolded words that add_sound_suggest() has |
12755 | * handled already. */ | ||||
12756 | typedef struct | ||||
12757 | { | ||||
12758 | short sft_score; /* lowest score used */ | ||||
12759 | char_u sft_word[1]; /* soundfolded word, actually longer */ | ||||
12760 | } sftword_T; | ||||
12761 | |||||
12762 | static sftword_T dumsft; | ||||
12763 | #define HIKEY2SFT(p) ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft))) | ||||
12764 | #define HI2SFT(hi) HIKEY2SFT((hi)->hi_key) | ||||
12765 | |||||
12766 | /* | ||||
12767 | * Prepare for calling suggest_try_soundalike(). | ||||
12768 | */ | ||||
12769 | static void | ||||
12770 | suggest_try_soundalike_prep() | ||||
12771 | { | ||||
12772 | langp_T *lp; | ||||
12773 | int lpi; | ||||
12774 | slang_T *slang; | ||||
12775 | |||||
12776 | /* Do this for all languages that support sound folding and for which a | ||||
12777 | * .sug file has been loaded. */ | ||||
12778 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) | ||||
12779 | { | ||||
12780 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); | ||||
12781 | slang = lp->lp_slang; | ||||
12782 | if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) | ||||
12783 | /* prepare the hashtable used by add_sound_suggest() */ | ||||
12784 | hash_init(&slang->sl_sounddone); | ||||
12785 | } | ||||
12786 | } | ||||
12787 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 12788 | /* |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12789 | * Find suggestions by comparing the word in a sound-a-like form. |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12790 | * Note: This doesn't support postponed prefixes. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12791 | */ |
12792 | static void | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 12793 | suggest_try_soundalike(su) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12794 | suginfo_T *su; |
12795 | { | ||||
12796 | char_u salword[MAXWLEN]; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12797 | langp_T *lp; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 12798 | int lpi; |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12799 | slang_T *slang; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12800 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12801 | /* Do this for all languages that support sound folding and for which a |
12802 | * .sug file has been loaded. */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12803 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12804 | { |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12805 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); |
12806 | slang = lp->lp_slang; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12807 | if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12808 | { |
12809 | /* soundfold the bad word */ | ||||
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 12810 | spell_soundfold(slang, su->su_fbadword, TRUE, salword); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12811 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12812 | /* try all kinds of inserts/deletes/swaps/etc. */ |
12813 | /* TODO: also soundfold the next words, so that we can try joining | ||||
12814 | * and splitting */ | ||||
12815 | suggest_trie_walk(su, lp, salword, TRUE); | ||||
12816 | } | ||||
12817 | } | ||||
12818 | } | ||||
12819 | |||||
12820 | /* | ||||
12821 | * Finish up after calling suggest_try_soundalike(). | ||||
12822 | */ | ||||
12823 | static void | ||||
12824 | suggest_try_soundalike_finish() | ||||
12825 | { | ||||
12826 | langp_T *lp; | ||||
12827 | int lpi; | ||||
12828 | slang_T *slang; | ||||
12829 | int todo; | ||||
12830 | hashitem_T *hi; | ||||
12831 | |||||
12832 | /* Do this for all languages that support sound folding and for which a | ||||
12833 | * .sug file has been loaded. */ | ||||
12834 | for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi) | ||||
12835 | { | ||||
12836 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); | ||||
12837 | slang = lp->lp_slang; | ||||
12838 | if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL) | ||||
12839 | { | ||||
12840 | /* Free the info about handled words. */ | ||||
12841 | todo = slang->sl_sounddone.ht_used; | ||||
12842 | for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi) | ||||
12843 | if (!HASHITEM_EMPTY(hi)) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12844 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12845 | vim_free(HI2SFT(hi)); |
12846 | --todo; | ||||
12847 | } | ||||
12848 | hash_clear(&slang->sl_sounddone); | ||||
12849 | } | ||||
12850 | } | ||||
12851 | } | ||||
12852 | |||||
12853 | /* | ||||
12854 | * A match with a soundfolded word is found. Add the good word(s) that | ||||
12855 | * produce this soundfolded word. | ||||
12856 | */ | ||||
12857 | static void | ||||
12858 | add_sound_suggest(su, goodword, score, lp) | ||||
12859 | suginfo_T *su; | ||||
12860 | char_u *goodword; | ||||
12861 | int score; /* soundfold score */ | ||||
12862 | langp_T *lp; | ||||
12863 | { | ||||
12864 | slang_T *slang = lp->lp_slang; /* language for sound folding */ | ||||
12865 | int sfwordnr; | ||||
12866 | char_u *nrline; | ||||
12867 | int orgnr; | ||||
12868 | char_u theword[MAXWLEN]; | ||||
12869 | int i; | ||||
12870 | int wlen; | ||||
12871 | char_u *byts; | ||||
12872 | idx_T *idxs; | ||||
12873 | int n; | ||||
12874 | int wordcount; | ||||
12875 | int wc; | ||||
12876 | int goodscore; | ||||
12877 | hash_T hash; | ||||
12878 | hashitem_T *hi; | ||||
12879 | sftword_T *sft; | ||||
12880 | int bc, gc; | ||||
12881 | int limit; | ||||
12882 | |||||
12883 | /* | ||||
12884 | * It's very well possible that the same soundfold word is found several | ||||
12885 | * times with different scores. Since the following is quite slow only do | ||||
12886 | * the words that have a better score than before. Use a hashtable to | ||||
12887 | * remember the words that have been done. | ||||
12888 | */ | ||||
12889 | hash = hash_hash(goodword); | ||||
12890 | hi = hash_lookup(&slang->sl_sounddone, goodword, hash); | ||||
12891 | if (HASHITEM_EMPTY(hi)) | ||||
12892 | { | ||||
12893 | sft = (sftword_T *)alloc(sizeof(sftword_T) + STRLEN(goodword)); | ||||
12894 | if (sft != NULL) | ||||
12895 | { | ||||
12896 | sft->sft_score = score; | ||||
12897 | STRCPY(sft->sft_word, goodword); | ||||
12898 | hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash); | ||||
12899 | } | ||||
12900 | } | ||||
12901 | else | ||||
12902 | { | ||||
12903 | sft = HI2SFT(hi); | ||||
12904 | if (score >= sft->sft_score) | ||||
12905 | return; | ||||
12906 | sft->sft_score = score; | ||||
12907 | } | ||||
12908 | |||||
12909 | /* | ||||
12910 | * Find the word nr in the soundfold tree. | ||||
12911 | */ | ||||
12912 | sfwordnr = soundfold_find(slang, goodword); | ||||
12913 | if (sfwordnr < 0) | ||||
12914 | { | ||||
12915 | EMSG2(_(e_intern2), "add_sound_suggest()"); | ||||
12916 | return; | ||||
12917 | } | ||||
12918 | |||||
12919 | /* | ||||
12920 | * go over the list of good words that produce this soundfold word | ||||
12921 | */ | ||||
12922 | nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE); | ||||
12923 | orgnr = 0; | ||||
12924 | while (*nrline != NUL) | ||||
12925 | { | ||||
12926 | /* The wordnr was stored in a minimal nr of bytes as an offset to the | ||||
12927 | * previous wordnr. */ | ||||
12928 | orgnr += bytes2offset(&nrline); | ||||
12929 | |||||
12930 | byts = slang->sl_fbyts; | ||||
12931 | idxs = slang->sl_fidxs; | ||||
12932 | |||||
12933 | /* Lookup the word "orgnr" one of the two tries. */ | ||||
12934 | n = 0; | ||||
12935 | wlen = 0; | ||||
12936 | wordcount = 0; | ||||
12937 | for (;;) | ||||
12938 | { | ||||
12939 | i = 1; | ||||
12940 | if (wordcount == orgnr && byts[n + 1] == NUL) | ||||
12941 | break; /* found end of word */ | ||||
12942 | |||||
12943 | if (byts[n + 1] == NUL) | ||||
12944 | ++wordcount; | ||||
12945 | |||||
12946 | /* skip over the NUL bytes */ | ||||
12947 | for ( ; byts[n + i] == NUL; ++i) | ||||
12948 | if (i > byts[n]) /* safety check */ | ||||
12949 | { | ||||
12950 | STRCPY(theword + wlen, "BAD"); | ||||
12951 | goto badword; | ||||
12952 | } | ||||
12953 | |||||
12954 | /* One of the siblings must have the word. */ | ||||
12955 | for ( ; i < byts[n]; ++i) | ||||
12956 | { | ||||
12957 | wc = idxs[idxs[n + i]]; /* nr of words under this byte */ | ||||
12958 | if (wordcount + wc > orgnr) | ||||
12959 | break; | ||||
12960 | wordcount += wc; | ||||
12961 | } | ||||
12962 | |||||
12963 | theword[wlen++] = byts[n + i]; | ||||
12964 | n = idxs[n + i]; | ||||
12965 | } | ||||
12966 | badword: | ||||
12967 | theword[wlen] = NUL; | ||||
12968 | |||||
12969 | /* Go over the possible flags and regions. */ | ||||
12970 | for (; i <= byts[n] && byts[n + i] == NUL; ++i) | ||||
12971 | { | ||||
12972 | char_u cword[MAXWLEN]; | ||||
12973 | char_u *p; | ||||
12974 | int flags = (int)idxs[n + i]; | ||||
12975 | |||||
Bram Moolenaar | e1438bb | 2006-03-01 22:01:55 +0000 | [diff] [blame] | 12976 | /* Skip words with the NOSUGGEST flag */ |
12977 | if (flags & WF_NOSUGGEST) | ||||
12978 | continue; | ||||
12979 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12980 | if (flags & WF_KEEPCAP) |
12981 | { | ||||
12982 | /* Must find the word in the keep-case tree. */ | ||||
12983 | find_keepcap_word(slang, theword, cword); | ||||
12984 | p = cword; | ||||
12985 | } | ||||
12986 | else | ||||
12987 | { | ||||
12988 | flags |= su->su_badflags; | ||||
12989 | if ((flags & WF_CAPMASK) != 0) | ||||
12990 | { | ||||
12991 | /* Need to fix case according to "flags". */ | ||||
12992 | make_case_word(theword, cword, flags); | ||||
12993 | p = cword; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 12994 | } |
12995 | else | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 12996 | p = theword; |
12997 | } | ||||
12998 | |||||
12999 | /* Add the suggestion. */ | ||||
13000 | if (sps_flags & SPS_DOUBLE) | ||||
13001 | { | ||||
13002 | /* Add the suggestion if the score isn't too bad. */ | ||||
13003 | if (score <= su->su_maxscore) | ||||
13004 | add_suggestion(su, &su->su_sga, p, su->su_badlen, | ||||
13005 | score, 0, FALSE, slang, FALSE); | ||||
13006 | } | ||||
13007 | else | ||||
13008 | { | ||||
13009 | /* Add a penalty for words in another region. */ | ||||
13010 | if ((flags & WF_REGION) | ||||
13011 | && (((unsigned)flags >> 16) & lp->lp_region) == 0) | ||||
13012 | goodscore = SCORE_REGION; | ||||
13013 | else | ||||
13014 | goodscore = 0; | ||||
13015 | |||||
13016 | /* Add a small penalty for changing the first letter from | ||||
13017 | * lower to upper case. Helps for "tath" -> "Kath", which is | ||||
13018 | * less common thatn "tath" -> "path". Don't do it when the | ||||
13019 | * letter is the same, that has already been counted. */ | ||||
13020 | gc = PTR2CHAR(p); | ||||
13021 | if (SPELL_ISUPPER(gc)) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13022 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13023 | bc = PTR2CHAR(su->su_badword); |
13024 | if (!SPELL_ISUPPER(bc) | ||||
13025 | && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc)) | ||||
13026 | goodscore += SCORE_ICASE / 2; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13027 | } |
13028 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13029 | /* Compute the score for the good word. This only does letter |
13030 | * insert/delete/swap/replace. REP items are not considered, | ||||
13031 | * which may make the score a bit higher. | ||||
13032 | * Use a limit for the score to make it work faster. Use | ||||
13033 | * MAXSCORE(), because RESCORE() will change the score. | ||||
13034 | * If the limit is very high then the iterative method is | ||||
13035 | * inefficient, using an array is quicker. */ | ||||
13036 | limit = MAXSCORE(su->su_sfmaxscore - goodscore, score); | ||||
13037 | if (limit > SCORE_LIMITMAX) | ||||
13038 | goodscore += spell_edit_score(slang, su->su_badword, p); | ||||
13039 | else | ||||
13040 | goodscore += spell_edit_score_limit(slang, su->su_badword, | ||||
13041 | p, limit); | ||||
13042 | |||||
13043 | /* When going over the limit don't bother to do the rest. */ | ||||
13044 | if (goodscore < SCORE_MAXMAX) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13045 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13046 | /* Give a bonus to words seen before. */ |
13047 | goodscore = score_wordcount_adj(slang, goodscore, p, FALSE); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13048 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13049 | /* Add the suggestion if the score isn't too bad. */ |
13050 | goodscore = RESCORE(goodscore, score); | ||||
13051 | if (goodscore <= su->su_sfmaxscore) | ||||
13052 | add_suggestion(su, &su->su_ga, p, su->su_badlen, | ||||
13053 | goodscore, score, TRUE, slang, TRUE); | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13054 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13055 | } |
13056 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13057 | /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13058 | } |
13059 | } | ||||
13060 | |||||
13061 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13062 | * Find word "word" in fold-case tree for "slang" and return the word number. |
13063 | */ | ||||
13064 | static int | ||||
13065 | soundfold_find(slang, word) | ||||
13066 | slang_T *slang; | ||||
13067 | char_u *word; | ||||
13068 | { | ||||
13069 | idx_T arridx = 0; | ||||
13070 | int len; | ||||
13071 | int wlen = 0; | ||||
13072 | int c; | ||||
13073 | char_u *ptr = word; | ||||
13074 | char_u *byts; | ||||
13075 | idx_T *idxs; | ||||
13076 | int wordnr = 0; | ||||
13077 | |||||
13078 | byts = slang->sl_sbyts; | ||||
13079 | idxs = slang->sl_sidxs; | ||||
13080 | |||||
13081 | for (;;) | ||||
13082 | { | ||||
13083 | /* First byte is the number of possible bytes. */ | ||||
13084 | len = byts[arridx++]; | ||||
13085 | |||||
13086 | /* If the first possible byte is a zero the word could end here. | ||||
13087 | * If the word ends we found the word. If not skip the NUL bytes. */ | ||||
13088 | c = ptr[wlen]; | ||||
13089 | if (byts[arridx] == NUL) | ||||
13090 | { | ||||
13091 | if (c == NUL) | ||||
13092 | break; | ||||
13093 | |||||
13094 | /* Skip over the zeros, there can be several. */ | ||||
13095 | while (len > 0 && byts[arridx] == NUL) | ||||
13096 | { | ||||
13097 | ++arridx; | ||||
13098 | --len; | ||||
13099 | } | ||||
13100 | if (len == 0) | ||||
13101 | return -1; /* no children, word should have ended here */ | ||||
13102 | ++wordnr; | ||||
13103 | } | ||||
13104 | |||||
13105 | /* If the word ends we didn't find it. */ | ||||
13106 | if (c == NUL) | ||||
13107 | return -1; | ||||
13108 | |||||
13109 | /* Perform a binary search in the list of accepted bytes. */ | ||||
13110 | if (c == TAB) /* <Tab> is handled like <Space> */ | ||||
13111 | c = ' '; | ||||
13112 | while (byts[arridx] < c) | ||||
13113 | { | ||||
13114 | /* The word count is in the first idxs[] entry of the child. */ | ||||
13115 | wordnr += idxs[idxs[arridx]]; | ||||
13116 | ++arridx; | ||||
13117 | if (--len == 0) /* end of the bytes, didn't find it */ | ||||
13118 | return -1; | ||||
13119 | } | ||||
13120 | if (byts[arridx] != c) /* didn't find the byte */ | ||||
13121 | return -1; | ||||
13122 | |||||
13123 | /* Continue at the child (if there is one). */ | ||||
13124 | arridx = idxs[arridx]; | ||||
13125 | ++wlen; | ||||
13126 | |||||
13127 | /* One space in the good word may stand for several spaces in the | ||||
13128 | * checked word. */ | ||||
13129 | if (c == ' ') | ||||
13130 | while (ptr[wlen] == ' ' || ptr[wlen] == TAB) | ||||
13131 | ++wlen; | ||||
13132 | } | ||||
13133 | |||||
13134 | return wordnr; | ||||
13135 | } | ||||
13136 | |||||
13137 | /* | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13138 | * Copy "fword" to "cword", fixing case according to "flags". |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13139 | */ |
13140 | static void | ||||
13141 | make_case_word(fword, cword, flags) | ||||
13142 | char_u *fword; | ||||
13143 | char_u *cword; | ||||
13144 | int flags; | ||||
13145 | { | ||||
13146 | if (flags & WF_ALLCAP) | ||||
13147 | /* Make it all upper-case */ | ||||
13148 | allcap_copy(fword, cword); | ||||
13149 | else if (flags & WF_ONECAP) | ||||
13150 | /* Make the first letter upper-case */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13151 | onecap_copy(fword, cword, TRUE); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13152 | else |
13153 | /* Use goodword as-is. */ | ||||
13154 | STRCPY(cword, fword); | ||||
13155 | } | ||||
13156 | |||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13157 | /* |
13158 | * Use map string "map" for languages "lp". | ||||
13159 | */ | ||||
13160 | static void | ||||
13161 | set_map_str(lp, map) | ||||
13162 | slang_T *lp; | ||||
13163 | char_u *map; | ||||
13164 | { | ||||
13165 | char_u *p; | ||||
13166 | int headc = 0; | ||||
13167 | int c; | ||||
13168 | int i; | ||||
13169 | |||||
13170 | if (*map == NUL) | ||||
13171 | { | ||||
13172 | lp->sl_has_map = FALSE; | ||||
13173 | return; | ||||
13174 | } | ||||
13175 | lp->sl_has_map = TRUE; | ||||
13176 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13177 | /* Init the array and hash tables empty. */ |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13178 | for (i = 0; i < 256; ++i) |
13179 | lp->sl_map_array[i] = 0; | ||||
13180 | #ifdef FEAT_MBYTE | ||||
13181 | hash_init(&lp->sl_map_hash); | ||||
13182 | #endif | ||||
13183 | |||||
13184 | /* | ||||
13185 | * The similar characters are stored separated with slashes: | ||||
13186 | * "aaa/bbb/ccc/". Fill sl_map_array[c] with the character before c and | ||||
13187 | * before the same slash. For characters above 255 sl_map_hash is used. | ||||
13188 | */ | ||||
13189 | for (p = map; *p != NUL; ) | ||||
13190 | { | ||||
13191 | #ifdef FEAT_MBYTE | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 13192 | c = mb_cptr2char_adv(&p); |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13193 | #else |
13194 | c = *p++; | ||||
13195 | #endif | ||||
13196 | if (c == '/') | ||||
13197 | headc = 0; | ||||
13198 | else | ||||
13199 | { | ||||
13200 | if (headc == 0) | ||||
13201 | headc = c; | ||||
13202 | |||||
13203 | #ifdef FEAT_MBYTE | ||||
13204 | /* Characters above 255 don't fit in sl_map_array[], put them in | ||||
13205 | * the hash table. Each entry is the char, a NUL the headchar and | ||||
13206 | * a NUL. */ | ||||
13207 | if (c >= 256) | ||||
13208 | { | ||||
13209 | int cl = mb_char2len(c); | ||||
13210 | int headcl = mb_char2len(headc); | ||||
13211 | char_u *b; | ||||
13212 | hash_T hash; | ||||
13213 | hashitem_T *hi; | ||||
13214 | |||||
13215 | b = alloc((unsigned)(cl + headcl + 2)); | ||||
13216 | if (b == NULL) | ||||
13217 | return; | ||||
13218 | mb_char2bytes(c, b); | ||||
13219 | b[cl] = NUL; | ||||
13220 | mb_char2bytes(headc, b + cl + 1); | ||||
13221 | b[cl + 1 + headcl] = NUL; | ||||
13222 | hash = hash_hash(b); | ||||
13223 | hi = hash_lookup(&lp->sl_map_hash, b, hash); | ||||
13224 | if (HASHITEM_EMPTY(hi)) | ||||
13225 | hash_add_item(&lp->sl_map_hash, hi, b, hash); | ||||
13226 | else | ||||
13227 | { | ||||
13228 | /* This should have been checked when generating the .spl | ||||
13229 | * file. */ | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 13230 | EMSG(_("E783: duplicate char in MAP entry")); |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13231 | vim_free(b); |
13232 | } | ||||
13233 | } | ||||
13234 | else | ||||
13235 | #endif | ||||
13236 | lp->sl_map_array[c] = headc; | ||||
13237 | } | ||||
13238 | } | ||||
13239 | } | ||||
13240 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13241 | /* |
13242 | * Return TRUE if "c1" and "c2" are similar characters according to the MAP | ||||
13243 | * lines in the .aff file. | ||||
13244 | */ | ||||
13245 | static int | ||||
13246 | similar_chars(slang, c1, c2) | ||||
13247 | slang_T *slang; | ||||
13248 | int c1; | ||||
13249 | int c2; | ||||
13250 | { | ||||
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13251 | int m1, m2; |
13252 | #ifdef FEAT_MBYTE | ||||
13253 | char_u buf[MB_MAXBYTES]; | ||||
13254 | hashitem_T *hi; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13255 | |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13256 | if (c1 >= 256) |
13257 | { | ||||
13258 | buf[mb_char2bytes(c1, buf)] = 0; | ||||
13259 | hi = hash_find(&slang->sl_map_hash, buf); | ||||
13260 | if (HASHITEM_EMPTY(hi)) | ||||
13261 | m1 = 0; | ||||
13262 | else | ||||
13263 | m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); | ||||
13264 | } | ||||
13265 | else | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13266 | #endif |
Bram Moolenaar | ea42416 | 2005-06-16 21:51:00 +0000 | [diff] [blame] | 13267 | m1 = slang->sl_map_array[c1]; |
13268 | if (m1 == 0) | ||||
13269 | return FALSE; | ||||
13270 | |||||
13271 | |||||
13272 | #ifdef FEAT_MBYTE | ||||
13273 | if (c2 >= 256) | ||||
13274 | { | ||||
13275 | buf[mb_char2bytes(c2, buf)] = 0; | ||||
13276 | hi = hash_find(&slang->sl_map_hash, buf); | ||||
13277 | if (HASHITEM_EMPTY(hi)) | ||||
13278 | m2 = 0; | ||||
13279 | else | ||||
13280 | m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1); | ||||
13281 | } | ||||
13282 | else | ||||
13283 | #endif | ||||
13284 | m2 = slang->sl_map_array[c2]; | ||||
13285 | |||||
13286 | return m1 == m2; | ||||
13287 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13288 | |
13289 | /* | ||||
13290 | * Add a suggestion to the list of suggestions. | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13291 | * For a suggestion that is already in the list the lowest score is remembered. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13292 | */ |
13293 | static void | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13294 | add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus, |
13295 | slang, maxsf) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13296 | suginfo_T *su; |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13297 | garray_T *gap; /* either su_ga or su_sga */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13298 | char_u *goodword; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13299 | int badlenarg; /* len of bad word replaced with "goodword" */ |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13300 | int score; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 13301 | int altscore; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13302 | int had_bonus; /* value for st_had_bonus */ |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 13303 | slang_T *slang; /* language for sound folding */ |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13304 | int maxsf; /* su_maxscore applies to soundfold score, |
13305 | su_sfmaxscore to the total score. */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13306 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13307 | int goodlen; /* len of goodword changed */ |
13308 | int badlen; /* len of bad word changed */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13309 | suggest_T *stp; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13310 | suggest_T new_sug; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13311 | int i; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13312 | char_u *pgood, *pbad; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13313 | |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13314 | /* Minimize "badlen" for consistency. Avoids that changing "the the" to |
13315 | * "thee the" is added next to changing the first "the" the "thee". */ | ||||
13316 | pgood = goodword + STRLEN(goodword); | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13317 | pbad = su->su_badptr + badlenarg; |
13318 | for (;;) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 13319 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13320 | goodlen = pgood - goodword; |
13321 | badlen = pbad - su->su_badptr; | ||||
13322 | if (goodlen <= 0 || badlen <= 0) | ||||
13323 | break; | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13324 | mb_ptr_back(goodword, pgood); |
13325 | mb_ptr_back(su->su_badptr, pbad); | ||||
13326 | #ifdef FEAT_MBYTE | ||||
13327 | if (has_mbyte) | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 13328 | { |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13329 | if (mb_ptr2char(pgood) != mb_ptr2char(pbad)) |
13330 | break; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 13331 | } |
13332 | else | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13333 | #endif |
13334 | if (*pgood != *pbad) | ||||
13335 | break; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 13336 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13337 | |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13338 | if (badlen == 0 && goodlen == 0) |
13339 | /* goodword doesn't change anything; may happen for "the the" changing | ||||
13340 | * the first "the" to itself. */ | ||||
13341 | return; | ||||
Bram Moolenaar | 0c40586 | 2005-06-22 22:26:26 +0000 | [diff] [blame] | 13342 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13343 | /* Check if the word is already there. Also check the length that is |
13344 | * being replaced "thes," -> "these" is a different suggestion from | ||||
13345 | * "thes" -> "these". */ | ||||
13346 | stp = &SUG(*gap, 0); | ||||
13347 | for (i = gap->ga_len; --i >= 0; ++stp) | ||||
13348 | if (stp->st_wordlen == goodlen | ||||
13349 | && stp->st_orglen == badlen | ||||
13350 | && STRNCMP(stp->st_word, goodword, goodlen) == 0) | ||||
13351 | { | ||||
13352 | /* | ||||
13353 | * Found it. Remember the word with the lowest score. | ||||
13354 | */ | ||||
13355 | if (stp->st_slang == NULL) | ||||
13356 | stp->st_slang = slang; | ||||
13357 | |||||
13358 | new_sug.st_score = score; | ||||
13359 | new_sug.st_altscore = altscore; | ||||
13360 | new_sug.st_had_bonus = had_bonus; | ||||
13361 | |||||
13362 | if (stp->st_had_bonus != had_bonus) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13363 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13364 | /* Only one of the two had the soundalike score computed. |
13365 | * Need to do that for the other one now, otherwise the | ||||
13366 | * scores can't be compared. This happens because | ||||
13367 | * suggest_try_change() doesn't compute the soundalike | ||||
13368 | * word to keep it fast, while some special methods set | ||||
13369 | * the soundalike score to zero. */ | ||||
13370 | if (had_bonus) | ||||
13371 | rescore_one(su, stp); | ||||
13372 | else | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13373 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13374 | new_sug.st_word = stp->st_word; |
13375 | new_sug.st_wordlen = stp->st_wordlen; | ||||
13376 | new_sug.st_slang = stp->st_slang; | ||||
13377 | new_sug.st_orglen = badlen; | ||||
13378 | rescore_one(su, &new_sug); | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13379 | } |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13380 | } |
13381 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13382 | if (stp->st_score > new_sug.st_score) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13383 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13384 | stp->st_score = new_sug.st_score; |
13385 | stp->st_altscore = new_sug.st_altscore; | ||||
13386 | stp->st_had_bonus = new_sug.st_had_bonus; | ||||
13387 | } | ||||
13388 | break; | ||||
13389 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13390 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13391 | if (i < 0 && ga_grow(gap, 1) == OK) |
13392 | { | ||||
13393 | /* Add a suggestion. */ | ||||
13394 | stp = &SUG(*gap, gap->ga_len); | ||||
13395 | stp->st_word = vim_strnsave(goodword, goodlen); | ||||
13396 | if (stp->st_word != NULL) | ||||
13397 | { | ||||
13398 | stp->st_wordlen = goodlen; | ||||
13399 | stp->st_score = score; | ||||
13400 | stp->st_altscore = altscore; | ||||
13401 | stp->st_had_bonus = had_bonus; | ||||
13402 | stp->st_orglen = badlen; | ||||
13403 | stp->st_slang = slang; | ||||
13404 | ++gap->ga_len; | ||||
13405 | |||||
13406 | /* If we have too many suggestions now, sort the list and keep | ||||
13407 | * the best suggestions. */ | ||||
13408 | if (gap->ga_len > SUG_MAX_COUNT(su)) | ||||
13409 | { | ||||
13410 | if (maxsf) | ||||
13411 | su->su_sfmaxscore = cleanup_suggestions(gap, | ||||
13412 | su->su_sfmaxscore, SUG_CLEAN_COUNT(su)); | ||||
13413 | else | ||||
13414 | { | ||||
13415 | i = su->su_maxscore; | ||||
13416 | su->su_maxscore = cleanup_suggestions(gap, | ||||
13417 | su->su_maxscore, SUG_CLEAN_COUNT(su)); | ||||
13418 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13419 | } |
13420 | } | ||||
13421 | } | ||||
13422 | } | ||||
13423 | |||||
13424 | /* | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13425 | * Suggestions may in fact be flagged as errors. Esp. for banned words and |
13426 | * for split words, such as "the the". Remove these from the list here. | ||||
13427 | */ | ||||
13428 | static void | ||||
13429 | check_suggestions(su, gap) | ||||
13430 | suginfo_T *su; | ||||
13431 | garray_T *gap; /* either su_ga or su_sga */ | ||||
13432 | { | ||||
13433 | suggest_T *stp; | ||||
13434 | int i; | ||||
13435 | char_u longword[MAXWLEN + 1]; | ||||
13436 | int len; | ||||
13437 | hlf_T attr; | ||||
13438 | |||||
13439 | stp = &SUG(*gap, 0); | ||||
13440 | for (i = gap->ga_len - 1; i >= 0; --i) | ||||
13441 | { | ||||
13442 | /* Need to append what follows to check for "the the". */ | ||||
13443 | STRCPY(longword, stp[i].st_word); | ||||
13444 | len = stp[i].st_wordlen; | ||||
13445 | vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen, | ||||
13446 | MAXWLEN - len); | ||||
13447 | attr = HLF_COUNT; | ||||
13448 | (void)spell_check(curwin, longword, &attr, NULL, FALSE); | ||||
13449 | if (attr != HLF_COUNT) | ||||
13450 | { | ||||
13451 | /* Remove this entry. */ | ||||
13452 | vim_free(stp[i].st_word); | ||||
13453 | --gap->ga_len; | ||||
13454 | if (i < gap->ga_len) | ||||
13455 | mch_memmove(stp + i, stp + i + 1, | ||||
13456 | sizeof(suggest_T) * (gap->ga_len - i)); | ||||
13457 | } | ||||
13458 | } | ||||
13459 | } | ||||
13460 | |||||
13461 | |||||
13462 | /* | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13463 | * Add a word to be banned. |
13464 | */ | ||||
13465 | static void | ||||
13466 | add_banned(su, word) | ||||
13467 | suginfo_T *su; | ||||
13468 | char_u *word; | ||||
13469 | { | ||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 13470 | char_u *s; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13471 | hash_T hash; |
13472 | hashitem_T *hi; | ||||
13473 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13474 | hash = hash_hash(word); |
13475 | hi = hash_lookup(&su->su_banned, word, hash); | ||||
13476 | if (HASHITEM_EMPTY(hi)) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13477 | { |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 13478 | s = vim_strsave(word); |
13479 | if (s != NULL) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13480 | hash_add_item(&su->su_banned, hi, s, hash); |
13481 | } | ||||
13482 | } | ||||
13483 | |||||
13484 | /* | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13485 | * Recompute the score for all suggestions if sound-folding is possible. This |
13486 | * is slow, thus only done for the final results. | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13487 | */ |
13488 | static void | ||||
13489 | rescore_suggestions(su) | ||||
13490 | suginfo_T *su; | ||||
13491 | { | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13492 | int i; |
13493 | |||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13494 | if (su->su_sallang != NULL) |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 13495 | for (i = 0; i < su->su_ga.ga_len; ++i) |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13496 | rescore_one(su, &SUG(su->su_ga, i)); |
13497 | } | ||||
13498 | |||||
13499 | /* | ||||
13500 | * Recompute the score for one suggestion if sound-folding is possible. | ||||
13501 | */ | ||||
13502 | static void | ||||
13503 | rescore_one(su, stp) | ||||
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 13504 | suginfo_T *su; |
13505 | suggest_T *stp; | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13506 | { |
13507 | slang_T *slang = stp->st_slang; | ||||
13508 | char_u sal_badword[MAXWLEN]; | ||||
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 13509 | char_u *p; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13510 | |
13511 | /* Only rescore suggestions that have no sal score yet and do have a | ||||
13512 | * language. */ | ||||
13513 | if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus) | ||||
13514 | { | ||||
13515 | if (slang == su->su_sallang) | ||||
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 13516 | p = su->su_sal_badword; |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13517 | else |
Bram Moolenaar | 8b96d64 | 2005-09-05 22:05:30 +0000 | [diff] [blame] | 13518 | { |
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13519 | spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword); |
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 13520 | p = sal_badword; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13521 | } |
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 13522 | |
13523 | stp->st_altscore = stp_sal_score(stp, su, slang, p); | ||||
Bram Moolenaar | 482aaeb | 2005-09-29 18:26:07 +0000 | [diff] [blame] | 13524 | if (stp->st_altscore == SCORE_MAXMAX) |
13525 | stp->st_altscore = SCORE_BIG; | ||||
13526 | stp->st_score = RESCORE(stp->st_score, stp->st_altscore); | ||||
13527 | stp->st_had_bonus = TRUE; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13528 | } |
13529 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13530 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13531 | static int |
13532 | #ifdef __BORLANDC__ | ||||
13533 | _RTLENTRYF | ||||
13534 | #endif | ||||
13535 | sug_compare __ARGS((const void *s1, const void *s2)); | ||||
13536 | |||||
13537 | /* | ||||
13538 | * Function given to qsort() to sort the suggestions on st_score. | ||||
Bram Moolenaar | 6b730e1 | 2005-09-16 21:47:57 +0000 | [diff] [blame] | 13539 | * First on "st_score", then "st_altscore" then alphabetically. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13540 | */ |
13541 | static int | ||||
13542 | #ifdef __BORLANDC__ | ||||
13543 | _RTLENTRYF | ||||
13544 | #endif | ||||
13545 | sug_compare(s1, s2) | ||||
13546 | const void *s1; | ||||
13547 | const void *s2; | ||||
13548 | { | ||||
13549 | suggest_T *p1 = (suggest_T *)s1; | ||||
13550 | suggest_T *p2 = (suggest_T *)s2; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13551 | int n = p1->st_score - p2->st_score; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13552 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13553 | if (n == 0) |
Bram Moolenaar | 6b730e1 | 2005-09-16 21:47:57 +0000 | [diff] [blame] | 13554 | { |
13555 | n = p1->st_altscore - p2->st_altscore; | ||||
13556 | if (n == 0) | ||||
13557 | n = STRICMP(p1->st_word, p2->st_word); | ||||
13558 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13559 | return n; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13560 | } |
13561 | |||||
13562 | /* | ||||
13563 | * Cleanup the suggestions: | ||||
13564 | * - Sort on score. | ||||
13565 | * - Remove words that won't be displayed. | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13566 | * Returns the maximum score in the list or "maxscore" unmodified. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13567 | */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13568 | static int |
13569 | cleanup_suggestions(gap, maxscore, keep) | ||||
13570 | garray_T *gap; | ||||
13571 | int maxscore; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13572 | int keep; /* nr of suggestions to keep */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13573 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13574 | suggest_T *stp = &SUG(*gap, 0); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13575 | int i; |
13576 | |||||
13577 | /* Sort the list. */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13578 | qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13579 | |
13580 | /* Truncate the list to the number of suggestions that will be displayed. */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13581 | if (gap->ga_len > keep) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13582 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13583 | for (i = keep; i < gap->ga_len; ++i) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13584 | vim_free(stp[i].st_word); |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13585 | gap->ga_len = keep; |
13586 | return stp[keep - 1].st_score; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13587 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13588 | return maxscore; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13589 | } |
13590 | |||||
#if defined(FEAT_EVAL) || defined(PROTO)
/*
 * Sound-fold the string "word", for soundfold().
 *
 * The languages of the current buffer are tried in order and the first one
 * that has SAL items (sl_sal is not empty) does the folding.  When spell
 * checking is off for the current window, b_p_spl of the current buffer is
 * empty or no language qualifies, a copy of "word" itself is returned.
 *
 * The result is in allocated memory, NULL for an error.
 */
    char_u *
eval_soundfold(word)
    char_u      *word;
{
    char_u      folded[MAXWLEN];
    langp_T     *langp;
    int         idx;

    /* Nothing to fold with: return the word as-is. */
    if (!curwin->w_p_spell || *curbuf->b_p_spl == NUL)
        return vim_strsave(word);

    /* Use the sound-folding of the first language that supports it. */
    for (idx = 0; idx < curbuf->b_langp.ga_len; ++idx)
    {
        langp = LANGP_ENTRY(curbuf->b_langp, idx);
        if (langp->lp_slang->sl_sal.ga_len <= 0)
            continue;

        /* This language can do it: soundfold the word. */
        spell_soundfold(langp->lp_slang, word, FALSE, folded);
        return vim_strsave(folded);
    }

    /* No language with sound folding, return word as-is. */
    return vim_strsave(word);
}
#endif
13621 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13622 | /* |
13623 | * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". | ||||
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 13624 | * |
13625 | * There are many ways to turn a word into a sound-a-like representation. The | ||||
13626 | * oldest is Soundex (1918!). A nice overview can be found in "Approximate | ||||
13627 | * swedish name matching - survey and test of different algorithms" by Klas | ||||
13628 | * Erikson. | ||||
13629 | * | ||||
13630 | * We support two methods: | ||||
13631 | * 1. SOFOFROM/SOFOTO do a simple character mapping. | ||||
13632 | * 2. SAL items define a more advanced sound-folding (and much slower). | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13633 | */ |
13634 | static void | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13635 | spell_soundfold(slang, inword, folded, res) |
13636 | slang_T *slang; | ||||
13637 | char_u *inword; | ||||
13638 | int folded; /* "inword" is already case-folded */ | ||||
13639 | char_u *res; | ||||
13640 | { | ||||
13641 | char_u fword[MAXWLEN]; | ||||
13642 | char_u *word; | ||||
13643 | |||||
13644 | if (slang->sl_sofo) | ||||
13645 | /* SOFOFROM and SOFOTO used */ | ||||
13646 | spell_soundfold_sofo(slang, inword, res); | ||||
13647 | else | ||||
13648 | { | ||||
13649 | /* SAL items used. Requires the word to be case-folded. */ | ||||
13650 | if (folded) | ||||
13651 | word = inword; | ||||
13652 | else | ||||
13653 | { | ||||
13654 | (void)spell_casefold(inword, STRLEN(inword), fword, MAXWLEN); | ||||
13655 | word = fword; | ||||
13656 | } | ||||
13657 | |||||
13658 | #ifdef FEAT_MBYTE | ||||
13659 | if (has_mbyte) | ||||
13660 | spell_soundfold_wsal(slang, word, res); | ||||
13661 | else | ||||
13662 | #endif | ||||
13663 | spell_soundfold_sal(slang, word, res); | ||||
13664 | } | ||||
13665 | } | ||||
13666 | |||||
13667 | /* | ||||
13668 | * Perform sound folding of "inword" into "res" according to SOFOFROM and | ||||
13669 | * SOFOTO lines. | ||||
13670 | */ | ||||
13671 | static void | ||||
13672 | spell_soundfold_sofo(slang, inword, res) | ||||
13673 | slang_T *slang; | ||||
13674 | char_u *inword; | ||||
13675 | char_u *res; | ||||
13676 | { | ||||
13677 | char_u *s; | ||||
13678 | int ri = 0; | ||||
13679 | int c; | ||||
13680 | |||||
13681 | #ifdef FEAT_MBYTE | ||||
13682 | if (has_mbyte) | ||||
13683 | { | ||||
13684 | int prevc = 0; | ||||
13685 | int *ip; | ||||
13686 | |||||
13687 | /* The sl_sal_first[] table contains the translation for chars up to | ||||
13688 | * 255, sl_sal the rest. */ | ||||
13689 | for (s = inword; *s != NUL; ) | ||||
13690 | { | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 13691 | c = mb_cptr2char_adv(&s); |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13692 | if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) |
13693 | c = ' '; | ||||
13694 | else if (c < 256) | ||||
13695 | c = slang->sl_sal_first[c]; | ||||
13696 | else | ||||
13697 | { | ||||
13698 | ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; | ||||
13699 | if (ip == NULL) /* empty list, can't match */ | ||||
13700 | c = NUL; | ||||
13701 | else | ||||
13702 | for (;;) /* find "c" in the list */ | ||||
13703 | { | ||||
13704 | if (*ip == 0) /* not found */ | ||||
13705 | { | ||||
13706 | c = NUL; | ||||
13707 | break; | ||||
13708 | } | ||||
13709 | if (*ip == c) /* match! */ | ||||
13710 | { | ||||
13711 | c = ip[1]; | ||||
13712 | break; | ||||
13713 | } | ||||
13714 | ip += 2; | ||||
13715 | } | ||||
13716 | } | ||||
13717 | |||||
13718 | if (c != NUL && c != prevc) | ||||
13719 | { | ||||
13720 | ri += mb_char2bytes(c, res + ri); | ||||
13721 | if (ri + MB_MAXBYTES > MAXWLEN) | ||||
13722 | break; | ||||
13723 | prevc = c; | ||||
13724 | } | ||||
13725 | } | ||||
13726 | } | ||||
13727 | else | ||||
13728 | #endif | ||||
13729 | { | ||||
13730 | /* The sl_sal_first[] table contains the translation. */ | ||||
13731 | for (s = inword; (c = *s) != NUL; ++s) | ||||
13732 | { | ||||
13733 | if (vim_iswhite(c)) | ||||
13734 | c = ' '; | ||||
13735 | else | ||||
13736 | c = slang->sl_sal_first[c]; | ||||
13737 | if (c != NUL && (ri == 0 || res[ri - 1] != c)) | ||||
13738 | res[ri++] = c; | ||||
13739 | } | ||||
13740 | } | ||||
13741 | |||||
13742 | res[ri] = NUL; | ||||
13743 | } | ||||
13744 | |||||
13745 | static void | ||||
13746 | spell_soundfold_sal(slang, inword, res) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13747 | slang_T *slang; |
13748 | char_u *inword; | ||||
13749 | char_u *res; | ||||
13750 | { | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13751 | salitem_T *smp; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13752 | char_u word[MAXWLEN]; |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13753 | char_u *s = inword; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13754 | char_u *t; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13755 | char_u *pf; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13756 | int i, j, z; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13757 | int reslen; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13758 | int n, k = 0; |
13759 | int z0; | ||||
13760 | int k0; | ||||
13761 | int n0; | ||||
13762 | int c; | ||||
13763 | int pri; | ||||
13764 | int p0 = -333; | ||||
13765 | int c0; | ||||
13766 | |||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13767 | /* Remove accents, if wanted. We actually remove all non-word characters. |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13768 | * But keep white space. We need a copy, the word may be changed here. */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13769 | if (slang->sl_rem_accents) |
13770 | { | ||||
13771 | t = word; | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13772 | while (*s != NUL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13773 | { |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13774 | if (vim_iswhite(*s)) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13775 | { |
13776 | *t++ = ' '; | ||||
13777 | s = skipwhite(s); | ||||
13778 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13779 | else |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13780 | { |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 13781 | if (spell_iswordp_nmw(s)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13782 | *t++ = *s; |
13783 | ++s; | ||||
13784 | } | ||||
13785 | } | ||||
13786 | *t = NUL; | ||||
13787 | } | ||||
13788 | else | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13789 | STRCPY(word, s); |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13790 | |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13791 | smp = (salitem_T *)slang->sl_sal.ga_data; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13792 | |
13793 | /* | ||||
13794 | * This comes from Aspell phonet.cpp. Converted from C++ to C. | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13795 | * Changed to keep spaces. |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13796 | */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13797 | i = reslen = z = 0; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13798 | while ((c = word[i]) != NUL) |
13799 | { | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13800 | /* Start with the first rule that has the character in the word. */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13801 | n = slang->sl_sal_first[c]; |
13802 | z0 = 0; | ||||
13803 | |||||
13804 | if (n >= 0) | ||||
13805 | { | ||||
13806 | /* check all rules for the same letter */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13807 | for (; (s = smp[n].sm_lead)[0] == c; ++n) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13808 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13809 | /* Quickly skip entries that don't match the word. Most |
13810 | * entries are less then three chars, optimize for that. */ | ||||
13811 | k = smp[n].sm_leadlen; | ||||
13812 | if (k > 1) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13813 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13814 | if (word[i + 1] != s[1]) |
13815 | continue; | ||||
13816 | if (k > 2) | ||||
13817 | { | ||||
13818 | for (j = 2; j < k; ++j) | ||||
13819 | if (word[i + j] != s[j]) | ||||
13820 | break; | ||||
13821 | if (j < k) | ||||
13822 | continue; | ||||
13823 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13824 | } |
13825 | |||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13826 | if ((pf = smp[n].sm_oneof) != NULL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13827 | { |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13828 | /* Check for match with one of the chars in "sm_oneof". */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13829 | while (*pf != NUL && *pf != word[i + k]) |
13830 | ++pf; | ||||
13831 | if (*pf == NUL) | ||||
13832 | continue; | ||||
13833 | ++k; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13834 | } |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13835 | s = smp[n].sm_rules; |
13836 | pri = 5; /* default priority */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13837 | |
13838 | p0 = *s; | ||||
13839 | k0 = k; | ||||
13840 | while (*s == '-' && k > 1) | ||||
13841 | { | ||||
13842 | k--; | ||||
13843 | s++; | ||||
13844 | } | ||||
13845 | if (*s == '<') | ||||
13846 | s++; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13847 | if (VIM_ISDIGIT(*s)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13848 | { |
13849 | /* determine priority */ | ||||
13850 | pri = *s - '0'; | ||||
13851 | s++; | ||||
13852 | } | ||||
13853 | if (*s == '^' && *(s + 1) == '^') | ||||
13854 | s++; | ||||
13855 | |||||
13856 | if (*s == NUL | ||||
13857 | || (*s == '^' | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 13858 | && (i == 0 || !(word[i - 1] == ' ' |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 13859 | || spell_iswordp(word + i - 1, curbuf))) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13860 | && (*(s + 1) != '$' |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 13861 | || (!spell_iswordp(word + i + k0, curbuf)))) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13862 | || (*s == '$' && i > 0 |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 13863 | && spell_iswordp(word + i - 1, curbuf) |
13864 | && (!spell_iswordp(word + i + k0, curbuf)))) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13865 | { |
13866 | /* search for followup rules, if: */ | ||||
13867 | /* followup and k > 1 and NO '-' in searchstring */ | ||||
13868 | c0 = word[i + k - 1]; | ||||
13869 | n0 = slang->sl_sal_first[c0]; | ||||
13870 | |||||
13871 | if (slang->sl_followup && k > 1 && n0 >= 0 | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13872 | && p0 != '-' && word[i + k] != NUL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13873 | { |
13874 | /* test follow-up rule for "word[i + k]" */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13875 | for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13876 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13877 | /* Quickly skip entries that don't match the word. |
13878 | * */ | ||||
13879 | k0 = smp[n0].sm_leadlen; | ||||
13880 | if (k0 > 1) | ||||
13881 | { | ||||
13882 | if (word[i + k] != s[1]) | ||||
13883 | continue; | ||||
13884 | if (k0 > 2) | ||||
13885 | { | ||||
13886 | pf = word + i + k + 1; | ||||
13887 | for (j = 2; j < k0; ++j) | ||||
13888 | if (*pf++ != s[j]) | ||||
13889 | break; | ||||
13890 | if (j < k0) | ||||
13891 | continue; | ||||
13892 | } | ||||
13893 | } | ||||
13894 | k0 += k - 1; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13895 | |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13896 | if ((pf = smp[n0].sm_oneof) != NULL) |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13897 | { |
13898 | /* Check for match with one of the chars in | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 13899 | * "sm_oneof". */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13900 | while (*pf != NUL && *pf != word[i + k0]) |
13901 | ++pf; | ||||
13902 | if (*pf == NUL) | ||||
13903 | continue; | ||||
13904 | ++k0; | ||||
13905 | } | ||||
13906 | |||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13907 | p0 = 5; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13908 | s = smp[n0].sm_rules; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13909 | while (*s == '-') |
13910 | { | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13911 | /* "k0" gets NOT reduced because |
13912 | * "if (k0 == k)" */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13913 | s++; |
13914 | } | ||||
13915 | if (*s == '<') | ||||
13916 | s++; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13917 | if (VIM_ISDIGIT(*s)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13918 | { |
13919 | p0 = *s - '0'; | ||||
13920 | s++; | ||||
13921 | } | ||||
13922 | |||||
13923 | if (*s == NUL | ||||
13924 | /* *s == '^' cuts */ | ||||
13925 | || (*s == '$' | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 13926 | && !spell_iswordp(word + i + k0, |
13927 | curbuf))) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13928 | { |
13929 | if (k0 == k) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13930 | /* this is just a piece of the string */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13931 | continue; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13932 | |
13933 | if (p0 < pri) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13934 | /* priority too low */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13935 | continue; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13936 | /* rule fits; stop search */ |
13937 | break; | ||||
13938 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13939 | } |
13940 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13941 | if (p0 >= pri && smp[n0].sm_lead[0] == c0) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13942 | continue; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13943 | } |
13944 | |||||
13945 | /* replace string */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13946 | s = smp[n].sm_to; |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 13947 | if (s == NULL) |
13948 | s = (char_u *)""; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13949 | pf = smp[n].sm_rules; |
13950 | p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13951 | if (p0 == 1 && z == 0) |
13952 | { | ||||
13953 | /* rule with '<' is used */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13954 | if (reslen > 0 && *s != NUL && (res[reslen - 1] == c |
13955 | || res[reslen - 1] == *s)) | ||||
13956 | reslen--; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13957 | z0 = 1; |
13958 | z = 1; | ||||
13959 | k0 = 0; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 13960 | while (*s != NUL && word[i + k0] != NUL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13961 | { |
13962 | word[i + k0] = *s; | ||||
13963 | k0++; | ||||
13964 | s++; | ||||
13965 | } | ||||
13966 | if (k > k0) | ||||
13967 | mch_memmove(word + i + k0, word + i + k, | ||||
13968 | STRLEN(word + i + k) + 1); | ||||
13969 | |||||
13970 | /* new "actual letter" */ | ||||
13971 | c = word[i]; | ||||
13972 | } | ||||
13973 | else | ||||
13974 | { | ||||
13975 | /* no '<' rule used */ | ||||
13976 | i += k - 1; | ||||
13977 | z = 0; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13978 | while (*s != NUL && s[1] != NUL && reslen < MAXWLEN) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13979 | { |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13980 | if (reslen == 0 || res[reslen - 1] != *s) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 13981 | res[reslen++] = *s; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13982 | s++; |
13983 | } | ||||
13984 | /* new "actual letter" */ | ||||
13985 | c = *s; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 13986 | if (strstr((char *)pf, "^^") != NULL) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13987 | { |
13988 | if (c != NUL) | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 13989 | res[reslen++] = c; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13990 | mch_memmove(word, word + i + 1, |
13991 | STRLEN(word + i + 1) + 1); | ||||
13992 | i = 0; | ||||
13993 | z0 = 1; | ||||
13994 | } | ||||
13995 | } | ||||
13996 | break; | ||||
13997 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 13998 | } |
13999 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14000 | else if (vim_iswhite(c)) |
14001 | { | ||||
14002 | c = ' '; | ||||
14003 | k = 1; | ||||
14004 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14005 | |
14006 | if (z0 == 0) | ||||
14007 | { | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14008 | if (k && !p0 && reslen < MAXWLEN && c != NUL |
14009 | && (!slang->sl_collapse || reslen == 0 | ||||
14010 | || res[reslen - 1] != c)) | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14011 | /* condense only double letters */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14012 | res[reslen++] = c; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14013 | |
14014 | i++; | ||||
14015 | z = 0; | ||||
14016 | k = 0; | ||||
14017 | } | ||||
14018 | } | ||||
14019 | |||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14020 | res[reslen] = NUL; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14021 | } |
14022 | |||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14023 | #ifdef FEAT_MBYTE |
14024 | /* | ||||
14025 | * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]". | ||||
14026 | * Multi-byte version of spell_soundfold(). | ||||
14027 | */ | ||||
14028 | static void | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14029 | spell_soundfold_wsal(slang, inword, res) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14030 | slang_T *slang; |
14031 | char_u *inword; | ||||
14032 | char_u *res; | ||||
14033 | { | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14034 | salitem_T *smp = (salitem_T *)slang->sl_sal.ga_data; |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14035 | int word[MAXWLEN]; |
14036 | int wres[MAXWLEN]; | ||||
14037 | int l; | ||||
14038 | char_u *s; | ||||
14039 | int *ws; | ||||
14040 | char_u *t; | ||||
14041 | int *pf; | ||||
14042 | int i, j, z; | ||||
14043 | int reslen; | ||||
14044 | int n, k = 0; | ||||
14045 | int z0; | ||||
14046 | int k0; | ||||
14047 | int n0; | ||||
14048 | int c; | ||||
14049 | int pri; | ||||
14050 | int p0 = -333; | ||||
14051 | int c0; | ||||
14052 | int did_white = FALSE; | ||||
14053 | |||||
14054 | /* | ||||
14055 | * Convert the multi-byte string to a wide-character string. | ||||
14056 | * Remove accents, if wanted. We actually remove all non-word characters. | ||||
14057 | * But keep white space. | ||||
14058 | */ | ||||
14059 | n = 0; | ||||
14060 | for (s = inword; *s != NUL; ) | ||||
14061 | { | ||||
14062 | t = s; | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 14063 | c = mb_cptr2char_adv(&s); |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14064 | if (slang->sl_rem_accents) |
14065 | { | ||||
14066 | if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c)) | ||||
14067 | { | ||||
14068 | if (did_white) | ||||
14069 | continue; | ||||
14070 | c = ' '; | ||||
14071 | did_white = TRUE; | ||||
14072 | } | ||||
14073 | else | ||||
14074 | { | ||||
14075 | did_white = FALSE; | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 14076 | if (!spell_iswordp_nmw(t)) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14077 | continue; |
14078 | } | ||||
14079 | } | ||||
14080 | word[n++] = c; | ||||
14081 | } | ||||
14082 | word[n] = NUL; | ||||
14083 | |||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14084 | /* |
14085 | * This comes from Aspell phonet.cpp. | ||||
14086 | * Converted from C++ to C. Added support for multi-byte chars. | ||||
14087 | * Changed to keep spaces. | ||||
14088 | */ | ||||
14089 | i = reslen = z = 0; | ||||
14090 | while ((c = word[i]) != NUL) | ||||
14091 | { | ||||
14092 | /* Start with the first rule that has the character in the word. */ | ||||
14093 | n = slang->sl_sal_first[c & 0xff]; | ||||
14094 | z0 = 0; | ||||
14095 | |||||
14096 | if (n >= 0) | ||||
14097 | { | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14098 | /* check all rules for the same index byte */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14099 | for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n) |
14100 | { | ||||
14101 | /* Quickly skip entries that don't match the word. Most | ||||
14102 | * entries are less then three chars, optimize for that. */ | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14103 | if (c != ws[0]) |
14104 | continue; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14105 | k = smp[n].sm_leadlen; |
14106 | if (k > 1) | ||||
14107 | { | ||||
14108 | if (word[i + 1] != ws[1]) | ||||
14109 | continue; | ||||
14110 | if (k > 2) | ||||
14111 | { | ||||
14112 | for (j = 2; j < k; ++j) | ||||
14113 | if (word[i + j] != ws[j]) | ||||
14114 | break; | ||||
14115 | if (j < k) | ||||
14116 | continue; | ||||
14117 | } | ||||
14118 | } | ||||
14119 | |||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14120 | if ((pf = smp[n].sm_oneof_w) != NULL) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14121 | { |
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14122 | /* Check for match with one of the chars in "sm_oneof". */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14123 | while (*pf != NUL && *pf != word[i + k]) |
14124 | ++pf; | ||||
14125 | if (*pf == NUL) | ||||
14126 | continue; | ||||
14127 | ++k; | ||||
14128 | } | ||||
14129 | s = smp[n].sm_rules; | ||||
14130 | pri = 5; /* default priority */ | ||||
14131 | |||||
14132 | p0 = *s; | ||||
14133 | k0 = k; | ||||
14134 | while (*s == '-' && k > 1) | ||||
14135 | { | ||||
14136 | k--; | ||||
14137 | s++; | ||||
14138 | } | ||||
14139 | if (*s == '<') | ||||
14140 | s++; | ||||
14141 | if (VIM_ISDIGIT(*s)) | ||||
14142 | { | ||||
14143 | /* determine priority */ | ||||
14144 | pri = *s - '0'; | ||||
14145 | s++; | ||||
14146 | } | ||||
14147 | if (*s == '^' && *(s + 1) == '^') | ||||
14148 | s++; | ||||
14149 | |||||
14150 | if (*s == NUL | ||||
14151 | || (*s == '^' | ||||
14152 | && (i == 0 || !(word[i - 1] == ' ' | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 14153 | || spell_iswordp_w(word + i - 1, curbuf))) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14154 | && (*(s + 1) != '$' |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 14155 | || (!spell_iswordp_w(word + i + k0, curbuf)))) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14156 | || (*s == '$' && i > 0 |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 14157 | && spell_iswordp_w(word + i - 1, curbuf) |
14158 | && (!spell_iswordp_w(word + i + k0, curbuf)))) | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14159 | { |
14160 | /* search for followup rules, if: */ | ||||
14161 | /* followup and k > 1 and NO '-' in searchstring */ | ||||
14162 | c0 = word[i + k - 1]; | ||||
14163 | n0 = slang->sl_sal_first[c0 & 0xff]; | ||||
14164 | |||||
14165 | if (slang->sl_followup && k > 1 && n0 >= 0 | ||||
14166 | && p0 != '-' && word[i + k] != NUL) | ||||
14167 | { | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14168 | /* Test follow-up rule for "word[i + k]"; loop over |
14169 | * all entries with the same index byte. */ | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14170 | for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff) |
14171 | == (c0 & 0xff); ++n0) | ||||
14172 | { | ||||
14173 | /* Quickly skip entries that don't match the word. | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14174 | */ |
14175 | if (c0 != ws[0]) | ||||
14176 | continue; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14177 | k0 = smp[n0].sm_leadlen; |
14178 | if (k0 > 1) | ||||
14179 | { | ||||
14180 | if (word[i + k] != ws[1]) | ||||
14181 | continue; | ||||
14182 | if (k0 > 2) | ||||
14183 | { | ||||
14184 | pf = word + i + k + 1; | ||||
14185 | for (j = 2; j < k0; ++j) | ||||
14186 | if (*pf++ != ws[j]) | ||||
14187 | break; | ||||
14188 | if (j < k0) | ||||
14189 | continue; | ||||
14190 | } | ||||
14191 | } | ||||
14192 | k0 += k - 1; | ||||
14193 | |||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14194 | if ((pf = smp[n0].sm_oneof_w) != NULL) |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14195 | { |
14196 | /* Check for match with one of the chars in | ||||
Bram Moolenaar | 42eeac3 | 2005-06-29 22:40:58 +0000 | [diff] [blame] | 14197 | * "sm_oneof". */ |
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14198 | while (*pf != NUL && *pf != word[i + k0]) |
14199 | ++pf; | ||||
14200 | if (*pf == NUL) | ||||
14201 | continue; | ||||
14202 | ++k0; | ||||
14203 | } | ||||
14204 | |||||
14205 | p0 = 5; | ||||
14206 | s = smp[n0].sm_rules; | ||||
14207 | while (*s == '-') | ||||
14208 | { | ||||
14209 | /* "k0" gets NOT reduced because | ||||
14210 | * "if (k0 == k)" */ | ||||
14211 | s++; | ||||
14212 | } | ||||
14213 | if (*s == '<') | ||||
14214 | s++; | ||||
14215 | if (VIM_ISDIGIT(*s)) | ||||
14216 | { | ||||
14217 | p0 = *s - '0'; | ||||
14218 | s++; | ||||
14219 | } | ||||
14220 | |||||
14221 | if (*s == NUL | ||||
14222 | /* *s == '^' cuts */ | ||||
14223 | || (*s == '$' | ||||
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 14224 | && !spell_iswordp_w(word + i + k0, |
14225 | curbuf))) | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14226 | { |
14227 | if (k0 == k) | ||||
14228 | /* this is just a piece of the string */ | ||||
14229 | continue; | ||||
14230 | |||||
14231 | if (p0 < pri) | ||||
14232 | /* priority too low */ | ||||
14233 | continue; | ||||
14234 | /* rule fits; stop search */ | ||||
14235 | break; | ||||
14236 | } | ||||
14237 | } | ||||
14238 | |||||
14239 | if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff) | ||||
14240 | == (c0 & 0xff)) | ||||
14241 | continue; | ||||
14242 | } | ||||
14243 | |||||
14244 | /* replace string */ | ||||
14245 | ws = smp[n].sm_to_w; | ||||
14246 | s = smp[n].sm_rules; | ||||
14247 | p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0; | ||||
14248 | if (p0 == 1 && z == 0) | ||||
14249 | { | ||||
14250 | /* rule with '<' is used */ | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 14251 | if (reslen > 0 && ws != NULL && *ws != NUL |
14252 | && (wres[reslen - 1] == c | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14253 | || wres[reslen - 1] == *ws)) |
14254 | reslen--; | ||||
14255 | z0 = 1; | ||||
14256 | z = 1; | ||||
14257 | k0 = 0; | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 14258 | if (ws != NULL) |
14259 | while (*ws != NUL && word[i + k0] != NUL) | ||||
14260 | { | ||||
14261 | word[i + k0] = *ws; | ||||
14262 | k0++; | ||||
14263 | ws++; | ||||
14264 | } | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14265 | if (k > k0) |
14266 | mch_memmove(word + i + k0, word + i + k, | ||||
14267 | sizeof(int) * (STRLEN(word + i + k) + 1)); | ||||
14268 | |||||
14269 | /* new "actual letter" */ | ||||
14270 | c = word[i]; | ||||
14271 | } | ||||
14272 | else | ||||
14273 | { | ||||
14274 | /* no '<' rule used */ | ||||
14275 | i += k - 1; | ||||
14276 | z = 0; | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 14277 | if (ws != NULL) |
14278 | while (*ws != NUL && ws[1] != NUL | ||||
14279 | && reslen < MAXWLEN) | ||||
14280 | { | ||||
14281 | if (reslen == 0 || wres[reslen - 1] != *ws) | ||||
14282 | wres[reslen++] = *ws; | ||||
14283 | ws++; | ||||
14284 | } | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14285 | /* new "actual letter" */ |
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 14286 | if (ws == NULL) |
14287 | c = NUL; | ||||
14288 | else | ||||
14289 | c = *ws; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14290 | if (strstr((char *)s, "^^") != NULL) |
14291 | { | ||||
14292 | if (c != NUL) | ||||
14293 | wres[reslen++] = c; | ||||
14294 | mch_memmove(word, word + i + 1, | ||||
14295 | sizeof(int) * (STRLEN(word + i + 1) + 1)); | ||||
14296 | i = 0; | ||||
14297 | z0 = 1; | ||||
14298 | } | ||||
14299 | } | ||||
14300 | break; | ||||
14301 | } | ||||
14302 | } | ||||
14303 | } | ||||
14304 | else if (vim_iswhite(c)) | ||||
14305 | { | ||||
14306 | c = ' '; | ||||
14307 | k = 1; | ||||
14308 | } | ||||
14309 | |||||
14310 | if (z0 == 0) | ||||
14311 | { | ||||
14312 | if (k && !p0 && reslen < MAXWLEN && c != NUL | ||||
14313 | && (!slang->sl_collapse || reslen == 0 | ||||
14314 | || wres[reslen - 1] != c)) | ||||
14315 | /* condense only double letters */ | ||||
14316 | wres[reslen++] = c; | ||||
14317 | |||||
14318 | i++; | ||||
14319 | z = 0; | ||||
14320 | k = 0; | ||||
14321 | } | ||||
14322 | } | ||||
14323 | |||||
14324 | /* Convert wide characters in "wres" to a multi-byte string in "res". */ | ||||
14325 | l = 0; | ||||
14326 | for (n = 0; n < reslen; ++n) | ||||
14327 | { | ||||
14328 | l += mb_char2bytes(wres[n], res + l); | ||||
14329 | if (l + MB_MAXBYTES > MAXWLEN) | ||||
14330 | break; | ||||
14331 | } | ||||
14332 | res[l] = NUL; | ||||
14333 | } | ||||
14334 | #endif | ||||
14335 | |||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14336 | /* |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14337 | * Compute a score for two sound-a-like words. |
14338 | * This permits up to two inserts/deletes/swaps/etc. to keep things fast. | ||||
14339 | * Instead of a generic loop we write out the code. That keeps it fast by | ||||
14340 | * avoiding checks that will not be possible. | ||||
14341 | */ | ||||
14342 | static int | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14343 | soundalike_score(goodstart, badstart) |
14344 | char_u *goodstart; /* sound-folded good word */ | ||||
14345 | char_u *badstart; /* sound-folded bad word */ | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14346 | { |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14347 | char_u *goodsound = goodstart; |
14348 | char_u *badsound = badstart; | ||||
14349 | int goodlen; | ||||
14350 | int badlen; | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14351 | int n; |
14352 | char_u *pl, *ps; | ||||
14353 | char_u *pl2, *ps2; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14354 | int score = 0; |
14355 | |||||
14356 | /* adding/inserting "*" at the start (word starts with vowel) shouldn't be | ||||
14357 | * counted so much, vowels halfway the word aren't counted at all. */ | ||||
14358 | if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound) | ||||
14359 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14360 | if (badsound[1] == goodsound[1] |
14361 | || (badsound[1] != NUL | ||||
14362 | && goodsound[1] != NUL | ||||
14363 | && badsound[2] == goodsound[2])) | ||||
14364 | { | ||||
14365 | /* handle like a substitute */ | ||||
14366 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14367 | else |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14368 | { |
14369 | score = 2 * SCORE_DEL / 3; | ||||
14370 | if (*badsound == '*') | ||||
14371 | ++badsound; | ||||
14372 | else | ||||
14373 | ++goodsound; | ||||
14374 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14375 | } |
14376 | |||||
14377 | goodlen = STRLEN(goodsound); | ||||
14378 | badlen = STRLEN(badsound); | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14379 | |
14380 | /* Return quickly if the lenghts are too different to be fixed by two | ||||
14381 | * changes. */ | ||||
14382 | n = goodlen - badlen; | ||||
14383 | if (n < -2 || n > 2) | ||||
14384 | return SCORE_MAXMAX; | ||||
14385 | |||||
14386 | if (n > 0) | ||||
14387 | { | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14388 | pl = goodsound; /* goodsound is longest */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14389 | ps = badsound; |
14390 | } | ||||
14391 | else | ||||
14392 | { | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14393 | pl = badsound; /* badsound is longest */ |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14394 | ps = goodsound; |
14395 | } | ||||
14396 | |||||
14397 | /* Skip over the identical part. */ | ||||
14398 | while (*pl == *ps && *pl != NUL) | ||||
14399 | { | ||||
14400 | ++pl; | ||||
14401 | ++ps; | ||||
14402 | } | ||||
14403 | |||||
14404 | switch (n) | ||||
14405 | { | ||||
14406 | case -2: | ||||
14407 | case 2: | ||||
14408 | /* | ||||
14409 | * Must delete two characters from "pl". | ||||
14410 | */ | ||||
14411 | ++pl; /* first delete */ | ||||
14412 | while (*pl == *ps) | ||||
14413 | { | ||||
14414 | ++pl; | ||||
14415 | ++ps; | ||||
14416 | } | ||||
14417 | /* strings must be equal after second delete */ | ||||
14418 | if (STRCMP(pl + 1, ps) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14419 | return score + SCORE_DEL * 2; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14420 | |
14421 | /* Failed to compare. */ | ||||
14422 | break; | ||||
14423 | |||||
14424 | case -1: | ||||
14425 | case 1: | ||||
14426 | /* | ||||
14427 | * Minimal one delete from "pl" required. | ||||
14428 | */ | ||||
14429 | |||||
14430 | /* 1: delete */ | ||||
14431 | pl2 = pl + 1; | ||||
14432 | ps2 = ps; | ||||
14433 | while (*pl2 == *ps2) | ||||
14434 | { | ||||
14435 | if (*pl2 == NUL) /* reached the end */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14436 | return score + SCORE_DEL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14437 | ++pl2; |
14438 | ++ps2; | ||||
14439 | } | ||||
14440 | |||||
14441 | /* 2: delete then swap, then rest must be equal */ | ||||
14442 | if (pl2[0] == ps2[1] && pl2[1] == ps2[0] | ||||
14443 | && STRCMP(pl2 + 2, ps2 + 2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14444 | return score + SCORE_DEL + SCORE_SWAP; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14445 | |
14446 | /* 3: delete then substitute, then the rest must be equal */ | ||||
14447 | if (STRCMP(pl2 + 1, ps2 + 1) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14448 | return score + SCORE_DEL + SCORE_SUBST; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14449 | |
14450 | /* 4: first swap then delete */ | ||||
14451 | if (pl[0] == ps[1] && pl[1] == ps[0]) | ||||
14452 | { | ||||
14453 | pl2 = pl + 2; /* swap, skip two chars */ | ||||
14454 | ps2 = ps + 2; | ||||
14455 | while (*pl2 == *ps2) | ||||
14456 | { | ||||
14457 | ++pl2; | ||||
14458 | ++ps2; | ||||
14459 | } | ||||
14460 | /* delete a char and then strings must be equal */ | ||||
14461 | if (STRCMP(pl2 + 1, ps2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14462 | return score + SCORE_SWAP + SCORE_DEL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14463 | } |
14464 | |||||
14465 | /* 5: first substitute then delete */ | ||||
14466 | pl2 = pl + 1; /* substitute, skip one char */ | ||||
14467 | ps2 = ps + 1; | ||||
14468 | while (*pl2 == *ps2) | ||||
14469 | { | ||||
14470 | ++pl2; | ||||
14471 | ++ps2; | ||||
14472 | } | ||||
14473 | /* delete a char and then strings must be equal */ | ||||
14474 | if (STRCMP(pl2 + 1, ps2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14475 | return score + SCORE_SUBST + SCORE_DEL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14476 | |
14477 | /* Failed to compare. */ | ||||
14478 | break; | ||||
14479 | |||||
14480 | case 0: | ||||
14481 | /* | ||||
14482 | * Lenghts are equal, thus changes must result in same length: An | ||||
14483 | * insert is only possible in combination with a delete. | ||||
14484 | * 1: check if for identical strings | ||||
14485 | */ | ||||
14486 | if (*pl == NUL) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14487 | return score; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14488 | |
14489 | /* 2: swap */ | ||||
14490 | if (pl[0] == ps[1] && pl[1] == ps[0]) | ||||
14491 | { | ||||
14492 | pl2 = pl + 2; /* swap, skip two chars */ | ||||
14493 | ps2 = ps + 2; | ||||
14494 | while (*pl2 == *ps2) | ||||
14495 | { | ||||
14496 | if (*pl2 == NUL) /* reached the end */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14497 | return score + SCORE_SWAP; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14498 | ++pl2; |
14499 | ++ps2; | ||||
14500 | } | ||||
14501 | /* 3: swap and swap again */ | ||||
14502 | if (pl2[0] == ps2[1] && pl2[1] == ps2[0] | ||||
14503 | && STRCMP(pl2 + 2, ps2 + 2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14504 | return score + SCORE_SWAP + SCORE_SWAP; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14505 | |
14506 | /* 4: swap and substitute */ | ||||
14507 | if (STRCMP(pl2 + 1, ps2 + 1) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14508 | return score + SCORE_SWAP + SCORE_SUBST; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14509 | } |
14510 | |||||
14511 | /* 5: substitute */ | ||||
14512 | pl2 = pl + 1; | ||||
14513 | ps2 = ps + 1; | ||||
14514 | while (*pl2 == *ps2) | ||||
14515 | { | ||||
14516 | if (*pl2 == NUL) /* reached the end */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14517 | return score + SCORE_SUBST; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14518 | ++pl2; |
14519 | ++ps2; | ||||
14520 | } | ||||
14521 | |||||
14522 | /* 6: substitute and swap */ | ||||
14523 | if (pl2[0] == ps2[1] && pl2[1] == ps2[0] | ||||
14524 | && STRCMP(pl2 + 2, ps2 + 2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14525 | return score + SCORE_SUBST + SCORE_SWAP; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14526 | |
14527 | /* 7: substitute and substitute */ | ||||
14528 | if (STRCMP(pl2 + 1, ps2 + 1) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14529 | return score + SCORE_SUBST + SCORE_SUBST; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14530 | |
14531 | /* 8: insert then delete */ | ||||
14532 | pl2 = pl; | ||||
14533 | ps2 = ps + 1; | ||||
14534 | while (*pl2 == *ps2) | ||||
14535 | { | ||||
14536 | ++pl2; | ||||
14537 | ++ps2; | ||||
14538 | } | ||||
14539 | if (STRCMP(pl2 + 1, ps2) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14540 | return score + SCORE_INS + SCORE_DEL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14541 | |
14542 | /* 9: delete then insert */ | ||||
14543 | pl2 = pl + 1; | ||||
14544 | ps2 = ps; | ||||
14545 | while (*pl2 == *ps2) | ||||
14546 | { | ||||
14547 | ++pl2; | ||||
14548 | ++ps2; | ||||
14549 | } | ||||
14550 | if (STRCMP(pl2, ps2 + 1) == 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 14551 | return score + SCORE_INS + SCORE_DEL; |
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14552 | |
14553 | /* Failed to compare. */ | ||||
14554 | break; | ||||
14555 | } | ||||
14556 | |||||
14557 | return SCORE_MAXMAX; | ||||
14558 | } | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14559 | |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14560 | /* |
14561 | * Compute the "edit distance" to turn "badword" into "goodword". The less | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14562 | * deletes/inserts/substitutes/swaps are required the lower the score. |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14563 | * |
Bram Moolenaar | d12a132 | 2005-08-21 22:08:24 +0000 | [diff] [blame] | 14564 | * The algorithm is described by Du and Chang, 1992. |
14565 | * The implementation of the algorithm comes from Aspell editdist.cpp, | ||||
14566 | * edit_distance(). It has been converted from C++ to C and modified to | ||||
14567 | * support multi-byte characters. | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14568 | */ |
14569 | static int | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14570 | spell_edit_score(slang, badword, goodword) |
14571 | slang_T *slang; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14572 | char_u *badword; |
14573 | char_u *goodword; | ||||
14574 | { | ||||
14575 | int *cnt; | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 14576 | int badlen, goodlen; /* lenghts including NUL */ |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14577 | int j, i; |
14578 | int t; | ||||
14579 | int bc, gc; | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14580 | int pbc, pgc; |
14581 | #ifdef FEAT_MBYTE | ||||
14582 | char_u *p; | ||||
14583 | int wbadword[MAXWLEN]; | ||||
14584 | int wgoodword[MAXWLEN]; | ||||
14585 | |||||
14586 | if (has_mbyte) | ||||
14587 | { | ||||
14588 | /* Get the characters from the multi-byte strings and put them in an | ||||
14589 | * int array for easy access. */ | ||||
14590 | for (p = badword, badlen = 0; *p != NUL; ) | ||||
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 14591 | wbadword[badlen++] = mb_cptr2char_adv(&p); |
Bram Moolenaar | 97409f1 | 2005-07-08 22:17:29 +0000 | [diff] [blame] | 14592 | wbadword[badlen++] = 0; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14593 | for (p = goodword, goodlen = 0; *p != NUL; ) |
Bram Moolenaar | 0fa313a | 2005-08-10 21:07:57 +0000 | [diff] [blame] | 14594 | wgoodword[goodlen++] = mb_cptr2char_adv(&p); |
Bram Moolenaar | 97409f1 | 2005-07-08 22:17:29 +0000 | [diff] [blame] | 14595 | wgoodword[goodlen++] = 0; |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14596 | } |
14597 | else | ||||
14598 | #endif | ||||
14599 | { | ||||
14600 | badlen = STRLEN(badword) + 1; | ||||
14601 | goodlen = STRLEN(goodword) + 1; | ||||
14602 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14603 | |
14604 | /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */ | ||||
14605 | #define CNT(a, b) cnt[(a) + (b) * (badlen + 1)] | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14606 | cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)), |
14607 | TRUE); | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14608 | if (cnt == NULL) |
14609 | return 0; /* out of memory */ | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14610 | |
14611 | CNT(0, 0) = 0; | ||||
14612 | for (j = 1; j <= goodlen; ++j) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14613 | CNT(0, j) = CNT(0, j - 1) + SCORE_INS; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14614 | |
14615 | for (i = 1; i <= badlen; ++i) | ||||
14616 | { | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14617 | CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14618 | for (j = 1; j <= goodlen; ++j) |
14619 | { | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14620 | #ifdef FEAT_MBYTE |
14621 | if (has_mbyte) | ||||
14622 | { | ||||
14623 | bc = wbadword[i - 1]; | ||||
14624 | gc = wgoodword[j - 1]; | ||||
14625 | } | ||||
14626 | else | ||||
14627 | #endif | ||||
14628 | { | ||||
14629 | bc = badword[i - 1]; | ||||
14630 | gc = goodword[j - 1]; | ||||
14631 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14632 | if (bc == gc) |
14633 | CNT(i, j) = CNT(i - 1, j - 1); | ||||
14634 | else | ||||
14635 | { | ||||
14636 | /* Use a better score when there is only a case difference. */ | ||||
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14637 | if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14638 | CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); |
14639 | else | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 14640 | { |
14641 | /* For a similar character use SCORE_SIMILAR. */ | ||||
14642 | if (slang != NULL | ||||
14643 | && slang->sl_has_map | ||||
14644 | && similar_chars(slang, gc, bc)) | ||||
14645 | CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1); | ||||
14646 | else | ||||
14647 | CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1); | ||||
14648 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14649 | |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14650 | if (i > 1 && j > 1) |
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14651 | { |
Bram Moolenaar | 9f30f50 | 2005-06-14 22:01:04 +0000 | [diff] [blame] | 14652 | #ifdef FEAT_MBYTE |
14653 | if (has_mbyte) | ||||
14654 | { | ||||
14655 | pbc = wbadword[i - 2]; | ||||
14656 | pgc = wgoodword[j - 2]; | ||||
14657 | } | ||||
14658 | else | ||||
14659 | #endif | ||||
14660 | { | ||||
14661 | pbc = badword[i - 2]; | ||||
14662 | pgc = goodword[j - 2]; | ||||
14663 | } | ||||
14664 | if (bc == pgc && pbc == gc) | ||||
14665 | { | ||||
14666 | t = SCORE_SWAP + CNT(i - 2, j - 2); | ||||
14667 | if (t < CNT(i, j)) | ||||
14668 | CNT(i, j) = t; | ||||
14669 | } | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14670 | } |
14671 | t = SCORE_DEL + CNT(i - 1, j); | ||||
14672 | if (t < CNT(i, j)) | ||||
14673 | CNT(i, j) = t; | ||||
14674 | t = SCORE_INS + CNT(i, j - 1); | ||||
14675 | if (t < CNT(i, j)) | ||||
14676 | CNT(i, j) = t; | ||||
14677 | } | ||||
14678 | } | ||||
14679 | } | ||||
Bram Moolenaar | d857f0e | 2005-06-21 22:37:39 +0000 | [diff] [blame] | 14680 | |
14681 | i = CNT(badlen - 1, goodlen - 1); | ||||
14682 | vim_free(cnt); | ||||
14683 | return i; | ||||
Bram Moolenaar | 9ba0eb8 | 2005-06-13 22:28:56 +0000 | [diff] [blame] | 14684 | } |
Bram Moolenaar | cfc6c43 | 2005-06-06 21:50:35 +0000 | [diff] [blame] | 14685 | |
/*
 * One alternative that is still to be tried by spell_edit_score_limit():
 * the position in both words and the score reached so far.
 */
typedef struct
{
    int         badi;       /* index in the bad word to continue at */
    int         goodi;      /* index in the good word to continue at */
    int         score;      /* score when continuing at these indexes */
} limitscore_T;
14692 | |||||
14693 | /* | ||||
14694 | * Like spell_edit_score(), but with a limit on the score to make it faster. | ||||
14695 | * May return SCORE_MAXMAX when the score is higher than "limit". | ||||
14696 | * | ||||
14697 | * This uses a stack for the edits still to be tried. | ||||
14698 | * The idea comes from Aspell leditdist.cpp. Rewritten in C and added support | ||||
14699 | * for multi-byte characters. | ||||
14700 | */ | ||||
14701 | static int | ||||
14702 | spell_edit_score_limit(slang, badword, goodword, limit) | ||||
14703 | slang_T *slang; | ||||
14704 | char_u *badword; | ||||
14705 | char_u *goodword; | ||||
14706 | int limit; | ||||
14707 | { | ||||
14708 | limitscore_T stack[10]; /* allow for over 3 * 2 edits */ | ||||
14709 | int stackidx; | ||||
14710 | int bi, gi; | ||||
14711 | int bi2, gi2; | ||||
14712 | int bc, gc; | ||||
14713 | int score; | ||||
14714 | int score_off; | ||||
14715 | int minscore; | ||||
14716 | int round; | ||||
14717 | |||||
14718 | #ifdef FEAT_MBYTE | ||||
14719 | /* Multi-byte characters require a bit more work, use a different function | ||||
14720 | * to avoid testing "has_mbyte" quite often. */ | ||||
14721 | if (has_mbyte) | ||||
14722 | return spell_edit_score_limit_w(slang, badword, goodword, limit); | ||||
14723 | #endif | ||||
14724 | |||||
14725 | /* | ||||
14726 | * The idea is to go from start to end over the words. So long as | ||||
14727 | * characters are equal just continue, this always gives the lowest score. | ||||
14728 | * When there is a difference try several alternatives. Each alternative | ||||
14729 | * increases "score" for the edit distance. Some of the alternatives are | ||||
14730 | * pushed unto a stack and tried later, some are tried right away. At the | ||||
14731 | * end of the word the score for one alternative is known. The lowest | ||||
14732 | * possible score is stored in "minscore". | ||||
14733 | */ | ||||
14734 | stackidx = 0; | ||||
14735 | bi = 0; | ||||
14736 | gi = 0; | ||||
14737 | score = 0; | ||||
14738 | minscore = limit + 1; | ||||
14739 | |||||
14740 | for (;;) | ||||
14741 | { | ||||
14742 | /* Skip over an equal part, score remains the same. */ | ||||
14743 | for (;;) | ||||
14744 | { | ||||
14745 | bc = badword[bi]; | ||||
14746 | gc = goodword[gi]; | ||||
14747 | if (bc != gc) /* stop at a char that's different */ | ||||
14748 | break; | ||||
14749 | if (bc == NUL) /* both words end */ | ||||
14750 | { | ||||
14751 | if (score < minscore) | ||||
14752 | minscore = score; | ||||
14753 | goto pop; /* do next alternative */ | ||||
14754 | } | ||||
14755 | ++bi; | ||||
14756 | ++gi; | ||||
14757 | } | ||||
14758 | |||||
14759 | if (gc == NUL) /* goodword ends, delete badword chars */ | ||||
14760 | { | ||||
14761 | do | ||||
14762 | { | ||||
14763 | if ((score += SCORE_DEL) >= minscore) | ||||
14764 | goto pop; /* do next alternative */ | ||||
14765 | } while (badword[++bi] != NUL); | ||||
14766 | minscore = score; | ||||
14767 | } | ||||
14768 | else if (bc == NUL) /* badword ends, insert badword chars */ | ||||
14769 | { | ||||
14770 | do | ||||
14771 | { | ||||
14772 | if ((score += SCORE_INS) >= minscore) | ||||
14773 | goto pop; /* do next alternative */ | ||||
14774 | } while (goodword[++gi] != NUL); | ||||
14775 | minscore = score; | ||||
14776 | } | ||||
14777 | else /* both words continue */ | ||||
14778 | { | ||||
14779 | /* If not close to the limit, perform a change. Only try changes | ||||
14780 | * that may lead to a lower score than "minscore". | ||||
14781 | * round 0: try deleting a char from badword | ||||
14782 | * round 1: try inserting a char in badword */ | ||||
14783 | for (round = 0; round <= 1; ++round) | ||||
14784 | { | ||||
14785 | score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); | ||||
14786 | if (score_off < minscore) | ||||
14787 | { | ||||
14788 | if (score_off + SCORE_EDIT_MIN >= minscore) | ||||
14789 | { | ||||
14790 | /* Near the limit, rest of the words must match. We | ||||
14791 | * can check that right now, no need to push an item | ||||
14792 | * onto the stack. */ | ||||
14793 | bi2 = bi + 1 - round; | ||||
14794 | gi2 = gi + round; | ||||
14795 | while (goodword[gi2] == badword[bi2]) | ||||
14796 | { | ||||
14797 | if (goodword[gi2] == NUL) | ||||
14798 | { | ||||
14799 | minscore = score_off; | ||||
14800 | break; | ||||
14801 | } | ||||
14802 | ++bi2; | ||||
14803 | ++gi2; | ||||
14804 | } | ||||
14805 | } | ||||
14806 | else | ||||
14807 | { | ||||
14808 | /* try deleting/inserting a character later */ | ||||
14809 | stack[stackidx].badi = bi + 1 - round; | ||||
14810 | stack[stackidx].goodi = gi + round; | ||||
14811 | stack[stackidx].score = score_off; | ||||
14812 | ++stackidx; | ||||
14813 | } | ||||
14814 | } | ||||
14815 | } | ||||
14816 | |||||
14817 | if (score + SCORE_SWAP < minscore) | ||||
14818 | { | ||||
14819 | /* If swapping two characters makes a match then the | ||||
14820 | * substitution is more expensive, thus there is no need to | ||||
14821 | * try both. */ | ||||
14822 | if (gc == badword[bi + 1] && bc == goodword[gi + 1]) | ||||
14823 | { | ||||
14824 | /* Swap two characters, that is: skip them. */ | ||||
14825 | gi += 2; | ||||
14826 | bi += 2; | ||||
14827 | score += SCORE_SWAP; | ||||
14828 | continue; | ||||
14829 | } | ||||
14830 | } | ||||
14831 | |||||
14832 | /* Substitute one character for another which is the same | ||||
14833 | * thing as deleting a character from both goodword and badword. | ||||
14834 | * Use a better score when there is only a case difference. */ | ||||
14835 | if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) | ||||
14836 | score += SCORE_ICASE; | ||||
14837 | else | ||||
14838 | { | ||||
14839 | /* For a similar character use SCORE_SIMILAR. */ | ||||
14840 | if (slang != NULL | ||||
14841 | && slang->sl_has_map | ||||
14842 | && similar_chars(slang, gc, bc)) | ||||
14843 | score += SCORE_SIMILAR; | ||||
14844 | else | ||||
14845 | score += SCORE_SUBST; | ||||
14846 | } | ||||
14847 | |||||
14848 | if (score < minscore) | ||||
14849 | { | ||||
14850 | /* Do the substitution. */ | ||||
14851 | ++gi; | ||||
14852 | ++bi; | ||||
14853 | continue; | ||||
14854 | } | ||||
14855 | } | ||||
14856 | pop: | ||||
14857 | /* | ||||
14858 | * Get here to try the next alternative, pop it from the stack. | ||||
14859 | */ | ||||
14860 | if (stackidx == 0) /* stack is empty, finished */ | ||||
14861 | break; | ||||
14862 | |||||
14863 | /* pop an item from the stack */ | ||||
14864 | --stackidx; | ||||
14865 | gi = stack[stackidx].goodi; | ||||
14866 | bi = stack[stackidx].badi; | ||||
14867 | score = stack[stackidx].score; | ||||
14868 | } | ||||
14869 | |||||
14870 | /* When the score goes over "limit" it may actually be much higher. | ||||
14871 | * Return a very large number to avoid going below the limit when giving a | ||||
14872 | * bonus. */ | ||||
14873 | if (minscore > limit) | ||||
14874 | return SCORE_MAXMAX; | ||||
14875 | return minscore; | ||||
14876 | } | ||||
14877 | |||||
14878 | #ifdef FEAT_MBYTE | ||||
14879 | /* | ||||
14880 | * Multi-byte version of spell_edit_score_limit(). | ||||
14881 | * Keep it in sync with the above! | ||||
14882 | */ | ||||
14883 | static int | ||||
14884 | spell_edit_score_limit_w(slang, badword, goodword, limit) | ||||
14885 | slang_T *slang; | ||||
14886 | char_u *badword; | ||||
14887 | char_u *goodword; | ||||
14888 | int limit; | ||||
14889 | { | ||||
14890 | limitscore_T stack[10]; /* allow for over 3 * 2 edits */ | ||||
14891 | int stackidx; | ||||
14892 | int bi, gi; | ||||
14893 | int bi2, gi2; | ||||
14894 | int bc, gc; | ||||
14895 | int score; | ||||
14896 | int score_off; | ||||
14897 | int minscore; | ||||
14898 | int round; | ||||
14899 | char_u *p; | ||||
14900 | int wbadword[MAXWLEN]; | ||||
14901 | int wgoodword[MAXWLEN]; | ||||
14902 | |||||
14903 | /* Get the characters from the multi-byte strings and put them in an | ||||
14904 | * int array for easy access. */ | ||||
14905 | bi = 0; | ||||
14906 | for (p = badword; *p != NUL; ) | ||||
14907 | wbadword[bi++] = mb_cptr2char_adv(&p); | ||||
14908 | wbadword[bi++] = 0; | ||||
14909 | gi = 0; | ||||
14910 | for (p = goodword; *p != NUL; ) | ||||
14911 | wgoodword[gi++] = mb_cptr2char_adv(&p); | ||||
14912 | wgoodword[gi++] = 0; | ||||
14913 | |||||
14914 | /* | ||||
14915 | * The idea is to go from start to end over the words. So long as | ||||
14916 | * characters are equal just continue, this always gives the lowest score. | ||||
14917 | * When there is a difference try several alternatives. Each alternative | ||||
14918 | * increases "score" for the edit distance. Some of the alternatives are | ||||
14919 | * pushed unto a stack and tried later, some are tried right away. At the | ||||
14920 | * end of the word the score for one alternative is known. The lowest | ||||
14921 | * possible score is stored in "minscore". | ||||
14922 | */ | ||||
14923 | stackidx = 0; | ||||
14924 | bi = 0; | ||||
14925 | gi = 0; | ||||
14926 | score = 0; | ||||
14927 | minscore = limit + 1; | ||||
14928 | |||||
14929 | for (;;) | ||||
14930 | { | ||||
14931 | /* Skip over an equal part, score remains the same. */ | ||||
14932 | for (;;) | ||||
14933 | { | ||||
14934 | bc = wbadword[bi]; | ||||
14935 | gc = wgoodword[gi]; | ||||
14936 | |||||
14937 | if (bc != gc) /* stop at a char that's different */ | ||||
14938 | break; | ||||
14939 | if (bc == NUL) /* both words end */ | ||||
14940 | { | ||||
14941 | if (score < minscore) | ||||
14942 | minscore = score; | ||||
14943 | goto pop; /* do next alternative */ | ||||
14944 | } | ||||
14945 | ++bi; | ||||
14946 | ++gi; | ||||
14947 | } | ||||
14948 | |||||
14949 | if (gc == NUL) /* goodword ends, delete badword chars */ | ||||
14950 | { | ||||
14951 | do | ||||
14952 | { | ||||
14953 | if ((score += SCORE_DEL) >= minscore) | ||||
14954 | goto pop; /* do next alternative */ | ||||
14955 | } while (wbadword[++bi] != NUL); | ||||
14956 | minscore = score; | ||||
14957 | } | ||||
14958 | else if (bc == NUL) /* badword ends, insert badword chars */ | ||||
14959 | { | ||||
14960 | do | ||||
14961 | { | ||||
14962 | if ((score += SCORE_INS) >= minscore) | ||||
14963 | goto pop; /* do next alternative */ | ||||
14964 | } while (wgoodword[++gi] != NUL); | ||||
14965 | minscore = score; | ||||
14966 | } | ||||
14967 | else /* both words continue */ | ||||
14968 | { | ||||
14969 | /* If not close to the limit, perform a change. Only try changes | ||||
14970 | * that may lead to a lower score than "minscore". | ||||
14971 | * round 0: try deleting a char from badword | ||||
14972 | * round 1: try inserting a char in badword */ | ||||
14973 | for (round = 0; round <= 1; ++round) | ||||
14974 | { | ||||
14975 | score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); | ||||
14976 | if (score_off < minscore) | ||||
14977 | { | ||||
14978 | if (score_off + SCORE_EDIT_MIN >= minscore) | ||||
14979 | { | ||||
14980 | /* Near the limit, rest of the words must match. We | ||||
14981 | * can check that right now, no need to push an item | ||||
14982 | * onto the stack. */ | ||||
14983 | bi2 = bi + 1 - round; | ||||
14984 | gi2 = gi + round; | ||||
14985 | while (wgoodword[gi2] == wbadword[bi2]) | ||||
14986 | { | ||||
14987 | if (wgoodword[gi2] == NUL) | ||||
14988 | { | ||||
14989 | minscore = score_off; | ||||
14990 | break; | ||||
14991 | } | ||||
14992 | ++bi2; | ||||
14993 | ++gi2; | ||||
14994 | } | ||||
14995 | } | ||||
14996 | else | ||||
14997 | { | ||||
14998 | /* try deleting a character from badword later */ | ||||
14999 | stack[stackidx].badi = bi + 1 - round; | ||||
15000 | stack[stackidx].goodi = gi + round; | ||||
15001 | stack[stackidx].score = score_off; | ||||
15002 | ++stackidx; | ||||
15003 | } | ||||
15004 | } | ||||
15005 | } | ||||
15006 | |||||
15007 | if (score + SCORE_SWAP < minscore) | ||||
15008 | { | ||||
15009 | /* If swapping two characters makes a match then the | ||||
15010 | * substitution is more expensive, thus there is no need to | ||||
15011 | * try both. */ | ||||
15012 | if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1]) | ||||
15013 | { | ||||
15014 | /* Swap two characters, that is: skip them. */ | ||||
15015 | gi += 2; | ||||
15016 | bi += 2; | ||||
15017 | score += SCORE_SWAP; | ||||
15018 | continue; | ||||
15019 | } | ||||
15020 | } | ||||
15021 | |||||
15022 | /* Substitute one character for another which is the same | ||||
15023 | * thing as deleting a character from both goodword and badword. | ||||
15024 | * Use a better score when there is only a case difference. */ | ||||
15025 | if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) | ||||
15026 | score += SCORE_ICASE; | ||||
15027 | else | ||||
15028 | { | ||||
15029 | /* For a similar character use SCORE_SIMILAR. */ | ||||
15030 | if (slang != NULL | ||||
15031 | && slang->sl_has_map | ||||
15032 | && similar_chars(slang, gc, bc)) | ||||
15033 | score += SCORE_SIMILAR; | ||||
15034 | else | ||||
15035 | score += SCORE_SUBST; | ||||
15036 | } | ||||
15037 | |||||
15038 | if (score < minscore) | ||||
15039 | { | ||||
15040 | /* Do the substitution. */ | ||||
15041 | ++gi; | ||||
15042 | ++bi; | ||||
15043 | continue; | ||||
15044 | } | ||||
15045 | } | ||||
15046 | pop: | ||||
15047 | /* | ||||
15048 | * Get here to try the next alternative, pop it from the stack. | ||||
15049 | */ | ||||
15050 | if (stackidx == 0) /* stack is empty, finished */ | ||||
15051 | break; | ||||
15052 | |||||
15053 | /* pop an item from the stack */ | ||||
15054 | --stackidx; | ||||
15055 | gi = stack[stackidx].goodi; | ||||
15056 | bi = stack[stackidx].badi; | ||||
15057 | score = stack[stackidx].score; | ||||
15058 | } | ||||
15059 | |||||
15060 | /* When the score goes over "limit" it may actually be much higher. | ||||
15061 | * Return a very large number to avoid going below the limit when giving a | ||||
15062 | * bonus. */ | ||||
15063 | if (minscore > limit) | ||||
15064 | return SCORE_MAXMAX; | ||||
15065 | return minscore; | ||||
15066 | } | ||||
15067 | #endif | ||||
15068 | |||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 15069 | /* |
15070 | * ":spellinfo" | ||||
15071 | */ | ||||
15072 | /*ARGSUSED*/ | ||||
15073 | void | ||||
15074 | ex_spellinfo(eap) | ||||
15075 | exarg_T *eap; | ||||
15076 | { | ||||
15077 | int lpi; | ||||
15078 | langp_T *lp; | ||||
15079 | char_u *p; | ||||
15080 | |||||
15081 | if (no_spell_checking(curwin)) | ||||
15082 | return; | ||||
15083 | |||||
15084 | msg_start(); | ||||
15085 | for (lpi = 0; lpi < curbuf->b_langp.ga_len && !got_int; ++lpi) | ||||
15086 | { | ||||
15087 | lp = LANGP_ENTRY(curbuf->b_langp, lpi); | ||||
15088 | msg_puts((char_u *)"file: "); | ||||
15089 | msg_puts(lp->lp_slang->sl_fname); | ||||
15090 | msg_putchar('\n'); | ||||
15091 | p = lp->lp_slang->sl_info; | ||||
15092 | if (p != NULL) | ||||
15093 | { | ||||
15094 | msg_puts(p); | ||||
15095 | msg_putchar('\n'); | ||||
15096 | } | ||||
15097 | } | ||||
15098 | msg_end(); | ||||
15099 | } | ||||
15100 | |||||
/* Bit flags for the "dumpflags" argument of spell_dump_compl(), dump_word()
 * and dump_prefixes().  They can be OR'ed together. */
#define DUMPFLAG_KEEPCASE   0x01    /* round 2: keep-case tree */
#define DUMPFLAG_COUNT      0x02    /* include word count */
#define DUMPFLAG_ICASE      0x04    /* ignore case when finding matches */
#define DUMPFLAG_ONECAP     0x08    /* pattern starts with capital */
#define DUMPFLAG_ALLCAP     0x10    /* pattern is all capitals */
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15106 | |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15107 | /* |
15108 | * ":spelldump" | ||||
15109 | */ | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15110 | void |
15111 | ex_spelldump(eap) | ||||
15112 | exarg_T *eap; | ||||
15113 | { | ||||
15114 | buf_T *buf = curbuf; | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15115 | |
15116 | if (no_spell_checking(curwin)) | ||||
15117 | return; | ||||
15118 | |||||
15119 | /* Create a new empty buffer by splitting the window. */ | ||||
15120 | do_cmdline_cmd((char_u *)"new"); | ||||
15121 | if (!bufempty() || !buf_valid(buf)) | ||||
15122 | return; | ||||
15123 | |||||
15124 | spell_dump_compl(buf, NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0); | ||||
15125 | |||||
15126 | /* Delete the empty line that we started with. */ | ||||
15127 | if (curbuf->b_ml.ml_line_count > 1) | ||||
15128 | ml_delete(curbuf->b_ml.ml_line_count, FALSE); | ||||
15129 | |||||
15130 | redraw_later(NOT_VALID); | ||||
15131 | } | ||||
15132 | |||||
15133 | /* | ||||
15134 | * Go through all possible words and: | ||||
15135 | * 1. When "pat" is NULL: dump a list of all words in the current buffer. | ||||
15136 | * "ic" and "dir" are not used. | ||||
15137 | * 2. When "pat" is not NULL: add matching words to insert mode completion. | ||||
15138 | */ | ||||
15139 | void | ||||
15140 | spell_dump_compl(buf, pat, ic, dir, dumpflags_arg) | ||||
15141 | buf_T *buf; /* buffer with spell checking */ | ||||
15142 | char_u *pat; /* leading part of the word */ | ||||
15143 | int ic; /* ignore case */ | ||||
15144 | int *dir; /* direction for adding matches */ | ||||
15145 | int dumpflags_arg; /* DUMPFLAG_* */ | ||||
15146 | { | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15147 | langp_T *lp; |
15148 | slang_T *slang; | ||||
15149 | idx_T arridx[MAXWLEN]; | ||||
15150 | int curi[MAXWLEN]; | ||||
15151 | char_u word[MAXWLEN]; | ||||
15152 | int c; | ||||
15153 | char_u *byts; | ||||
15154 | idx_T *idxs; | ||||
15155 | linenr_T lnum = 0; | ||||
15156 | int round; | ||||
15157 | int depth; | ||||
15158 | int n; | ||||
15159 | int flags; | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15160 | char_u *region_names = NULL; /* region names being used */ |
15161 | int do_region = TRUE; /* dump region names and numbers */ | ||||
15162 | char_u *p; | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15163 | int lpi; |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15164 | int dumpflags = dumpflags_arg; |
15165 | int patlen; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15166 | |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15167 | /* When ignoring case or when the pattern starts with capital pass this on |
15168 | * to dump_word(). */ | ||||
15169 | if (pat != NULL) | ||||
15170 | { | ||||
15171 | if (ic) | ||||
15172 | dumpflags |= DUMPFLAG_ICASE; | ||||
15173 | else | ||||
15174 | { | ||||
15175 | n = captype(pat, NULL); | ||||
15176 | if (n == WF_ONECAP) | ||||
15177 | dumpflags |= DUMPFLAG_ONECAP; | ||||
15178 | else if (n == WF_ALLCAP | ||||
15179 | #ifdef FEAT_MBYTE | ||||
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 15180 | && (int)STRLEN(pat) > mb_ptr2len(pat) |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15181 | #else |
Bram Moolenaar | 362e1a3 | 2006-03-06 23:29:24 +0000 | [diff] [blame] | 15182 | && (int)STRLEN(pat) > 1 |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15183 | #endif |
15184 | ) | ||||
15185 | dumpflags |= DUMPFLAG_ALLCAP; | ||||
15186 | } | ||||
15187 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15188 | |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15189 | /* Find out if we can support regions: All languages must support the same |
15190 | * regions or none at all. */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15191 | for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15192 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15193 | lp = LANGP_ENTRY(buf->b_langp, lpi); |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15194 | p = lp->lp_slang->sl_regions; |
15195 | if (p[0] != 0) | ||||
15196 | { | ||||
15197 | if (region_names == NULL) /* first language with regions */ | ||||
15198 | region_names = p; | ||||
15199 | else if (STRCMP(region_names, p) != 0) | ||||
15200 | { | ||||
15201 | do_region = FALSE; /* region names are different */ | ||||
15202 | break; | ||||
15203 | } | ||||
15204 | } | ||||
15205 | } | ||||
15206 | |||||
15207 | if (do_region && region_names != NULL) | ||||
15208 | { | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15209 | if (pat == NULL) |
15210 | { | ||||
15211 | vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names); | ||||
15212 | ml_append(lnum++, IObuff, (colnr_T)0, FALSE); | ||||
15213 | } | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15214 | } |
15215 | else | ||||
15216 | do_region = FALSE; | ||||
15217 | |||||
15218 | /* | ||||
15219 | * Loop over all files loaded for the entries in 'spelllang'. | ||||
15220 | */ | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15221 | for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15222 | { |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15223 | lp = LANGP_ENTRY(buf->b_langp, lpi); |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15224 | slang = lp->lp_slang; |
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15225 | if (slang->sl_fbyts == NULL) /* reloading failed */ |
15226 | continue; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15227 | |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15228 | if (pat == NULL) |
15229 | { | ||||
15230 | vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname); | ||||
15231 | ml_append(lnum++, IObuff, (colnr_T)0, FALSE); | ||||
15232 | } | ||||
15233 | |||||
15234 | /* When matching with a pattern and there are no prefixes only use | ||||
15235 | * parts of the tree that match "pat". */ | ||||
15236 | if (pat != NULL && slang->sl_pbyts == NULL) | ||||
15237 | patlen = STRLEN(pat); | ||||
15238 | else | ||||
15239 | patlen = 0; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15240 | |
15241 | /* round 1: case-folded tree | ||||
15242 | * round 2: keep-case tree */ | ||||
15243 | for (round = 1; round <= 2; ++round) | ||||
15244 | { | ||||
15245 | if (round == 1) | ||||
15246 | { | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15247 | dumpflags &= ~DUMPFLAG_KEEPCASE; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15248 | byts = slang->sl_fbyts; |
15249 | idxs = slang->sl_fidxs; | ||||
15250 | } | ||||
15251 | else | ||||
15252 | { | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15253 | dumpflags |= DUMPFLAG_KEEPCASE; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15254 | byts = slang->sl_kbyts; |
15255 | idxs = slang->sl_kidxs; | ||||
15256 | } | ||||
15257 | if (byts == NULL) | ||||
15258 | continue; /* array is empty */ | ||||
15259 | |||||
15260 | depth = 0; | ||||
15261 | arridx[0] = 0; | ||||
15262 | curi[0] = 1; | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15263 | while (depth >= 0 && !got_int |
15264 | && (pat == NULL || !compl_interrupted)) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15265 | { |
15266 | if (curi[depth] > byts[arridx[depth]]) | ||||
15267 | { | ||||
15268 | /* Done all bytes at this node, go up one level. */ | ||||
15269 | --depth; | ||||
15270 | line_breakcheck(); | ||||
Bram Moolenaar | a203182 | 2006-03-07 22:29:51 +0000 | [diff] [blame] | 15271 | ins_compl_check_keys(50); |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15272 | } |
15273 | else | ||||
15274 | { | ||||
15275 | /* Do one more byte at this node. */ | ||||
15276 | n = arridx[depth] + curi[depth]; | ||||
15277 | ++curi[depth]; | ||||
15278 | c = byts[n]; | ||||
15279 | if (c == 0) | ||||
15280 | { | ||||
15281 | /* End of word, deal with the word. | ||||
15282 | * Don't use keep-case words in the fold-case tree, | ||||
15283 | * they will appear in the keep-case tree. | ||||
15284 | * Only use the word when the region matches. */ | ||||
15285 | flags = (int)idxs[n]; | ||||
15286 | if ((round == 2 || (flags & WF_KEEPCAP) == 0) | ||||
Bram Moolenaar | ac6e65f | 2005-08-29 22:25:38 +0000 | [diff] [blame] | 15287 | && (flags & WF_NEEDCOMP) == 0 |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15288 | && (do_region |
15289 | || (flags & WF_REGION) == 0 | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 15290 | || (((unsigned)flags >> 16) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15291 | & lp->lp_region) != 0)) |
15292 | { | ||||
15293 | word[depth] = NUL; | ||||
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15294 | if (!do_region) |
15295 | flags &= ~WF_REGION; | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 15296 | |
15297 | /* Dump the basic word if there is no prefix or | ||||
15298 | * when it's the first one. */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 15299 | c = (unsigned)flags >> 24; |
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 15300 | if (c == 0 || curi[depth] == 2) |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15301 | { |
15302 | dump_word(slang, word, pat, dir, | ||||
15303 | dumpflags, flags, lnum); | ||||
15304 | if (pat == NULL) | ||||
15305 | ++lnum; | ||||
15306 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15307 | |
15308 | /* Apply the prefix, if there is one. */ | ||||
Bram Moolenaar | 0a5fe21 | 2005-06-24 23:01:23 +0000 | [diff] [blame] | 15309 | if (c != 0) |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15310 | lnum = dump_prefixes(slang, word, pat, dir, |
15311 | dumpflags, flags, lnum); | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15312 | } |
15313 | } | ||||
15314 | else | ||||
15315 | { | ||||
15316 | /* Normal char, go one level deeper. */ | ||||
15317 | word[depth++] = c; | ||||
15318 | arridx[depth] = idxs[n]; | ||||
15319 | curi[depth] = 1; | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15320 | |
15321 | /* Check if this characters matches with the pattern. | ||||
15322 | * If not skip the whole tree below it. | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15323 | * Always ignore case here, dump_word() will check |
15324 | * proper case later. This isn't exactly right when | ||||
15325 | * length changes for multi-byte characters with | ||||
15326 | * ignore case... */ | ||||
15327 | if (depth <= patlen | ||||
15328 | && MB_STRNICMP(word, pat, depth) != 0) | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15329 | --depth; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15330 | } |
15331 | } | ||||
15332 | } | ||||
15333 | } | ||||
15334 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15335 | } |
15336 | |||||
15337 | /* | ||||
15338 | * Dump one word: apply case modifications and append a line to the buffer. | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15339 | * When "lnum" is zero add insert mode completion. |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15340 | */ |
15341 | static void | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15342 | dump_word(slang, word, pat, dir, dumpflags, wordflags, lnum) |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15343 | slang_T *slang; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15344 | char_u *word; |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15345 | char_u *pat; |
15346 | int *dir; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15347 | int dumpflags; |
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15348 | int wordflags; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15349 | linenr_T lnum; |
15350 | { | ||||
15351 | int keepcap = FALSE; | ||||
15352 | char_u *p; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15353 | char_u *tw; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15354 | char_u cword[MAXWLEN]; |
Bram Moolenaar | 7887d88 | 2005-07-01 22:33:52 +0000 | [diff] [blame] | 15355 | char_u badword[MAXWLEN + 10]; |
15356 | int i; | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15357 | int flags = wordflags; |
15358 | |||||
15359 | if (dumpflags & DUMPFLAG_ONECAP) | ||||
15360 | flags |= WF_ONECAP; | ||||
15361 | if (dumpflags & DUMPFLAG_ALLCAP) | ||||
15362 | flags |= WF_ALLCAP; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15363 | |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15364 | if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15365 | { |
15366 | /* Need to fix case according to "flags". */ | ||||
15367 | make_case_word(word, cword, flags); | ||||
15368 | p = cword; | ||||
15369 | } | ||||
15370 | else | ||||
15371 | { | ||||
15372 | p = word; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15373 | if ((dumpflags & DUMPFLAG_KEEPCASE) |
15374 | && ((captype(word, NULL) & WF_KEEPCAP) == 0 | ||||
Bram Moolenaar | 0dc065e | 2005-07-04 22:49:24 +0000 | [diff] [blame] | 15375 | || (flags & WF_FIXCAP) != 0)) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15376 | keepcap = TRUE; |
15377 | } | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15378 | tw = p; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15379 | |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15380 | if (pat == NULL) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15381 | { |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15382 | /* Add flags and regions after a slash. */ |
15383 | if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap) | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15384 | { |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15385 | STRCPY(badword, p); |
15386 | STRCAT(badword, "/"); | ||||
15387 | if (keepcap) | ||||
15388 | STRCAT(badword, "="); | ||||
15389 | if (flags & WF_BANNED) | ||||
15390 | STRCAT(badword, "!"); | ||||
15391 | else if (flags & WF_RARE) | ||||
15392 | STRCAT(badword, "?"); | ||||
15393 | if (flags & WF_REGION) | ||||
15394 | for (i = 0; i < 7; ++i) | ||||
15395 | if (flags & (0x10000 << i)) | ||||
15396 | sprintf((char *)badword + STRLEN(badword), "%d", i + 1); | ||||
15397 | p = badword; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15398 | } |
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15399 | |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15400 | if (dumpflags & DUMPFLAG_COUNT) |
15401 | { | ||||
15402 | hashitem_T *hi; | ||||
15403 | |||||
15404 | /* Include the word count for ":spelldump!". */ | ||||
15405 | hi = hash_find(&slang->sl_wordcount, tw); | ||||
15406 | if (!HASHITEM_EMPTY(hi)) | ||||
15407 | { | ||||
15408 | vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d", | ||||
15409 | tw, HI2WC(hi)->wc_count); | ||||
15410 | p = IObuff; | ||||
15411 | } | ||||
15412 | } | ||||
15413 | |||||
15414 | ml_append(lnum, p, (colnr_T)0, FALSE); | ||||
15415 | } | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15416 | else if (((dumpflags & DUMPFLAG_ICASE) |
15417 | ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0 | ||||
15418 | : STRNCMP(p, pat, STRLEN(pat)) == 0) | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15419 | && ins_compl_add_infercase(p, (int)STRLEN(p), |
15420 | dumpflags & DUMPFLAG_ICASE, | ||||
15421 | NULL, *dir, 0) == OK) | ||||
Bram Moolenaar | d0131a8 | 2006-03-04 21:46:13 +0000 | [diff] [blame] | 15422 | /* if dir was BACKWARD then honor it just once */ |
15423 | *dir = FORWARD; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15424 | } |
15425 | |||||
15426 | /* | ||||
Bram Moolenaar | a1ba811 | 2005-06-28 23:23:32 +0000 | [diff] [blame] | 15427 | * For ":spelldump": Find matching prefixes for "word". Prepend each to |
15428 | * "word" and append a line to the buffer. | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15429 | * When "lnum" is zero add insert mode completion. |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15430 | * Return the updated line number. |
15431 | */ | ||||
15432 | static linenr_T | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15433 | dump_prefixes(slang, word, pat, dir, dumpflags, flags, startlnum) |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15434 | slang_T *slang; |
15435 | char_u *word; /* case-folded word */ | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15436 | char_u *pat; |
15437 | int *dir; | ||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15438 | int dumpflags; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15439 | int flags; /* flags with prefix ID */ |
15440 | linenr_T startlnum; | ||||
15441 | { | ||||
15442 | idx_T arridx[MAXWLEN]; | ||||
15443 | int curi[MAXWLEN]; | ||||
15444 | char_u prefix[MAXWLEN]; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15445 | char_u word_up[MAXWLEN]; |
15446 | int has_word_up = FALSE; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15447 | int c; |
15448 | char_u *byts; | ||||
15449 | idx_T *idxs; | ||||
15450 | linenr_T lnum = startlnum; | ||||
15451 | int depth; | ||||
15452 | int n; | ||||
15453 | int len; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15454 | int i; |
15455 | |||||
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 15456 | /* If the word starts with a lower-case letter make the word with an |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15457 | * upper-case letter in word_up[]. */ |
15458 | c = PTR2CHAR(word); | ||||
15459 | if (SPELL_TOUPPER(c) != c) | ||||
15460 | { | ||||
15461 | onecap_copy(word, word_up, TRUE); | ||||
15462 | has_word_up = TRUE; | ||||
15463 | } | ||||
15464 | |||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15465 | byts = slang->sl_pbyts; |
15466 | idxs = slang->sl_pidxs; | ||||
15467 | if (byts != NULL) /* array not is empty */ | ||||
15468 | { | ||||
15469 | /* | ||||
15470 | * Loop over all prefixes, building them byte-by-byte in prefix[]. | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 15471 | * When at the end of a prefix check that it supports "flags". |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15472 | */ |
15473 | depth = 0; | ||||
15474 | arridx[0] = 0; | ||||
15475 | curi[0] = 1; | ||||
15476 | while (depth >= 0 && !got_int) | ||||
15477 | { | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 15478 | n = arridx[depth]; |
15479 | len = byts[n]; | ||||
15480 | if (curi[depth] > len) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15481 | { |
15482 | /* Done all bytes at this node, go up one level. */ | ||||
15483 | --depth; | ||||
15484 | line_breakcheck(); | ||||
15485 | } | ||||
15486 | else | ||||
15487 | { | ||||
15488 | /* Do one more byte at this node. */ | ||||
Bram Moolenaar | dfb9ac0 | 2005-07-05 21:36:03 +0000 | [diff] [blame] | 15489 | n += curi[depth]; |
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15490 | ++curi[depth]; |
15491 | c = byts[n]; | ||||
15492 | if (c == 0) | ||||
15493 | { | ||||
15494 | /* End of prefix, find out how many IDs there are. */ | ||||
15495 | for (i = 1; i < len; ++i) | ||||
15496 | if (byts[n + i] != 0) | ||||
15497 | break; | ||||
15498 | curi[depth] += i - 1; | ||||
15499 | |||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15500 | c = valid_word_prefix(i, n, flags, word, slang, FALSE); |
15501 | if (c != 0) | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15502 | { |
Bram Moolenaar | 9c96f59 | 2005-06-30 21:52:39 +0000 | [diff] [blame] | 15503 | vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1); |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15504 | dump_word(slang, prefix, pat, dir, dumpflags, |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15505 | (c & WF_RAREPFX) ? (flags | WF_RARE) |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15506 | : flags, lnum); |
15507 | if (lnum != 0) | ||||
15508 | ++lnum; | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15509 | } |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15510 | |
15511 | /* Check for prefix that matches the word when the | ||||
15512 | * first letter is upper-case, but only if the prefix has | ||||
15513 | * a condition. */ | ||||
15514 | if (has_word_up) | ||||
15515 | { | ||||
15516 | c = valid_word_prefix(i, n, flags, word_up, slang, | ||||
15517 | TRUE); | ||||
15518 | if (c != 0) | ||||
15519 | { | ||||
15520 | vim_strncpy(prefix + depth, word_up, | ||||
15521 | MAXWLEN - depth - 1); | ||||
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15522 | dump_word(slang, prefix, pat, dir, dumpflags, |
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15523 | (c & WF_RAREPFX) ? (flags | WF_RARE) |
Bram Moolenaar | b475fb9 | 2006-03-02 22:40:52 +0000 | [diff] [blame] | 15524 | : flags, lnum); |
15525 | if (lnum != 0) | ||||
15526 | ++lnum; | ||||
Bram Moolenaar | 53805d1 | 2005-08-01 07:08:33 +0000 | [diff] [blame] | 15527 | } |
15528 | } | ||||
Bram Moolenaar | f417f2b | 2005-06-23 22:29:21 +0000 | [diff] [blame] | 15529 | } |
15530 | else | ||||
15531 | { | ||||
15532 | /* Normal char, go one level deeper. */ | ||||
15533 | prefix[depth++] = c; | ||||
15534 | arridx[depth] = idxs[n]; | ||||
15535 | curi[depth] = 1; | ||||
15536 | } | ||||
15537 | } | ||||
15538 | } | ||||
15539 | } | ||||
15540 | |||||
15541 | return lnum; | ||||
15542 | } | ||||
15543 | |||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 15544 | /* |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 15545 | * Move "p" to the end of word "start". |
15546 | * Uses the spell-checking word characters. | ||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 15547 | */ |
15548 | char_u * | ||||
15549 | spell_to_word_end(start, buf) | ||||
15550 | char_u *start; | ||||
15551 | buf_T *buf; | ||||
15552 | { | ||||
15553 | char_u *p = start; | ||||
15554 | |||||
15555 | while (*p != NUL && spell_iswordp(p, buf)) | ||||
15556 | mb_ptr_adv(p); | ||||
15557 | return p; | ||||
15558 | } | ||||
15559 | |||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15560 | #if defined(FEAT_INS_EXPAND) || defined(PROTO) |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15561 | /* |
Bram Moolenaar | a40ceaf | 2006-01-13 22:35:40 +0000 | [diff] [blame] | 15562 | * For Insert mode completion CTRL-X s: |
15563 | * Find start of the word in front of column "startcol". | ||||
15564 | * We don't check if it is badly spelled, with completion we can only change | ||||
15565 | * the word in front of the cursor. | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15566 | * Returns the column number of the word. |
15567 | */ | ||||
15568 | int | ||||
15569 | spell_word_start(startcol) | ||||
15570 | int startcol; | ||||
15571 | { | ||||
15572 | char_u *line; | ||||
15573 | char_u *p; | ||||
15574 | int col = 0; | ||||
15575 | |||||
Bram Moolenaar | 9552956 | 2005-08-25 21:21:38 +0000 | [diff] [blame] | 15576 | if (no_spell_checking(curwin)) |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15577 | return startcol; |
15578 | |||||
15579 | /* Find a word character before "startcol". */ | ||||
15580 | line = ml_get_curline(); | ||||
15581 | for (p = line + startcol; p > line; ) | ||||
15582 | { | ||||
15583 | mb_ptr_back(line, p); | ||||
15584 | if (spell_iswordp_nmw(p)) | ||||
15585 | break; | ||||
15586 | } | ||||
15587 | |||||
15588 | /* Go back to start of the word. */ | ||||
15589 | while (p > line) | ||||
15590 | { | ||||
15591 | col = p - line; | ||||
15592 | mb_ptr_back(line, p); | ||||
15593 | if (!spell_iswordp(p, curbuf)) | ||||
15594 | break; | ||||
15595 | col = 0; | ||||
15596 | } | ||||
15597 | |||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15598 | return col; |
15599 | } | ||||
15600 | |||||
15601 | /* | ||||
Bram Moolenaar | 4effc80 | 2005-09-30 21:12:02 +0000 | [diff] [blame] | 15602 | * Need to check for 'spellcapcheck' now, the word is removed before |
15603 | * expand_spelling() is called. Therefore the ugly global variable. | ||||
15604 | */ | ||||
15605 | static int spell_expand_need_cap; | ||||
15606 | |||||
15607 | void | ||||
15608 | spell_expand_check_cap(col) | ||||
15609 | colnr_T col; | ||||
15610 | { | ||||
15611 | spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col); | ||||
15612 | } | ||||
15613 | |||||
15614 | /* | ||||
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15615 | * Get list of spelling suggestions. |
15616 | * Used for Insert mode completion CTRL-X ?. | ||||
15617 | * Returns the number of matches. The matches are in "matchp[]", array of | ||||
15618 | * allocated strings. | ||||
15619 | */ | ||||
15620 | /*ARGSUSED*/ | ||||
15621 | int | ||||
15622 | expand_spelling(lnum, col, pat, matchp) | ||||
15623 | linenr_T lnum; | ||||
15624 | int col; | ||||
15625 | char_u *pat; | ||||
15626 | char_u ***matchp; | ||||
15627 | { | ||||
15628 | garray_T ga; | ||||
15629 | |||||
Bram Moolenaar | 4770d09 | 2006-01-12 23:22:24 +0000 | [diff] [blame] | 15630 | spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE); |
Bram Moolenaar | 8b59de9 | 2005-08-11 19:59:29 +0000 | [diff] [blame] | 15631 | *matchp = ga.ga_data; |
15632 | return ga.ga_len; | ||||
15633 | } | ||||
15634 | #endif | ||||
15635 | |||||
Bram Moolenaar | f71a3db | 2006-03-12 21:50:18 +0000 | [diff] [blame] | 15636 | #endif /* FEAT_SPELL */ |