blob: dc0cbb6b86962da4cc66817e6bbf9c55072fea88 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaarc01140a2006-03-24 22:21:52 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * hashtab.c: Handling of a hashtable with Vim-specific properties.
12 *
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
14 * only once in the table.
15 *
16 * A hash number is computed from the key for quick lookup. When the hashes
17 * of two different keys point to the same entry an algorithm is used to
18 * iterate over other entries in the table until the right one is found.
19 * To make the iteration work removed keys are different from entries where a
20 * key was never present.
21 *
22 * The mechanism has been partly based on how Python Dictionaries are
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
24 *
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
27 * memory).
28 */
29
30#include "vim.h"
31
Bram Moolenaarc01140a2006-03-24 22:21:52 +000032#if 0
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010033# define HT_DEBUG // extra checks for table consistency and statistics
Bram Moolenaarc01140a2006-03-24 22:21:52 +000034
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010035static long hash_count_lookup = 0; // count number of hashtab lookups
36static long hash_count_perturb = 0; // count number of "misses"
Bram Moolenaarc01140a2006-03-24 22:21:52 +000037#endif
38
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010039// Magic value for algorithm that walks through the array.
Bram Moolenaarc01140a2006-03-24 22:21:52 +000040#define PERTURB_SHIFT 5
41
Bram Moolenaar92b8b2d2016-01-29 22:36:45 +010042static int hash_may_resize(hashtab_T *ht, int minitems);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000043
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010044#if 0 // currently not used
Bram Moolenaarc01140a2006-03-24 22:21:52 +000045/*
46 * Create an empty hash table.
47 * Returns NULL when out of memory.
48 */
49 hashtab_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +010050hash_create(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000051{
52 hashtab_T *ht;
53
Bram Moolenaarc799fe22019-05-28 23:08:19 +020054 ht = ALLOC_ONE(hashtab_T);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000055 if (ht != NULL)
56 hash_init(ht);
57 return ht;
58}
59#endif
60
61/*
62 * Initialize an empty hash table.
63 */
64 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010065hash_init(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000066{
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010067 // This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray".
Bram Moolenaara80faa82020-04-12 19:37:17 +020068 CLEAR_POINTER(ht);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000069 ht->ht_array = ht->ht_smallarray;
70 ht->ht_mask = HT_INIT_SIZE - 1;
71}
72
73/*
74 * Free the array of a hash table. Does not free the items it contains!
75 * If "ht" is not freed then you should call hash_init() next!
76 */
77 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010078hash_clear(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000079{
80 if (ht->ht_array != ht->ht_smallarray)
81 vim_free(ht->ht_array);
82}
83
Bram Moolenaar0e655112020-09-11 20:36:36 +020084#if defined(FEAT_SPELL) || defined(FEAT_TERMINAL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000085/*
86 * Free the array of a hash table and all the keys it contains. The keys must
87 * have been allocated. "off" is the offset from the start of the allocate
88 * memory to the location of the key (it's always positive).
89 */
90 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010091hash_clear_all(hashtab_T *ht, int off)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000092{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000093 long todo;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000094 hashitem_T *hi;
95
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000096 todo = (long)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000097 for (hi = ht->ht_array; todo > 0; ++hi)
98 {
99 if (!HASHITEM_EMPTY(hi))
100 {
101 vim_free(hi->hi_key - off);
102 --todo;
103 }
104 }
105 hash_clear(ht);
106}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100107#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000108
109/*
110 * Find "key" in hashtable "ht". "key" must not be NULL.
111 * Always returns a pointer to a hashitem. If the item was not found then
112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
113 * would be added.
114 * WARNING: The returned pointer becomes invalid when the hashtable is changed
115 * (adding, setting or removing an item)!
116 */
117 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100118hash_find(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000119{
120 return hash_lookup(ht, key, hash_hash(key));
121}
122
123/*
124 * Like hash_find(), but caller computes "hash".
125 */
126 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100127hash_lookup(hashtab_T *ht, char_u *key, hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000128{
129 hash_T perturb;
130 hashitem_T *freeitem;
131 hashitem_T *hi;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100132 unsigned idx;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000133
134#ifdef HT_DEBUG
135 ++hash_count_lookup;
136#endif
137
138 /*
139 * Quickly handle the most common situations:
140 * - return if there is no item at all
141 * - skip over a removed item
142 * - return if the item matches
143 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100144 idx = (unsigned)(hash & ht->ht_mask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000145 hi = &ht->ht_array[idx];
146
147 if (hi->hi_key == NULL)
148 return hi;
149 if (hi->hi_key == HI_KEY_REMOVED)
150 freeitem = hi;
151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
152 return hi;
153 else
154 freeitem = NULL;
155
156 /*
157 * Need to search through the table to find the key. The algorithm
158 * to step through the table starts with large steps, gradually becoming
159 * smaller down to (1/4 table size + 1). This means it goes through all
160 * table entries in the end.
161 * When we run into a NULL key it's clear that the key isn't there.
162 * Return the first available slot found (can be a slot of a removed
163 * item).
164 */
165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
166 {
167#ifdef HT_DEBUG
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100168 ++hash_count_perturb; // count a "miss" for hashtab lookup
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000169#endif
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000171 hi = &ht->ht_array[idx & ht->ht_mask];
172 if (hi->hi_key == NULL)
173 return freeitem == NULL ? hi : freeitem;
174 if (hi->hi_hash == hash
175 && hi->hi_key != HI_KEY_REMOVED
176 && STRCMP(hi->hi_key, key) == 0)
177 return hi;
178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
179 freeitem = hi;
180 }
181}
182
Bram Moolenaar113e1072019-01-20 15:30:40 +0100183#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000184/*
185 * Print the efficiency of hashtable lookups.
186 * Useful when trying different hash algorithms.
187 * Called when exiting.
188 */
189 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100190hash_debug_results(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000191{
192#ifdef HT_DEBUG
193 fprintf(stderr, "\r\n\r\n\r\n\r\n");
194 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
195 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
196 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
197 hash_count_perturb * 100 / hash_count_lookup);
198#endif
199}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100200#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000201
202/*
203 * Add item with key "key" to hashtable "ht".
204 * Returns FAIL when out of memory or the key is already present.
205 */
206 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100207hash_add(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000208{
209 hash_T hash = hash_hash(key);
210 hashitem_T *hi;
211
212 hi = hash_lookup(ht, key, hash);
213 if (!HASHITEM_EMPTY(hi))
214 {
Bram Moolenaar95f09602016-11-10 20:01:45 +0100215 internal_error("hash_add()");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000216 return FAIL;
217 }
218 return hash_add_item(ht, hi, key, hash);
219}
220
221/*
222 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
223 * "hi" must have been obtained with hash_lookup() and point to an empty item.
224 * "hi" is invalid after this!
225 * Returns OK or FAIL (out of memory).
226 */
227 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100228hash_add_item(
229 hashtab_T *ht,
230 hashitem_T *hi,
231 char_u *key,
232 hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000233{
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100234 // If resizing failed before and it fails again we can't add an item.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000235 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
236 return FAIL;
237
238 ++ht->ht_used;
Bram Moolenaar21c16f82020-07-14 16:15:34 +0200239 ++ht->ht_changed;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000240 if (hi->hi_key == NULL)
241 ++ht->ht_filled;
242 hi->hi_key = key;
243 hi->hi_hash = hash;
244
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100245 // When the space gets low may resize the array.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000246 return hash_may_resize(ht, 0);
247}
248
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100249#if 0 // not used
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000250/*
251 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
252 * is to be overwritten. Thus the number of items in the hashtable doesn't
253 * change.
254 * Although the key must be identical, the pointer may be different, thus it's
255 * set anyway (the key is part of an item with that key).
256 * The caller must take care of freeing the old item.
257 * "hi" is invalid after this!
258 */
259 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100260hash_set(hashitem_T *hi, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000261{
262 hi->hi_key = key;
263}
264#endif
265
266/*
267 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
268 * hash_lookup().
269 * The caller must take care of freeing the item itself.
270 */
271 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100272hash_remove(hashtab_T *ht, hashitem_T *hi)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000273{
274 --ht->ht_used;
Bram Moolenaar21c16f82020-07-14 16:15:34 +0200275 ++ht->ht_changed;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000276 hi->hi_key = HI_KEY_REMOVED;
277 hash_may_resize(ht, 0);
278}
279
280/*
281 * Lock a hashtable: prevent that ht_array changes.
282 * Don't use this when items are to be added!
283 * Must call hash_unlock() later.
284 */
285 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100286hash_lock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000287{
288 ++ht->ht_locked;
289}
290
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000291/*
292 * Lock a hashtable at the specified number of entries.
293 * Caller must make sure no more than "size" entries will be added.
294 * Must call hash_unlock() later.
295 */
296 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100297hash_lock_size(hashtab_T *ht, int size)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000298{
299 (void)hash_may_resize(ht, size);
300 ++ht->ht_locked;
301}
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000302
303/*
304 * Unlock a hashtable: allow ht_array changes again.
305 * Table will be resized (shrink) when necessary.
306 * This must balance a call to hash_lock().
307 */
308 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100309hash_unlock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000310{
311 --ht->ht_locked;
312 (void)hash_may_resize(ht, 0);
313}
314
315/*
316 * Shrink a hashtable when there is too much empty space.
317 * Grow a hashtable when there is not enough empty space.
318 * Returns OK or FAIL (out of memory).
319 */
320 static int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100321hash_may_resize(
322 hashtab_T *ht,
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100323 int minitems) // minimal number of items
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000324{
325 hashitem_T temparray[HT_INIT_SIZE];
326 hashitem_T *oldarray, *newarray;
327 hashitem_T *olditem, *newitem;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100328 unsigned newi;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000329 int todo;
330 long_u oldsize, newsize;
331 long_u minsize;
332 long_u newmask;
333 hash_T perturb;
334
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100335 // Don't resize a locked table.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000336 if (ht->ht_locked > 0)
337 return OK;
338
339#ifdef HT_DEBUG
340 if (ht->ht_used > ht->ht_filled)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100341 emsg("hash_may_resize(): more used than filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000342 if (ht->ht_filled >= ht->ht_mask + 1)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100343 emsg("hash_may_resize(): table completely filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000344#endif
345
346 if (minitems == 0)
347 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100348 // Return quickly for small tables with at least two NULL items. NULL
349 // items are required for the lookup to decide a key isn't there.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000350 if (ht->ht_filled < HT_INIT_SIZE - 1
351 && ht->ht_array == ht->ht_smallarray)
352 return OK;
353
354 /*
355 * Grow or refill the array when it's more than 2/3 full (including
356 * removed items, so that they get cleaned up).
357 * Shrink the array when it's less than 1/5 full. When growing it is
358 * at least 1/4 full (avoids repeated grow-shrink operations)
359 */
360 oldsize = ht->ht_mask + 1;
361 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
362 return OK;
363
364 if (ht->ht_used > 1000)
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100365 minsize = ht->ht_used * 2; // it's big, don't make too much room
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000366 else
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100367 minsize = ht->ht_used * 4; // make plenty of room
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000368 }
369 else
370 {
Bram Moolenaar7b73d7e2019-07-26 21:26:34 +0200371 // Use specified size.
372 if ((long_u)minitems < ht->ht_used) // just in case...
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000373 minitems = (int)ht->ht_used;
Bram Moolenaar7b73d7e2019-07-26 21:26:34 +0200374 minsize = (minitems * 3 + 1) / 2; // array is up to 2/3 full
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000375 }
376
377 newsize = HT_INIT_SIZE;
378 while (newsize < minsize)
379 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100380 newsize <<= 1; // make sure it's always a power of 2
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000381 if (newsize == 0)
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100382 return FAIL; // overflow
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000383 }
384
385 if (newsize == HT_INIT_SIZE)
386 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100387 // Use the small array inside the hashdict structure.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000388 newarray = ht->ht_smallarray;
389 if (ht->ht_array == newarray)
390 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100391 // Moving from ht_smallarray to ht_smallarray! Happens when there
392 // are many removed items. Copy the items to be able to clean up
393 // removed items.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000394 mch_memmove(temparray, newarray, sizeof(temparray));
395 oldarray = temparray;
396 }
397 else
398 oldarray = ht->ht_array;
Bram Moolenaara80faa82020-04-12 19:37:17 +0200399 CLEAR_FIELD(ht->ht_smallarray);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000400 }
401 else
402 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100403 // Allocate an array.
Bram Moolenaara80faa82020-04-12 19:37:17 +0200404 newarray = ALLOC_CLEAR_MULT(hashitem_T, newsize);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000405 if (newarray == NULL)
406 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100407 // Out of memory. When there are NULL items still return OK.
408 // Otherwise set ht_error, because lookup may result in a hang if
409 // we add another item.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000410 if (ht->ht_filled < ht->ht_mask)
411 return OK;
412 ht->ht_error = TRUE;
413 return FAIL;
414 }
415 oldarray = ht->ht_array;
416 }
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000417
418 /*
419 * Move all the items from the old array to the new one, placing them in
420 * the right spot. The new array won't have any removed items, thus this
421 * is also a cleanup action.
422 */
423 newmask = newsize - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000424 todo = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000425 for (olditem = oldarray; todo > 0; ++olditem)
426 if (!HASHITEM_EMPTY(olditem))
427 {
428 /*
429 * The algorithm to find the spot to add the item is identical to
430 * the algorithm to find an item in hash_lookup(). But we only
431 * need to search for a NULL key, thus it's simpler.
432 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100433 newi = (unsigned)(olditem->hi_hash & newmask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000434 newitem = &newarray[newi];
435
436 if (newitem->hi_key != NULL)
437 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
438 {
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100439 newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000440 newitem = &newarray[newi & newmask];
441 if (newitem->hi_key == NULL)
442 break;
443 }
444 *newitem = *olditem;
445 --todo;
446 }
447
448 if (ht->ht_array != ht->ht_smallarray)
449 vim_free(ht->ht_array);
450 ht->ht_array = newarray;
451 ht->ht_mask = newmask;
452 ht->ht_filled = ht->ht_used;
Bram Moolenaar21c16f82020-07-14 16:15:34 +0200453 ++ht->ht_changed;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000454 ht->ht_error = FALSE;
455
456 return OK;
457}
458
459/*
460 * Get the hash number for a key.
461 * If you think you know a better hash function: Compile with HT_DEBUG set and
462 * run a script that uses hashtables a lot. Vim will then print statistics
463 * when exiting. Try that with the current hash algorithm and yours. The
464 * lower the percentage the better.
465 */
466 hash_T
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100467hash_hash(char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000468{
469 hash_T hash;
470 char_u *p;
471
472 if ((hash = *key) == 0)
Bram Moolenaar0921ecf2016-04-03 22:44:36 +0200473 return (hash_T)0;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000474 p = key + 1;
475
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100476 // A simplistic algorithm that appears to do very well.
477 // Suggested by George Reilly.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000478 while (*p != NUL)
479 hash = hash * 101 + *p++;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000480
481 return hash;
482}