blob: 816dcf79339e573d88d0f5407c9e15c46111b431 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaarc01140a2006-03-24 22:21:52 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * hashtab.c: Handling of a hashtable with Vim-specific properties.
12 *
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
14 * only once in the table.
15 *
16 * A hash number is computed from the key for quick lookup. When the hashes
17 * of two different keys point to the same entry an algorithm is used to
18 * iterate over other entries in the table until the right one is found.
19 * To make the iteration work removed keys are different from entries where a
20 * key was never present.
21 *
22 * The mechanism has been partly based on how Python Dictionaries are
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
24 *
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
27 * memory).
28 */
29
30#include "vim.h"
31
Bram Moolenaarc01140a2006-03-24 22:21:52 +000032#if 0
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010033# define HT_DEBUG // extra checks for table consistency and statistics
Bram Moolenaarc01140a2006-03-24 22:21:52 +000034
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010035static long hash_count_lookup = 0; // count number of hashtab lookups
36static long hash_count_perturb = 0; // count number of "misses"
Bram Moolenaarc01140a2006-03-24 22:21:52 +000037#endif
38
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010039// Magic value for algorithm that walks through the array.
Bram Moolenaarc01140a2006-03-24 22:21:52 +000040#define PERTURB_SHIFT 5
41
Bram Moolenaar92b8b2d2016-01-29 22:36:45 +010042static int hash_may_resize(hashtab_T *ht, int minitems);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000043
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010044#if 0 // currently not used
Bram Moolenaarc01140a2006-03-24 22:21:52 +000045/*
46 * Create an empty hash table.
47 * Returns NULL when out of memory.
48 */
49 hashtab_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +010050hash_create(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000051{
52 hashtab_T *ht;
53
Bram Moolenaarc799fe22019-05-28 23:08:19 +020054 ht = ALLOC_ONE(hashtab_T);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000055 if (ht != NULL)
56 hash_init(ht);
57 return ht;
58}
59#endif
60
61/*
62 * Initialize an empty hash table.
63 */
64 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010065hash_init(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000066{
Bram Moolenaar2ab2e862019-12-04 21:24:53 +010067 // This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray".
Bram Moolenaara80faa82020-04-12 19:37:17 +020068 CLEAR_POINTER(ht);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000069 ht->ht_array = ht->ht_smallarray;
70 ht->ht_mask = HT_INIT_SIZE - 1;
71}
72
73/*
74 * Free the array of a hash table. Does not free the items it contains!
75 * If "ht" is not freed then you should call hash_init() next!
76 */
77 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010078hash_clear(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000079{
80 if (ht->ht_array != ht->ht_smallarray)
81 vim_free(ht->ht_array);
82}
83
Bram Moolenaar113e1072019-01-20 15:30:40 +010084#if defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000085/*
86 * Free the array of a hash table and all the keys it contains. The keys must
87 * have been allocated. "off" is the offset from the start of the allocate
88 * memory to the location of the key (it's always positive).
89 */
90 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010091hash_clear_all(hashtab_T *ht, int off)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000092{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000093 long todo;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000094 hashitem_T *hi;
95
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000096 todo = (long)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000097 for (hi = ht->ht_array; todo > 0; ++hi)
98 {
99 if (!HASHITEM_EMPTY(hi))
100 {
101 vim_free(hi->hi_key - off);
102 --todo;
103 }
104 }
105 hash_clear(ht);
106}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100107#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000108
109/*
110 * Find "key" in hashtable "ht". "key" must not be NULL.
111 * Always returns a pointer to a hashitem. If the item was not found then
112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
113 * would be added.
114 * WARNING: The returned pointer becomes invalid when the hashtable is changed
115 * (adding, setting or removing an item)!
116 */
117 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100118hash_find(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000119{
120 return hash_lookup(ht, key, hash_hash(key));
121}
122
123/*
124 * Like hash_find(), but caller computes "hash".
125 */
126 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100127hash_lookup(hashtab_T *ht, char_u *key, hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000128{
129 hash_T perturb;
130 hashitem_T *freeitem;
131 hashitem_T *hi;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100132 unsigned idx;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000133
134#ifdef HT_DEBUG
135 ++hash_count_lookup;
136#endif
137
138 /*
139 * Quickly handle the most common situations:
140 * - return if there is no item at all
141 * - skip over a removed item
142 * - return if the item matches
143 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100144 idx = (unsigned)(hash & ht->ht_mask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000145 hi = &ht->ht_array[idx];
146
147 if (hi->hi_key == NULL)
148 return hi;
149 if (hi->hi_key == HI_KEY_REMOVED)
150 freeitem = hi;
151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
152 return hi;
153 else
154 freeitem = NULL;
155
156 /*
157 * Need to search through the table to find the key. The algorithm
158 * to step through the table starts with large steps, gradually becoming
159 * smaller down to (1/4 table size + 1). This means it goes through all
160 * table entries in the end.
161 * When we run into a NULL key it's clear that the key isn't there.
162 * Return the first available slot found (can be a slot of a removed
163 * item).
164 */
165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
166 {
167#ifdef HT_DEBUG
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100168 ++hash_count_perturb; // count a "miss" for hashtab lookup
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000169#endif
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000171 hi = &ht->ht_array[idx & ht->ht_mask];
172 if (hi->hi_key == NULL)
173 return freeitem == NULL ? hi : freeitem;
174 if (hi->hi_hash == hash
175 && hi->hi_key != HI_KEY_REMOVED
176 && STRCMP(hi->hi_key, key) == 0)
177 return hi;
178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
179 freeitem = hi;
180 }
181}
182
Bram Moolenaar113e1072019-01-20 15:30:40 +0100183#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000184/*
185 * Print the efficiency of hashtable lookups.
186 * Useful when trying different hash algorithms.
187 * Called when exiting.
188 */
189 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100190hash_debug_results(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000191{
192#ifdef HT_DEBUG
193 fprintf(stderr, "\r\n\r\n\r\n\r\n");
194 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
195 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
196 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
197 hash_count_perturb * 100 / hash_count_lookup);
198#endif
199}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100200#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000201
202/*
203 * Add item with key "key" to hashtable "ht".
204 * Returns FAIL when out of memory or the key is already present.
205 */
206 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100207hash_add(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000208{
209 hash_T hash = hash_hash(key);
210 hashitem_T *hi;
211
212 hi = hash_lookup(ht, key, hash);
213 if (!HASHITEM_EMPTY(hi))
214 {
Bram Moolenaar95f09602016-11-10 20:01:45 +0100215 internal_error("hash_add()");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000216 return FAIL;
217 }
218 return hash_add_item(ht, hi, key, hash);
219}
220
221/*
222 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
223 * "hi" must have been obtained with hash_lookup() and point to an empty item.
224 * "hi" is invalid after this!
225 * Returns OK or FAIL (out of memory).
226 */
227 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100228hash_add_item(
229 hashtab_T *ht,
230 hashitem_T *hi,
231 char_u *key,
232 hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000233{
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100234 // If resizing failed before and it fails again we can't add an item.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000235 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
236 return FAIL;
237
238 ++ht->ht_used;
239 if (hi->hi_key == NULL)
240 ++ht->ht_filled;
241 hi->hi_key = key;
242 hi->hi_hash = hash;
243
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100244 // When the space gets low may resize the array.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000245 return hash_may_resize(ht, 0);
246}
247
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100248#if 0 // not used
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000249/*
250 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
251 * is to be overwritten. Thus the number of items in the hashtable doesn't
252 * change.
253 * Although the key must be identical, the pointer may be different, thus it's
254 * set anyway (the key is part of an item with that key).
255 * The caller must take care of freeing the old item.
256 * "hi" is invalid after this!
257 */
258 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100259hash_set(hashitem_T *hi, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000260{
261 hi->hi_key = key;
262}
263#endif
264
265/*
266 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
267 * hash_lookup().
268 * The caller must take care of freeing the item itself.
269 */
270 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100271hash_remove(hashtab_T *ht, hashitem_T *hi)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000272{
273 --ht->ht_used;
274 hi->hi_key = HI_KEY_REMOVED;
275 hash_may_resize(ht, 0);
276}
277
278/*
279 * Lock a hashtable: prevent that ht_array changes.
280 * Don't use this when items are to be added!
281 * Must call hash_unlock() later.
282 */
283 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100284hash_lock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000285{
286 ++ht->ht_locked;
287}
288
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000289/*
290 * Lock a hashtable at the specified number of entries.
291 * Caller must make sure no more than "size" entries will be added.
292 * Must call hash_unlock() later.
293 */
294 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100295hash_lock_size(hashtab_T *ht, int size)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000296{
297 (void)hash_may_resize(ht, size);
298 ++ht->ht_locked;
299}
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000300
301/*
302 * Unlock a hashtable: allow ht_array changes again.
303 * Table will be resized (shrink) when necessary.
304 * This must balance a call to hash_lock().
305 */
306 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100307hash_unlock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000308{
309 --ht->ht_locked;
310 (void)hash_may_resize(ht, 0);
311}
312
313/*
314 * Shrink a hashtable when there is too much empty space.
315 * Grow a hashtable when there is not enough empty space.
316 * Returns OK or FAIL (out of memory).
317 */
318 static int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100319hash_may_resize(
320 hashtab_T *ht,
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100321 int minitems) // minimal number of items
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000322{
323 hashitem_T temparray[HT_INIT_SIZE];
324 hashitem_T *oldarray, *newarray;
325 hashitem_T *olditem, *newitem;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100326 unsigned newi;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000327 int todo;
328 long_u oldsize, newsize;
329 long_u minsize;
330 long_u newmask;
331 hash_T perturb;
332
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100333 // Don't resize a locked table.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000334 if (ht->ht_locked > 0)
335 return OK;
336
337#ifdef HT_DEBUG
338 if (ht->ht_used > ht->ht_filled)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100339 emsg("hash_may_resize(): more used than filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000340 if (ht->ht_filled >= ht->ht_mask + 1)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100341 emsg("hash_may_resize(): table completely filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000342#endif
343
344 if (minitems == 0)
345 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100346 // Return quickly for small tables with at least two NULL items. NULL
347 // items are required for the lookup to decide a key isn't there.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000348 if (ht->ht_filled < HT_INIT_SIZE - 1
349 && ht->ht_array == ht->ht_smallarray)
350 return OK;
351
352 /*
353 * Grow or refill the array when it's more than 2/3 full (including
354 * removed items, so that they get cleaned up).
355 * Shrink the array when it's less than 1/5 full. When growing it is
356 * at least 1/4 full (avoids repeated grow-shrink operations)
357 */
358 oldsize = ht->ht_mask + 1;
359 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
360 return OK;
361
362 if (ht->ht_used > 1000)
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100363 minsize = ht->ht_used * 2; // it's big, don't make too much room
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000364 else
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100365 minsize = ht->ht_used * 4; // make plenty of room
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000366 }
367 else
368 {
Bram Moolenaar7b73d7e2019-07-26 21:26:34 +0200369 // Use specified size.
370 if ((long_u)minitems < ht->ht_used) // just in case...
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000371 minitems = (int)ht->ht_used;
Bram Moolenaar7b73d7e2019-07-26 21:26:34 +0200372 minsize = (minitems * 3 + 1) / 2; // array is up to 2/3 full
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000373 }
374
375 newsize = HT_INIT_SIZE;
376 while (newsize < minsize)
377 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100378 newsize <<= 1; // make sure it's always a power of 2
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000379 if (newsize == 0)
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100380 return FAIL; // overflow
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000381 }
382
383 if (newsize == HT_INIT_SIZE)
384 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100385 // Use the small array inside the hashdict structure.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000386 newarray = ht->ht_smallarray;
387 if (ht->ht_array == newarray)
388 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100389 // Moving from ht_smallarray to ht_smallarray! Happens when there
390 // are many removed items. Copy the items to be able to clean up
391 // removed items.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000392 mch_memmove(temparray, newarray, sizeof(temparray));
393 oldarray = temparray;
394 }
395 else
396 oldarray = ht->ht_array;
Bram Moolenaara80faa82020-04-12 19:37:17 +0200397 CLEAR_FIELD(ht->ht_smallarray);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000398 }
399 else
400 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100401 // Allocate an array.
Bram Moolenaara80faa82020-04-12 19:37:17 +0200402 newarray = ALLOC_CLEAR_MULT(hashitem_T, newsize);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000403 if (newarray == NULL)
404 {
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100405 // Out of memory. When there are NULL items still return OK.
406 // Otherwise set ht_error, because lookup may result in a hang if
407 // we add another item.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000408 if (ht->ht_filled < ht->ht_mask)
409 return OK;
410 ht->ht_error = TRUE;
411 return FAIL;
412 }
413 oldarray = ht->ht_array;
414 }
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000415
416 /*
417 * Move all the items from the old array to the new one, placing them in
418 * the right spot. The new array won't have any removed items, thus this
419 * is also a cleanup action.
420 */
421 newmask = newsize - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000422 todo = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000423 for (olditem = oldarray; todo > 0; ++olditem)
424 if (!HASHITEM_EMPTY(olditem))
425 {
426 /*
427 * The algorithm to find the spot to add the item is identical to
428 * the algorithm to find an item in hash_lookup(). But we only
429 * need to search for a NULL key, thus it's simpler.
430 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100431 newi = (unsigned)(olditem->hi_hash & newmask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000432 newitem = &newarray[newi];
433
434 if (newitem->hi_key != NULL)
435 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
436 {
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100437 newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000438 newitem = &newarray[newi & newmask];
439 if (newitem->hi_key == NULL)
440 break;
441 }
442 *newitem = *olditem;
443 --todo;
444 }
445
446 if (ht->ht_array != ht->ht_smallarray)
447 vim_free(ht->ht_array);
448 ht->ht_array = newarray;
449 ht->ht_mask = newmask;
450 ht->ht_filled = ht->ht_used;
451 ht->ht_error = FALSE;
452
453 return OK;
454}
455
456/*
457 * Get the hash number for a key.
458 * If you think you know a better hash function: Compile with HT_DEBUG set and
459 * run a script that uses hashtables a lot. Vim will then print statistics
460 * when exiting. Try that with the current hash algorithm and yours. The
461 * lower the percentage the better.
462 */
463 hash_T
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100464hash_hash(char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000465{
466 hash_T hash;
467 char_u *p;
468
469 if ((hash = *key) == 0)
Bram Moolenaar0921ecf2016-04-03 22:44:36 +0200470 return (hash_T)0;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000471 p = key + 1;
472
Bram Moolenaar2ab2e862019-12-04 21:24:53 +0100473 // A simplistic algorithm that appears to do very well.
474 // Suggested by George Reilly.
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000475 while (*p != NUL)
476 hash = hash * 101 + *p++;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000477
478 return hash;
479}