blob: ad018578b602ee5817d3ecf87f49c48b9198b9b2 [file] [log] [blame]
Bram Moolenaaredf3f972016-08-29 22:49:24 +02001/* vi:set ts=8 sts=4 sw=4 noet:
Bram Moolenaarc01140a2006-03-24 22:21:52 +00002 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * hashtab.c: Handling of a hashtable with Vim-specific properties.
12 *
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
14 * only once in the table.
15 *
16 * A hash number is computed from the key for quick lookup. When the hashes
17 * of two different keys point to the same entry an algorithm is used to
18 * iterate over other entries in the table until the right one is found.
19 * To make the iteration work removed keys are different from entries where a
20 * key was never present.
21 *
22 * The mechanism has been partly based on how Python Dictionaries are
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
24 *
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
27 * memory).
28 */
29
30#include "vim.h"
31
Bram Moolenaarc01140a2006-03-24 22:21:52 +000032#if 0
33# define HT_DEBUG /* extra checks for table consistency and statistics */
34
35static long hash_count_lookup = 0; /* count number of hashtab lookups */
36static long hash_count_perturb = 0; /* count number of "misses" */
37#endif
38
39/* Magic value for algorithm that walks through the array. */
40#define PERTURB_SHIFT 5
41
Bram Moolenaar92b8b2d2016-01-29 22:36:45 +010042static int hash_may_resize(hashtab_T *ht, int minitems);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000043
44#if 0 /* currently not used */
45/*
46 * Create an empty hash table.
47 * Returns NULL when out of memory.
48 */
49 hashtab_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +010050hash_create(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000051{
52 hashtab_T *ht;
53
Bram Moolenaarc799fe22019-05-28 23:08:19 +020054 ht = ALLOC_ONE(hashtab_T);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000055 if (ht != NULL)
56 hash_init(ht);
57 return ht;
58}
59#endif
60
61/*
62 * Initialize an empty hash table.
63 */
64 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010065hash_init(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000066{
67 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
68 vim_memset(ht, 0, sizeof(hashtab_T));
69 ht->ht_array = ht->ht_smallarray;
70 ht->ht_mask = HT_INIT_SIZE - 1;
71}
72
73/*
74 * Free the array of a hash table. Does not free the items it contains!
75 * If "ht" is not freed then you should call hash_init() next!
76 */
77 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010078hash_clear(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000079{
80 if (ht->ht_array != ht->ht_smallarray)
81 vim_free(ht->ht_array);
82}
83
Bram Moolenaar113e1072019-01-20 15:30:40 +010084#if defined(FEAT_SPELL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000085/*
86 * Free the array of a hash table and all the keys it contains. The keys must
87 * have been allocated. "off" is the offset from the start of the allocate
88 * memory to the location of the key (it's always positive).
89 */
90 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010091hash_clear_all(hashtab_T *ht, int off)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000092{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000093 long todo;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000094 hashitem_T *hi;
95
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000096 todo = (long)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000097 for (hi = ht->ht_array; todo > 0; ++hi)
98 {
99 if (!HASHITEM_EMPTY(hi))
100 {
101 vim_free(hi->hi_key - off);
102 --todo;
103 }
104 }
105 hash_clear(ht);
106}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100107#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000108
109/*
110 * Find "key" in hashtable "ht". "key" must not be NULL.
111 * Always returns a pointer to a hashitem. If the item was not found then
112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
113 * would be added.
114 * WARNING: The returned pointer becomes invalid when the hashtable is changed
115 * (adding, setting or removing an item)!
116 */
117 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100118hash_find(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000119{
120 return hash_lookup(ht, key, hash_hash(key));
121}
122
123/*
124 * Like hash_find(), but caller computes "hash".
125 */
126 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100127hash_lookup(hashtab_T *ht, char_u *key, hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000128{
129 hash_T perturb;
130 hashitem_T *freeitem;
131 hashitem_T *hi;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100132 unsigned idx;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000133
134#ifdef HT_DEBUG
135 ++hash_count_lookup;
136#endif
137
138 /*
139 * Quickly handle the most common situations:
140 * - return if there is no item at all
141 * - skip over a removed item
142 * - return if the item matches
143 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100144 idx = (unsigned)(hash & ht->ht_mask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000145 hi = &ht->ht_array[idx];
146
147 if (hi->hi_key == NULL)
148 return hi;
149 if (hi->hi_key == HI_KEY_REMOVED)
150 freeitem = hi;
151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
152 return hi;
153 else
154 freeitem = NULL;
155
156 /*
157 * Need to search through the table to find the key. The algorithm
158 * to step through the table starts with large steps, gradually becoming
159 * smaller down to (1/4 table size + 1). This means it goes through all
160 * table entries in the end.
161 * When we run into a NULL key it's clear that the key isn't there.
162 * Return the first available slot found (can be a slot of a removed
163 * item).
164 */
165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
166 {
167#ifdef HT_DEBUG
168 ++hash_count_perturb; /* count a "miss" for hashtab lookup */
169#endif
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000171 hi = &ht->ht_array[idx & ht->ht_mask];
172 if (hi->hi_key == NULL)
173 return freeitem == NULL ? hi : freeitem;
174 if (hi->hi_hash == hash
175 && hi->hi_key != HI_KEY_REMOVED
176 && STRCMP(hi->hi_key, key) == 0)
177 return hi;
178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
179 freeitem = hi;
180 }
181}
182
Bram Moolenaar113e1072019-01-20 15:30:40 +0100183#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000184/*
185 * Print the efficiency of hashtable lookups.
186 * Useful when trying different hash algorithms.
187 * Called when exiting.
188 */
189 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100190hash_debug_results(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000191{
192#ifdef HT_DEBUG
193 fprintf(stderr, "\r\n\r\n\r\n\r\n");
194 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
195 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
196 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
197 hash_count_perturb * 100 / hash_count_lookup);
198#endif
199}
Bram Moolenaar113e1072019-01-20 15:30:40 +0100200#endif
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000201
202/*
203 * Add item with key "key" to hashtable "ht".
204 * Returns FAIL when out of memory or the key is already present.
205 */
206 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100207hash_add(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000208{
209 hash_T hash = hash_hash(key);
210 hashitem_T *hi;
211
212 hi = hash_lookup(ht, key, hash);
213 if (!HASHITEM_EMPTY(hi))
214 {
Bram Moolenaar95f09602016-11-10 20:01:45 +0100215 internal_error("hash_add()");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000216 return FAIL;
217 }
218 return hash_add_item(ht, hi, key, hash);
219}
220
221/*
222 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
223 * "hi" must have been obtained with hash_lookup() and point to an empty item.
224 * "hi" is invalid after this!
225 * Returns OK or FAIL (out of memory).
226 */
227 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100228hash_add_item(
229 hashtab_T *ht,
230 hashitem_T *hi,
231 char_u *key,
232 hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000233{
234 /* If resizing failed before and it fails again we can't add an item. */
235 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
236 return FAIL;
237
238 ++ht->ht_used;
239 if (hi->hi_key == NULL)
240 ++ht->ht_filled;
241 hi->hi_key = key;
242 hi->hi_hash = hash;
243
244 /* When the space gets low may resize the array. */
245 return hash_may_resize(ht, 0);
246}
247
248#if 0 /* not used */
249/*
250 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
251 * is to be overwritten. Thus the number of items in the hashtable doesn't
252 * change.
253 * Although the key must be identical, the pointer may be different, thus it's
254 * set anyway (the key is part of an item with that key).
255 * The caller must take care of freeing the old item.
256 * "hi" is invalid after this!
257 */
258 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100259hash_set(hashitem_T *hi, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000260{
261 hi->hi_key = key;
262}
263#endif
264
265/*
266 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
267 * hash_lookup().
268 * The caller must take care of freeing the item itself.
269 */
270 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100271hash_remove(hashtab_T *ht, hashitem_T *hi)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000272{
273 --ht->ht_used;
274 hi->hi_key = HI_KEY_REMOVED;
275 hash_may_resize(ht, 0);
276}
277
278/*
279 * Lock a hashtable: prevent that ht_array changes.
280 * Don't use this when items are to be added!
281 * Must call hash_unlock() later.
282 */
283 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100284hash_lock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000285{
286 ++ht->ht_locked;
287}
288
289#if 0 /* currently not used */
290/*
291 * Lock a hashtable at the specified number of entries.
292 * Caller must make sure no more than "size" entries will be added.
293 * Must call hash_unlock() later.
294 */
295 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100296hash_lock_size(hashtab_T *ht, int size)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000297{
298 (void)hash_may_resize(ht, size);
299 ++ht->ht_locked;
300}
301#endif
302
303/*
304 * Unlock a hashtable: allow ht_array changes again.
305 * Table will be resized (shrink) when necessary.
306 * This must balance a call to hash_lock().
307 */
308 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100309hash_unlock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000310{
311 --ht->ht_locked;
312 (void)hash_may_resize(ht, 0);
313}
314
315/*
316 * Shrink a hashtable when there is too much empty space.
317 * Grow a hashtable when there is not enough empty space.
318 * Returns OK or FAIL (out of memory).
319 */
320 static int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100321hash_may_resize(
322 hashtab_T *ht,
323 int minitems) /* minimal number of items */
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000324{
325 hashitem_T temparray[HT_INIT_SIZE];
326 hashitem_T *oldarray, *newarray;
327 hashitem_T *olditem, *newitem;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100328 unsigned newi;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000329 int todo;
330 long_u oldsize, newsize;
331 long_u minsize;
332 long_u newmask;
333 hash_T perturb;
334
335 /* Don't resize a locked table. */
336 if (ht->ht_locked > 0)
337 return OK;
338
339#ifdef HT_DEBUG
340 if (ht->ht_used > ht->ht_filled)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100341 emsg("hash_may_resize(): more used than filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000342 if (ht->ht_filled >= ht->ht_mask + 1)
Bram Moolenaarf9e3e092019-01-13 23:38:42 +0100343 emsg("hash_may_resize(): table completely filled");
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000344#endif
345
346 if (minitems == 0)
347 {
348 /* Return quickly for small tables with at least two NULL items. NULL
349 * items are required for the lookup to decide a key isn't there. */
350 if (ht->ht_filled < HT_INIT_SIZE - 1
351 && ht->ht_array == ht->ht_smallarray)
352 return OK;
353
354 /*
355 * Grow or refill the array when it's more than 2/3 full (including
356 * removed items, so that they get cleaned up).
357 * Shrink the array when it's less than 1/5 full. When growing it is
358 * at least 1/4 full (avoids repeated grow-shrink operations)
359 */
360 oldsize = ht->ht_mask + 1;
361 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
362 return OK;
363
364 if (ht->ht_used > 1000)
365 minsize = ht->ht_used * 2; /* it's big, don't make too much room */
366 else
367 minsize = ht->ht_used * 4; /* make plenty of room */
368 }
369 else
370 {
371 /* Use specified size. */
372 if ((long_u)minitems < ht->ht_used) /* just in case... */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000373 minitems = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000374 minsize = minitems * 3 / 2; /* array is up to 2/3 full */
375 }
376
377 newsize = HT_INIT_SIZE;
378 while (newsize < minsize)
379 {
380 newsize <<= 1; /* make sure it's always a power of 2 */
381 if (newsize == 0)
382 return FAIL; /* overflow */
383 }
384
385 if (newsize == HT_INIT_SIZE)
386 {
387 /* Use the small array inside the hashdict structure. */
388 newarray = ht->ht_smallarray;
389 if (ht->ht_array == newarray)
390 {
391 /* Moving from ht_smallarray to ht_smallarray! Happens when there
392 * are many removed items. Copy the items to be able to clean up
393 * removed items. */
394 mch_memmove(temparray, newarray, sizeof(temparray));
395 oldarray = temparray;
396 }
397 else
398 oldarray = ht->ht_array;
399 }
400 else
401 {
402 /* Allocate an array. */
Bram Moolenaarc799fe22019-05-28 23:08:19 +0200403 newarray = ALLOC_MULT(hashitem_T, newsize);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000404 if (newarray == NULL)
405 {
406 /* Out of memory. When there are NULL items still return OK.
407 * Otherwise set ht_error, because lookup may result in a hang if
408 * we add another item. */
409 if (ht->ht_filled < ht->ht_mask)
410 return OK;
411 ht->ht_error = TRUE;
412 return FAIL;
413 }
414 oldarray = ht->ht_array;
415 }
416 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
417
418 /*
419 * Move all the items from the old array to the new one, placing them in
420 * the right spot. The new array won't have any removed items, thus this
421 * is also a cleanup action.
422 */
423 newmask = newsize - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000424 todo = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000425 for (olditem = oldarray; todo > 0; ++olditem)
426 if (!HASHITEM_EMPTY(olditem))
427 {
428 /*
429 * The algorithm to find the spot to add the item is identical to
430 * the algorithm to find an item in hash_lookup(). But we only
431 * need to search for a NULL key, thus it's simpler.
432 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100433 newi = (unsigned)(olditem->hi_hash & newmask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000434 newitem = &newarray[newi];
435
436 if (newitem->hi_key != NULL)
437 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
438 {
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100439 newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000440 newitem = &newarray[newi & newmask];
441 if (newitem->hi_key == NULL)
442 break;
443 }
444 *newitem = *olditem;
445 --todo;
446 }
447
448 if (ht->ht_array != ht->ht_smallarray)
449 vim_free(ht->ht_array);
450 ht->ht_array = newarray;
451 ht->ht_mask = newmask;
452 ht->ht_filled = ht->ht_used;
453 ht->ht_error = FALSE;
454
455 return OK;
456}
457
458/*
459 * Get the hash number for a key.
460 * If you think you know a better hash function: Compile with HT_DEBUG set and
461 * run a script that uses hashtables a lot. Vim will then print statistics
462 * when exiting. Try that with the current hash algorithm and yours. The
463 * lower the percentage the better.
464 */
465 hash_T
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100466hash_hash(char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000467{
468 hash_T hash;
469 char_u *p;
470
471 if ((hash = *key) == 0)
Bram Moolenaar0921ecf2016-04-03 22:44:36 +0200472 return (hash_T)0;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000473 p = key + 1;
474
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000475 /* A simplistic algorithm that appears to do very well.
476 * Suggested by George Reilly. */
477 while (*p != NUL)
478 hash = hash * 101 + *p++;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000479
480 return hash;
481}