blob: d1ba2c95286d81714631a39cfb7d9bfe529dfc5e [file] [log] [blame]
Bram Moolenaarc01140a2006-03-24 22:21:52 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * hashtab.c: Handling of a hashtable with Vim-specific properties.
12 *
13 * Each item in a hashtable has a NUL terminated string key. A key can appear
14 * only once in the table.
15 *
16 * A hash number is computed from the key for quick lookup. When the hashes
17 * of two different keys point to the same entry an algorithm is used to
18 * iterate over other entries in the table until the right one is found.
19 * To make the iteration work removed keys are different from entries where a
20 * key was never present.
21 *
22 * The mechanism has been partly based on how Python Dictionaries are
23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
24 *
25 * The hashtable grows to accommodate more entries when needed. At least 1/3
26 * of the entries is empty to keep the lookup efficient (at the cost of extra
27 * memory).
28 */
29
30#include "vim.h"
31
32#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
33
34#if 0
35# define HT_DEBUG /* extra checks for table consistency and statistics */
36
37static long hash_count_lookup = 0; /* count number of hashtab lookups */
38static long hash_count_perturb = 0; /* count number of "misses" */
39#endif
40
41/* Magic value for algorithm that walks through the array. */
42#define PERTURB_SHIFT 5
43
Bram Moolenaar92b8b2d2016-01-29 22:36:45 +010044static int hash_may_resize(hashtab_T *ht, int minitems);
Bram Moolenaarc01140a2006-03-24 22:21:52 +000045
46#if 0 /* currently not used */
47/*
48 * Create an empty hash table.
49 * Returns NULL when out of memory.
50 */
51 hashtab_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +010052hash_create(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000053{
54 hashtab_T *ht;
55
56 ht = (hashtab_T *)alloc(sizeof(hashtab_T));
57 if (ht != NULL)
58 hash_init(ht);
59 return ht;
60}
61#endif
62
63/*
64 * Initialize an empty hash table.
65 */
66 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010067hash_init(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000068{
69 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
70 vim_memset(ht, 0, sizeof(hashtab_T));
71 ht->ht_array = ht->ht_smallarray;
72 ht->ht_mask = HT_INIT_SIZE - 1;
73}
74
75/*
76 * Free the array of a hash table. Does not free the items it contains!
77 * If "ht" is not freed then you should call hash_init() next!
78 */
79 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010080hash_clear(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000081{
82 if (ht->ht_array != ht->ht_smallarray)
83 vim_free(ht->ht_array);
84}
85
86/*
87 * Free the array of a hash table and all the keys it contains. The keys must
88 * have been allocated. "off" is the offset from the start of the allocate
89 * memory to the location of the key (it's always positive).
90 */
91 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +010092hash_clear_all(hashtab_T *ht, int off)
Bram Moolenaarc01140a2006-03-24 22:21:52 +000093{
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000094 long todo;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000095 hashitem_T *hi;
96
Bram Moolenaara93fa7e2006-04-17 22:14:47 +000097 todo = (long)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +000098 for (hi = ht->ht_array; todo > 0; ++hi)
99 {
100 if (!HASHITEM_EMPTY(hi))
101 {
102 vim_free(hi->hi_key - off);
103 --todo;
104 }
105 }
106 hash_clear(ht);
107}
108
109/*
110 * Find "key" in hashtable "ht". "key" must not be NULL.
111 * Always returns a pointer to a hashitem. If the item was not found then
112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
113 * would be added.
114 * WARNING: The returned pointer becomes invalid when the hashtable is changed
115 * (adding, setting or removing an item)!
116 */
117 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100118hash_find(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000119{
120 return hash_lookup(ht, key, hash_hash(key));
121}
122
123/*
124 * Like hash_find(), but caller computes "hash".
125 */
126 hashitem_T *
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100127hash_lookup(hashtab_T *ht, char_u *key, hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000128{
129 hash_T perturb;
130 hashitem_T *freeitem;
131 hashitem_T *hi;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100132 unsigned idx;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000133
134#ifdef HT_DEBUG
135 ++hash_count_lookup;
136#endif
137
138 /*
139 * Quickly handle the most common situations:
140 * - return if there is no item at all
141 * - skip over a removed item
142 * - return if the item matches
143 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100144 idx = (unsigned)(hash & ht->ht_mask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000145 hi = &ht->ht_array[idx];
146
147 if (hi->hi_key == NULL)
148 return hi;
149 if (hi->hi_key == HI_KEY_REMOVED)
150 freeitem = hi;
151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
152 return hi;
153 else
154 freeitem = NULL;
155
156 /*
157 * Need to search through the table to find the key. The algorithm
158 * to step through the table starts with large steps, gradually becoming
159 * smaller down to (1/4 table size + 1). This means it goes through all
160 * table entries in the end.
161 * When we run into a NULL key it's clear that the key isn't there.
162 * Return the first available slot found (can be a slot of a removed
163 * item).
164 */
165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
166 {
167#ifdef HT_DEBUG
168 ++hash_count_perturb; /* count a "miss" for hashtab lookup */
169#endif
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000171 hi = &ht->ht_array[idx & ht->ht_mask];
172 if (hi->hi_key == NULL)
173 return freeitem == NULL ? hi : freeitem;
174 if (hi->hi_hash == hash
175 && hi->hi_key != HI_KEY_REMOVED
176 && STRCMP(hi->hi_key, key) == 0)
177 return hi;
178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
179 freeitem = hi;
180 }
181}
182
183/*
184 * Print the efficiency of hashtable lookups.
185 * Useful when trying different hash algorithms.
186 * Called when exiting.
187 */
188 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100189hash_debug_results(void)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000190{
191#ifdef HT_DEBUG
192 fprintf(stderr, "\r\n\r\n\r\n\r\n");
193 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
194 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
195 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
196 hash_count_perturb * 100 / hash_count_lookup);
197#endif
198}
199
200/*
201 * Add item with key "key" to hashtable "ht".
202 * Returns FAIL when out of memory or the key is already present.
203 */
204 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100205hash_add(hashtab_T *ht, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000206{
207 hash_T hash = hash_hash(key);
208 hashitem_T *hi;
209
210 hi = hash_lookup(ht, key, hash);
211 if (!HASHITEM_EMPTY(hi))
212 {
213 EMSG2(_(e_intern2), "hash_add()");
214 return FAIL;
215 }
216 return hash_add_item(ht, hi, key, hash);
217}
218
219/*
220 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
221 * "hi" must have been obtained with hash_lookup() and point to an empty item.
222 * "hi" is invalid after this!
223 * Returns OK or FAIL (out of memory).
224 */
225 int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100226hash_add_item(
227 hashtab_T *ht,
228 hashitem_T *hi,
229 char_u *key,
230 hash_T hash)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000231{
232 /* If resizing failed before and it fails again we can't add an item. */
233 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
234 return FAIL;
235
236 ++ht->ht_used;
237 if (hi->hi_key == NULL)
238 ++ht->ht_filled;
239 hi->hi_key = key;
240 hi->hi_hash = hash;
241
242 /* When the space gets low may resize the array. */
243 return hash_may_resize(ht, 0);
244}
245
246#if 0 /* not used */
247/*
248 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that
249 * is to be overwritten. Thus the number of items in the hashtable doesn't
250 * change.
251 * Although the key must be identical, the pointer may be different, thus it's
252 * set anyway (the key is part of an item with that key).
253 * The caller must take care of freeing the old item.
254 * "hi" is invalid after this!
255 */
256 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100257hash_set(hashitem_T *hi, char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000258{
259 hi->hi_key = key;
260}
261#endif
262
263/*
264 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with
265 * hash_lookup().
266 * The caller must take care of freeing the item itself.
267 */
268 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100269hash_remove(hashtab_T *ht, hashitem_T *hi)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000270{
271 --ht->ht_used;
272 hi->hi_key = HI_KEY_REMOVED;
273 hash_may_resize(ht, 0);
274}
275
276/*
277 * Lock a hashtable: prevent that ht_array changes.
278 * Don't use this when items are to be added!
279 * Must call hash_unlock() later.
280 */
281 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100282hash_lock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000283{
284 ++ht->ht_locked;
285}
286
287#if 0 /* currently not used */
288/*
289 * Lock a hashtable at the specified number of entries.
290 * Caller must make sure no more than "size" entries will be added.
291 * Must call hash_unlock() later.
292 */
293 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100294hash_lock_size(hashtab_T *ht, int size)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000295{
296 (void)hash_may_resize(ht, size);
297 ++ht->ht_locked;
298}
299#endif
300
301/*
302 * Unlock a hashtable: allow ht_array changes again.
303 * Table will be resized (shrink) when necessary.
304 * This must balance a call to hash_lock().
305 */
306 void
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100307hash_unlock(hashtab_T *ht)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000308{
309 --ht->ht_locked;
310 (void)hash_may_resize(ht, 0);
311}
312
313/*
314 * Shrink a hashtable when there is too much empty space.
315 * Grow a hashtable when there is not enough empty space.
316 * Returns OK or FAIL (out of memory).
317 */
318 static int
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100319hash_may_resize(
320 hashtab_T *ht,
321 int minitems) /* minimal number of items */
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000322{
323 hashitem_T temparray[HT_INIT_SIZE];
324 hashitem_T *oldarray, *newarray;
325 hashitem_T *olditem, *newitem;
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100326 unsigned newi;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000327 int todo;
328 long_u oldsize, newsize;
329 long_u minsize;
330 long_u newmask;
331 hash_T perturb;
332
333 /* Don't resize a locked table. */
334 if (ht->ht_locked > 0)
335 return OK;
336
337#ifdef HT_DEBUG
338 if (ht->ht_used > ht->ht_filled)
339 EMSG("hash_may_resize(): more used than filled");
340 if (ht->ht_filled >= ht->ht_mask + 1)
341 EMSG("hash_may_resize(): table completely filled");
342#endif
343
344 if (minitems == 0)
345 {
346 /* Return quickly for small tables with at least two NULL items. NULL
347 * items are required for the lookup to decide a key isn't there. */
348 if (ht->ht_filled < HT_INIT_SIZE - 1
349 && ht->ht_array == ht->ht_smallarray)
350 return OK;
351
352 /*
353 * Grow or refill the array when it's more than 2/3 full (including
354 * removed items, so that they get cleaned up).
355 * Shrink the array when it's less than 1/5 full. When growing it is
356 * at least 1/4 full (avoids repeated grow-shrink operations)
357 */
358 oldsize = ht->ht_mask + 1;
359 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
360 return OK;
361
362 if (ht->ht_used > 1000)
363 minsize = ht->ht_used * 2; /* it's big, don't make too much room */
364 else
365 minsize = ht->ht_used * 4; /* make plenty of room */
366 }
367 else
368 {
369 /* Use specified size. */
370 if ((long_u)minitems < ht->ht_used) /* just in case... */
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000371 minitems = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000372 minsize = minitems * 3 / 2; /* array is up to 2/3 full */
373 }
374
375 newsize = HT_INIT_SIZE;
376 while (newsize < minsize)
377 {
378 newsize <<= 1; /* make sure it's always a power of 2 */
379 if (newsize == 0)
380 return FAIL; /* overflow */
381 }
382
383 if (newsize == HT_INIT_SIZE)
384 {
385 /* Use the small array inside the hashdict structure. */
386 newarray = ht->ht_smallarray;
387 if (ht->ht_array == newarray)
388 {
389 /* Moving from ht_smallarray to ht_smallarray! Happens when there
390 * are many removed items. Copy the items to be able to clean up
391 * removed items. */
392 mch_memmove(temparray, newarray, sizeof(temparray));
393 oldarray = temparray;
394 }
395 else
396 oldarray = ht->ht_array;
397 }
398 else
399 {
400 /* Allocate an array. */
401 newarray = (hashitem_T *)alloc((unsigned)
402 (sizeof(hashitem_T) * newsize));
403 if (newarray == NULL)
404 {
405 /* Out of memory. When there are NULL items still return OK.
406 * Otherwise set ht_error, because lookup may result in a hang if
407 * we add another item. */
408 if (ht->ht_filled < ht->ht_mask)
409 return OK;
410 ht->ht_error = TRUE;
411 return FAIL;
412 }
413 oldarray = ht->ht_array;
414 }
415 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
416
417 /*
418 * Move all the items from the old array to the new one, placing them in
419 * the right spot. The new array won't have any removed items, thus this
420 * is also a cleanup action.
421 */
422 newmask = newsize - 1;
Bram Moolenaara93fa7e2006-04-17 22:14:47 +0000423 todo = (int)ht->ht_used;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000424 for (olditem = oldarray; todo > 0; ++olditem)
425 if (!HASHITEM_EMPTY(olditem))
426 {
427 /*
428 * The algorithm to find the spot to add the item is identical to
429 * the algorithm to find an item in hash_lookup(). But we only
430 * need to search for a NULL key, thus it's simpler.
431 */
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100432 newi = (unsigned)(olditem->hi_hash & newmask);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000433 newitem = &newarray[newi];
434
435 if (newitem->hi_key != NULL)
436 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
437 {
Bram Moolenaar97d4ea72012-11-28 18:31:54 +0100438 newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000439 newitem = &newarray[newi & newmask];
440 if (newitem->hi_key == NULL)
441 break;
442 }
443 *newitem = *olditem;
444 --todo;
445 }
446
447 if (ht->ht_array != ht->ht_smallarray)
448 vim_free(ht->ht_array);
449 ht->ht_array = newarray;
450 ht->ht_mask = newmask;
451 ht->ht_filled = ht->ht_used;
452 ht->ht_error = FALSE;
453
454 return OK;
455}
456
457/*
458 * Get the hash number for a key.
459 * If you think you know a better hash function: Compile with HT_DEBUG set and
460 * run a script that uses hashtables a lot. Vim will then print statistics
461 * when exiting. Try that with the current hash algorithm and yours. The
462 * lower the percentage the better.
463 */
464 hash_T
Bram Moolenaar68c2f632016-01-30 17:24:07 +0100465hash_hash(char_u *key)
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000466{
467 hash_T hash;
468 char_u *p;
469
470 if ((hash = *key) == 0)
471 return (hash_T)0; /* Empty keys are not allowed, but we don't
472 want to crash if we get one. */
473 p = key + 1;
474
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000475 /* A simplistic algorithm that appears to do very well.
476 * Suggested by George Reilly. */
477 while (*p != NUL)
478 hash = hash * 101 + *p++;
Bram Moolenaarc01140a2006-03-24 22:21:52 +0000479
480 return hash;
481}
482
483#endif