Blame - src/hashtable.c - android_external_vim

blob: 4b378aa0c64b62cd81c93813583c2b28ee43ec61 [file] [log] [blame]

Bram Moolenaar	6abd8e9	2005-01-19 22:21:15 +0000	[diff] [blame^]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9
				10	/*
				11	* hashtable.c: Handling of a hashtable with Vim-specific properties.
				12	*
				13	* Each item in a hashtable has a NUL terminated string key. A key can appear
				14	* only once in the table.
				15	*
				16	* A hash number is computed from the key for quick lookup. When the hashes
				17	* of two different keys point to the same entry an algorithm is used to
				18	* iterate over other entries in the table until the right one is found.
				19	* To make the iteration work removed keys are different from entries where a
				20	* key was never present.
				21	*
				22	* The mechanism has been partly based on how Python Dictionaries are
				23	* implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.
				24	*
				25	* The hashtable grows to accommodate more entries when needed. At least 1/3
				26	* of the entries is empty to keep the lookup efficient (at the cost of extra
				27	* memory).
				28	*/
				29
				30	#include "vim.h"
				31
				32	#if defined(FEAT_EVAL) \|\| defined(FEAT_SYN_HL) \|\| defined(PROTO)
				33
				34	#if 1
				35	# define HT_DEBUG /* extra checks for table consistency */
				36	#endif
				37
				38	/* Magic value for algorithm that walks through the array. */
				39	#define PERTURB_SHIFT 5
				40
				41	static hashitem hash_lookup __ARGS((hashtable ht, char_u *key, long_u hash));
				42	static int hash_add_item __ARGS((hashtable ht, hashitem hi, char_u *key, long_u hash));
				43	static int hash_may_resize __ARGS((hashtable *ht));
				44	static long_u hash_hash __ARGS((char_u *key));
				45
				46	#if 0 /* not used */
				47	/*
				48	* Create an empty hash table.
				49	* Returns NULL when out of memory.
				50	*/
				51	hashtable *
				52	hash_create()
				53	{
				54	hashtable *ht;
				55
				56	ht = (hashtable *)alloc(sizeof(hashtable));
				57	if (ht != NULL)
				58	hash_init(ht);
				59	return ht;
				60	}
				61	#endif
				62
				63	/*
				64	* Initialize an empty hash table.
				65	*/
				66	void
				67	hash_init(ht)
				68	hashtable *ht;
				69	{
				70	/* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
				71	vim_memset(ht, 0, sizeof(hashtable));
				72	ht->ht_array = ht->ht_smallarray;
				73	ht->ht_mask = HT_INIT_SIZE - 1;
				74	}
				75
				76	#if 0 /* not used */
				77	/*
				78	* Free a hash table. Does not free the items it contains!
				79	*/
				80	void
				81	hash_free(ht)
				82	hashtable *ht;
				83	{
				84	if (ht->ht_array != ht->ht_smallarray)
				85	vim_free(ht->ht_array);
				86	vim_free(ht);
				87	}
				88	#endif
				89
				90	/*
				91	* Find "key" in hashtable "ht". "key" must not be NULL.
				92	* Always returns a pointer to a hashitem. If the item was not found then
				93	* HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key
				94	* would be added.
				95	* WARNING: The returned pointer becomes invalid when the hashtable is changed
				96	* (adding, setting or removing an item)!
				97	*/
				98	hashitem *
				99	hash_find(ht, key)
				100	hashtable *ht;
				101	char_u *key;
				102	{
				103	return hash_lookup(ht, key, hash_hash(key));
				104	}
				105
				106	/*
				107	* Like hash_find(), but caller computes "hash".
				108	*/
				109	static hashitem *
				110	hash_lookup(ht, key, hash)
				111	hashtable *ht;
				112	char_u *key;
				113	long_u hash;
				114	{
				115	long_u perturb;
				116	hashitem *freeitem;
				117	hashitem *hi;
				118	int idx;
				119
				120	/*
				121	* Quickly handle the most common situations:
				122	* - return if there is no item at all
				123	* - skip over a removed item
				124	* - return if the item matches
				125	*/
				126	idx = hash & ht->ht_mask;
				127	hi = &ht->ht_array[idx];
				128
				129	if (hi->hi_key == NULL)
				130	return hi;
				131	if (hi->hi_key == HI_KEY_REMOVED)
				132	freeitem = hi;
				133	else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
				134	return hi;
				135	else
				136	freeitem = NULL;
				137
				138	/*
				139	* Need to search through the table to find the key. The algorithm
				140	* to step through the table starts with large steps, gradually becoming
				141	* smaller down to (1/4 table size + 1). This means it goes through all
				142	* table entries in the end.
				143	* When we run into a NULL key it's clear that the key isn't there.
				144	* Return the first available slot found (can be a slot of a removed
				145	* item).
				146	*/
				147	for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
				148	{
				149	idx = (idx << 2) + idx + perturb + 1;
				150	hi = &ht->ht_array[idx & ht->ht_mask];
				151	if (hi->hi_key == NULL)
				152	return freeitem == NULL ? hi : freeitem;
				153	if (hi->hi_hash == hash
				154	&& hi->hi_key != HI_KEY_REMOVED
				155	&& STRCMP(hi->hi_key, key) == 0)
				156	return hi;
				157	if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
				158	freeitem = hi;
				159	}
				160	}
				161
				162	/*
				163	* Add item with key "key" to hashtable "ht".
				164	* Returns FAIL when out of memory or the key is already present.
				165	*/
				166	int
				167	hash_add(ht, key)
				168	hashtable *ht;
				169	char_u *key;
				170	{
				171	long_u hash = hash_hash(key);
				172	hashitem *hi;
				173
				174	hi = hash_lookup(ht, key, hash);
				175	if (!HASHITEM_EMPTY(hi))
				176	{
				177	EMSG2(_(e_intern2), "hash_add()");
				178	return FAIL;
				179	}
				180	return hash_add_item(ht, hi, key, hash);
				181	}
				182
				183	/*
				184	* Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and
				185	* "hi" must have been obtained with hash_lookup() and point to an empty item.
				186	* "hi" is invalid after this!
				187	* Returns OK or FAIL (out of memory).
				188	*/
				189	static int
				190	hash_add_item(ht, hi, key, hash)
				191	hashtable *ht;
				192	hashitem *hi;
				193	char_u *key;
				194	long_u hash;
				195	{
				196	/* If resizing failed before and it fails again we can't add an item. */
				197	if (ht->ht_error && hash_may_resize(ht) == FAIL)
				198	return FAIL;
				199
				200	++ht->ht_used;
				201	if (hi->hi_key == NULL)
				202	++ht->ht_filled;
				203	hi->hi_key = key;
				204	hi->hi_hash = hash;
				205
				206	/* When the space gets low may resize the array. */
				207	return hash_may_resize(ht);
				208	}
				209
				210	#if 0 /* not used */
				211	/*
				212	* Overwrite hashtable item "hi" with "key". "hi" must point to the item that
				213	* is to be overwritten. Thus the number of items in the hashtable doesn't
				214	* change.
				215	* Although the key must be identical, the pointer may be different, thus it's
				216	* set anyway (the key is part of an item with that key).
				217	* The caller must take care of freeing the old item.
				218	* "hi" is invalid after this!
				219	*/
				220	void
				221	hash_set(hi, key)
				222	hashitem *hi;
				223	char_u *key;
				224	{
				225	hi->hi_key = key;
				226	}
				227	#endif
				228
				229	/*
				230	* Remove item "hi" from hashtable "ht". "hi" must have been obtained with
				231	* hash_lookup() and point to a used empty item.
				232	* The caller must take care of freeing the item.
				233	*/
				234	void
				235	hash_remove(ht, hi)
				236	hashtable *ht;
				237	hashitem *hi;
				238	{
				239	--ht->ht_used;
				240	hi->hi_key = HI_KEY_REMOVED;
				241	hash_may_resize(ht);
				242	}
				243
				244	/*
				245	* Shrink a hashtable when there is too much empty space.
				246	* Grow a hashtable when there is not enough empty space.
				247	* Returns OK or FAIL (out of memory).
				248	*/
				249	static int
				250	hash_may_resize(ht)
				251	hashtable *ht;
				252	{
				253	hashitem temparray[HT_INIT_SIZE];
				254	hashitem oldarray, newarray;
				255	hashitem olditem, newitem;
				256	int newi;
				257	int todo;
				258	long_u oldsize, newsize;
				259	long_u minsize;
				260	long_u newmask;
				261	long_u perturb;
				262
				263	#ifdef HT_DEBUG
				264	if (ht->ht_used > ht->ht_filled)
				265	EMSG("hash_may_resize(): more used than filled");
				266	if (ht->ht_filled >= ht->ht_mask + 1)
				267	EMSG("hash_may_resize(): table completely filled");
				268	#endif
				269
				270	/* Return quickly for small tables with at least two NULL items. NULL
				271	* items are required for the lookup to decide a key isn't there. */
				272	if (ht->ht_filled < HT_INIT_SIZE - 1 && ht->ht_array == ht->ht_smallarray)
				273	return OK;
				274
				275	/*
				276	* Grow or refill the array when it's more than 2/3 full (including
				277	* removed items, so that they get cleaned up).
				278	* Shrink the array when it's less than 1/5 full. When growing it is at
				279	* least 1/4 full (avoids repeated grow-shrink operations)
				280	*/
				281	oldsize = ht->ht_mask + 1;
				282	if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
				283	return OK;
				284
				285	if (ht->ht_used > 10000)
				286	minsize = ht->ht_used * 2; /* it's big, don't make too much room */
				287	else
				288	minsize = ht->ht_used * 4; /* make plenty of room */
				289	newsize = HT_INIT_SIZE;
				290	while (newsize < minsize)
				291	{
				292	newsize <<= 1; /* make sure it's always a power of 2 */
				293	if (newsize == 0)
				294	return FAIL; /* overflow */
				295	}
				296
				297	if (newsize == HT_INIT_SIZE)
				298	{
				299	/* Use the small array inside the hashdict structure. */
				300	newarray = ht->ht_smallarray;
				301	if (ht->ht_array == newarray)
				302	{
				303	/* Moving from ht_smallarray to ht_smallarray! Happens when there
				304	* are many removed items. Copy the items to be able to clean up
				305	* removed items. */
				306	mch_memmove(temparray, newarray, sizeof(temparray));
				307	oldarray = temparray;
				308	}
				309	else
				310	oldarray = ht->ht_array;
				311	}
				312	else
				313	{
				314	/* Allocate an array. */
				315	newarray = (hashitem )alloc((unsigned)(sizeof(hashitem) newsize));
				316	if (newarray == NULL)
				317	{
				318	/* Out of memory. When there are NULL items still return OK.
				319	* Otherwise set ht_error, because lookup may result in a hang if
				320	* we add another item. */
				321	if (ht->ht_filled < ht->ht_mask)
				322	return OK;
				323	ht->ht_error = TRUE;
				324	return FAIL;
				325	}
				326	oldarray = ht->ht_array;
				327	}
				328	vim_memset(newarray, 0, (size_t)(sizeof(hashitem) * newsize));
				329
				330	/*
				331	* Move all the items from the old array to the new one, placing them in
				332	* the right spot. The new array won't have any removed items, thus this
				333	* is also a cleanup action.
				334	*/
				335	newmask = newsize - 1;
				336	todo = ht->ht_used;
				337	for (olditem = oldarray; todo > 0; ++olditem)
				338	if (olditem->hi_key != NULL && olditem->hi_key != HI_KEY_REMOVED)
				339	{
				340	/*
				341	* The algorithm to find the spot to add the item is identical to
				342	* the algorithm to find an item in hash_lookup(). But we only
				343	* need to search for a NULL key, thus it's simpler.
				344	*/
				345	newi = olditem->hi_hash & newmask;
				346	newitem = &newarray[newi];
				347
				348	if (newitem->hi_key != NULL)
				349	for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
				350	{
				351	newi = (newi << 2) + newi + perturb + 1;
				352	newitem = &newarray[newi & newmask];
				353	if (newitem->hi_key == NULL)
				354	break;
				355	}
				356	newitem = olditem;
				357	--todo;
				358	}
				359
				360	if (ht->ht_array != ht->ht_smallarray)
				361	vim_free(ht->ht_array);
				362	ht->ht_array = newarray;
				363	ht->ht_mask = newmask;
				364	ht->ht_filled = ht->ht_used;
				365	ht->ht_error = FALSE;
				366
				367	return OK;
				368	}
				369
				370	/*
				371	* Get the hash number for a key. Uses the ElfHash algorithm, which is
				372	* supposed to have an even distribution (suggested by Charles Campbell).
				373	*/
				374	static long_u
				375	hash_hash(key)
				376	char_u *key;
				377	{
				378	long_u hash = 0;
				379	long_u g;
				380	char_u *p = key;
				381
				382	while (*p != NUL)
				383	{
				384	hash = (hash << 4) + p++; / clear low 4 bits of hash, add char */
				385	g = hash & 0xf0000000L; /* g has high 4 bits of hash only */
				386	if (g != 0)
				387	hash ^= g >> 24; /* xor g's high 4 bits into hash */
				388	}
				389
				390	return hash;
				391	}
				392
				393	#endif