/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|  | 16 |  | 
|  | 17 | #ifndef ANDROID_BASIC_HASHTABLE_H | 
|  | 18 | #define ANDROID_BASIC_HASHTABLE_H | 
|  | 19 |  | 
#include <stdint.h>
#include <string.h>
#include <sys/types.h>

#include <new>

#include <utils/SharedBuffer.h>
#include <utils/TypeHelpers.h>
|  | 24 |  | 
|  | 25 | namespace android { | 
|  | 26 |  | 
|  | 27 | /* Implementation type.  Nothing to see here. */ | 
|  | 28 | class BasicHashtableImpl { | 
|  | 29 | protected: | 
|  | 30 | struct Bucket { | 
|  | 31 | // The collision flag indicates that the bucket is part of a collision chain | 
|  | 32 | // such that at least two entries both hash to this bucket.  When true, we | 
|  | 33 | // may need to seek further along the chain to find the entry. | 
|  | 34 | static const uint32_t COLLISION = 0x80000000UL; | 
|  | 35 |  | 
|  | 36 | // The present flag indicates that the bucket contains an initialized entry value. | 
|  | 37 | static const uint32_t PRESENT   = 0x40000000UL; | 
|  | 38 |  | 
|  | 39 | // Mask for 30 bits worth of the hash code that are stored within the bucket to | 
|  | 40 | // speed up lookups and rehashing by eliminating the need to recalculate the | 
|  | 41 | // hash code of the entry's key. | 
|  | 42 | static const uint32_t HASH_MASK = 0x3fffffffUL; | 
|  | 43 |  | 
|  | 44 | // Combined value that stores the collision and present flags as well as | 
|  | 45 | // a 30 bit hash code. | 
|  | 46 | uint32_t cookie; | 
|  | 47 |  | 
|  | 48 | // Storage for the entry begins here. | 
|  | 49 | char entry[0]; | 
|  | 50 | }; | 
|  | 51 |  | 
|  | 52 | BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor, | 
|  | 53 | size_t minimumInitialCapacity, float loadFactor); | 
|  | 54 | BasicHashtableImpl(const BasicHashtableImpl& other); | 
|  | 55 |  | 
|  | 56 | void dispose(); | 
|  | 57 |  | 
|  | 58 | inline void edit() { | 
|  | 59 | if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) { | 
|  | 60 | clone(); | 
|  | 61 | } | 
|  | 62 | } | 
|  | 63 |  | 
|  | 64 | void setTo(const BasicHashtableImpl& other); | 
|  | 65 | void clear(); | 
|  | 66 |  | 
|  | 67 | ssize_t next(ssize_t index) const; | 
|  | 68 | ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const; | 
|  | 69 | size_t add(hash_t hash, const void* __restrict__ entry); | 
|  | 70 | void removeAt(size_t index); | 
|  | 71 | void rehash(size_t minimumCapacity, float loadFactor); | 
|  | 72 |  | 
|  | 73 | const size_t mBucketSize; // number of bytes per bucket including the entry | 
|  | 74 | const bool mHasTrivialDestructor; // true if the entry type does not require destruction | 
|  | 75 | size_t mCapacity;         // number of buckets that can be filled before exceeding load factor | 
|  | 76 | float mLoadFactor;        // load factor | 
|  | 77 | size_t mSize;             // number of elements actually in the table | 
|  | 78 | size_t mFilledBuckets;    // number of buckets for which collision or present is true | 
|  | 79 | size_t mBucketCount;      // number of slots in the mBuckets array | 
|  | 80 | void* mBuckets;           // array of buckets, as a SharedBuffer | 
|  | 81 |  | 
|  | 82 | inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const { | 
|  | 83 | return *reinterpret_cast<const Bucket*>( | 
|  | 84 | static_cast<const uint8_t*>(buckets) + index * mBucketSize); | 
|  | 85 | } | 
|  | 86 |  | 
|  | 87 | inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const { | 
|  | 88 | return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize); | 
|  | 89 | } | 
|  | 90 |  | 
|  | 91 | virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0; | 
|  | 92 | virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0; | 
|  | 93 | virtual void destroyBucketEntry(Bucket& bucket) const = 0; | 
|  | 94 |  | 
|  | 95 | private: | 
|  | 96 | void clone(); | 
|  | 97 |  | 
|  | 98 | // Allocates a bucket array as a SharedBuffer. | 
|  | 99 | void* allocateBuckets(size_t count) const; | 
|  | 100 |  | 
|  | 101 | // Releases a bucket array's associated SharedBuffer. | 
|  | 102 | void releaseBuckets(void* __restrict__ buckets, size_t count) const; | 
|  | 103 |  | 
|  | 104 | // Destroys the contents of buckets (invokes destroyBucketEntry for each | 
|  | 105 | // populated bucket if needed). | 
|  | 106 | void destroyBuckets(void* __restrict__ buckets, size_t count) const; | 
|  | 107 |  | 
|  | 108 | // Copies the content of buckets (copies the cookie and invokes copyBucketEntry | 
|  | 109 | // for each populated bucket if needed). | 
|  | 110 | void copyBuckets(const void* __restrict__ fromBuckets, | 
|  | 111 | void* __restrict__ toBuckets, size_t count) const; | 
|  | 112 |  | 
|  | 113 | // Determines the appropriate size of a bucket array to store a certain minimum | 
|  | 114 | // number of entries and returns its effective capacity. | 
|  | 115 | static void determineCapacity(size_t minimumCapacity, float loadFactor, | 
|  | 116 | size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity); | 
|  | 117 |  | 
|  | 118 | // Trim a hash code to 30 bits to match what we store in the bucket's cookie. | 
|  | 119 | inline static hash_t trimHash(hash_t hash) { | 
|  | 120 | return (hash & Bucket::HASH_MASK) ^ (hash >> 30); | 
|  | 121 | } | 
|  | 122 |  | 
|  | 123 | // Returns the index of the first bucket that is in the collision chain | 
|  | 124 | // for the specified hash code, given the total number of buckets. | 
|  | 125 | // (Primary hash) | 
|  | 126 | inline static size_t chainStart(hash_t hash, size_t count) { | 
|  | 127 | return hash % count; | 
|  | 128 | } | 
|  | 129 |  | 
|  | 130 | // Returns the increment to add to a bucket index to seek to the next bucket | 
|  | 131 | // in the collision chain for the specified hash code, given the total number of buckets. | 
|  | 132 | // (Secondary hash) | 
|  | 133 | inline static size_t chainIncrement(hash_t hash, size_t count) { | 
|  | 134 | return ((hash >> 7) | (hash << 25)) % (count - 1) + 1; | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | // Returns the index of the next bucket that is in the collision chain | 
|  | 138 | // that is defined by the specified increment, given the total number of buckets. | 
|  | 139 | inline static size_t chainSeek(size_t index, size_t increment, size_t count) { | 
|  | 140 | return (index + increment) % count; | 
|  | 141 | } | 
|  | 142 | }; | 
|  | 143 |  | 
/*
 * A BasicHashtable stores entries that are indexed by hash code in place
 * within an array.  The basic operations are finding entries by key,
 * adding new entries and removing existing entries.
 *
 * This class provides a very limited set of operations with simple semantics.
 * It is intended to be used as a building block to construct more complex
 * and interesting data structures such as HashMap.  Think very hard before
 * adding anything extra to BasicHashtable, it probably belongs at a
 * higher level of abstraction.
 *
 * TKey: The key type.
 * TEntry: The entry type which is what is actually stored in the array.
 *
 * TKey must support the following contract:
 *     bool operator==(const TKey& other) const;  // return true if equal
 *     bool operator!=(const TKey& other) const;  // return true if unequal
 *
 * TEntry must support the following contract:
 *     const TKey& getKey() const;  // get the key from the entry
 *
 * This class supports storing entries with duplicate keys.  Of course, it can't
 * tell them apart during removal so only the first entry will be removed.
 * We do this because it means that operations like add() can't fail.
 */
|  | 169 | template <typename TKey, typename TEntry> | 
|  | 170 | class BasicHashtable : private BasicHashtableImpl { | 
|  | 171 | public: | 
|  | 172 | /* Creates a hashtable with the specified minimum initial capacity. | 
|  | 173 | * The underlying array will be created when the first entry is added. | 
|  | 174 | * | 
|  | 175 | * minimumInitialCapacity: The minimum initial capacity for the hashtable. | 
|  | 176 | *     Default is 0. | 
|  | 177 | * loadFactor: The desired load factor for the hashtable, between 0 and 1. | 
|  | 178 | *     Default is 0.75. | 
|  | 179 | */ | 
|  | 180 | BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f); | 
|  | 181 |  | 
|  | 182 | /* Copies a hashtable. | 
|  | 183 | * The underlying storage is shared copy-on-write. | 
|  | 184 | */ | 
|  | 185 | BasicHashtable(const BasicHashtable& other); | 
|  | 186 |  | 
|  | 187 | /* Clears and destroys the hashtable. | 
|  | 188 | */ | 
|  | 189 | virtual ~BasicHashtable(); | 
|  | 190 |  | 
|  | 191 | /* Making this hashtable a copy of the other hashtable. | 
|  | 192 | * The underlying storage is shared copy-on-write. | 
|  | 193 | * | 
|  | 194 | * other: The hashtable to copy. | 
|  | 195 | */ | 
|  | 196 | inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) { | 
|  | 197 | setTo(other); | 
|  | 198 | return *this; | 
|  | 199 | } | 
|  | 200 |  | 
|  | 201 | /* Returns the number of entries in the hashtable. | 
|  | 202 | */ | 
|  | 203 | inline size_t size() const { | 
|  | 204 | return mSize; | 
|  | 205 | } | 
|  | 206 |  | 
|  | 207 | /* Returns the capacity of the hashtable, which is the number of elements that can | 
|  | 208 | * added to the hashtable without requiring it to be grown. | 
|  | 209 | */ | 
|  | 210 | inline size_t capacity() const { | 
|  | 211 | return mCapacity; | 
|  | 212 | } | 
|  | 213 |  | 
|  | 214 | /* Returns the number of buckets that the hashtable has, which is the size of its | 
|  | 215 | * underlying array. | 
|  | 216 | */ | 
|  | 217 | inline size_t bucketCount() const { | 
|  | 218 | return mBucketCount; | 
|  | 219 | } | 
|  | 220 |  | 
|  | 221 | /* Returns the load factor of the hashtable. */ | 
|  | 222 | inline float loadFactor() const { | 
|  | 223 | return mLoadFactor; | 
|  | 224 | }; | 
|  | 225 |  | 
|  | 226 | /* Returns a const reference to the entry at the specified index. | 
|  | 227 | * | 
|  | 228 | * index:   The index of the entry to retrieve.  Must be a valid index within | 
|  | 229 | *          the bounds of the hashtable. | 
|  | 230 | */ | 
|  | 231 | inline const TEntry& entryAt(size_t index) const { | 
|  | 232 | return entryFor(bucketAt(mBuckets, index)); | 
|  | 233 | } | 
|  | 234 |  | 
|  | 235 | /* Returns a non-const reference to the entry at the specified index. | 
|  | 236 | * | 
|  | 237 | * index: The index of the entry to edit.  Must be a valid index within | 
|  | 238 | *        the bounds of the hashtable. | 
|  | 239 | */ | 
|  | 240 | inline TEntry& editEntryAt(size_t index) { | 
|  | 241 | edit(); | 
|  | 242 | return entryFor(bucketAt(mBuckets, index)); | 
|  | 243 | } | 
|  | 244 |  | 
|  | 245 | /* Clears the hashtable. | 
|  | 246 | * All entries in the hashtable are destroyed immediately. | 
|  | 247 | * If you need to do something special with the entries in the hashtable then iterate | 
|  | 248 | * over them and do what you need before clearing the hashtable. | 
|  | 249 | */ | 
|  | 250 | inline void clear() { | 
|  | 251 | BasicHashtableImpl::clear(); | 
|  | 252 | } | 
|  | 253 |  | 
|  | 254 | /* Returns the index of the next entry in the hashtable given the index of a previous entry. | 
|  | 255 | * If the given index is -1, then returns the index of the first entry in the hashtable, | 
|  | 256 | * if there is one, or -1 otherwise. | 
|  | 257 | * If the given index is not -1, then returns the index of the next entry in the hashtable, | 
|  | 258 | * in strictly increasing order, or -1 if there are none left. | 
|  | 259 | * | 
|  | 260 | * index:   The index of the previous entry that was iterated, or -1 to begin | 
|  | 261 | *          iteration at the beginning of the hashtable. | 
|  | 262 | */ | 
|  | 263 | inline ssize_t next(ssize_t index) const { | 
|  | 264 | return BasicHashtableImpl::next(index); | 
|  | 265 | } | 
|  | 266 |  | 
|  | 267 | /* Finds the index of an entry with the specified key. | 
|  | 268 | * If the given index is -1, then returns the index of the first matching entry, | 
|  | 269 | * otherwise returns the index of the next matching entry. | 
|  | 270 | * If the hashtable contains multiple entries with keys that match the requested | 
|  | 271 | * key, then the sequence of entries returned is arbitrary. | 
|  | 272 | * Returns -1 if no entry was found. | 
|  | 273 | * | 
|  | 274 | * index:   The index of the previous entry with the specified key, or -1 to | 
|  | 275 | *          find the first matching entry. | 
|  | 276 | * hash:    The hashcode of the key. | 
|  | 277 | * key:     The key. | 
|  | 278 | */ | 
|  | 279 | inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const { | 
|  | 280 | return BasicHashtableImpl::find(index, hash, &key); | 
|  | 281 | } | 
|  | 282 |  | 
|  | 283 | /* Adds the entry to the hashtable. | 
|  | 284 | * Returns the index of the newly added entry. | 
|  | 285 | * If an entry with the same key already exists, then a duplicate entry is added. | 
|  | 286 | * If the entry will not fit, then the hashtable's capacity is increased and | 
|  | 287 | * its contents are rehashed.  See rehash(). | 
|  | 288 | * | 
|  | 289 | * hash:    The hashcode of the key. | 
|  | 290 | * entry:   The entry to add. | 
|  | 291 | */ | 
|  | 292 | inline size_t add(hash_t hash, const TEntry& entry) { | 
|  | 293 | return BasicHashtableImpl::add(hash, &entry); | 
|  | 294 | } | 
|  | 295 |  | 
|  | 296 | /* Removes the entry with the specified index from the hashtable. | 
|  | 297 | * The entry is destroyed immediately. | 
|  | 298 | * The index must be valid. | 
|  | 299 | * | 
|  | 300 | * The hashtable is not compacted after an item is removed, so it is legal | 
|  | 301 | * to continue iterating over the hashtable using next() or find(). | 
|  | 302 | * | 
|  | 303 | * index:   The index of the entry to remove.  Must be a valid index within the | 
|  | 304 | *          bounds of the hashtable, and it must refer to an existing entry. | 
|  | 305 | */ | 
|  | 306 | inline void removeAt(size_t index) { | 
|  | 307 | BasicHashtableImpl::removeAt(index); | 
|  | 308 | } | 
|  | 309 |  | 
|  | 310 | /* Rehashes the contents of the hashtable. | 
|  | 311 | * Grows the hashtable to at least the specified minimum capacity or the | 
|  | 312 | * current number of elements, whichever is larger. | 
|  | 313 | * | 
|  | 314 | * Rehashing causes all entries to be copied and the entry indices may change. | 
|  | 315 | * Although the hash codes are cached by the hashtable, rehashing can be an | 
|  | 316 | * expensive operation and should be avoided unless the hashtable's size | 
|  | 317 | * needs to be changed. | 
|  | 318 | * | 
|  | 319 | * Rehashing is the only way to change the capacity or load factor of the | 
|  | 320 | * hashtable once it has been created.  It can be used to compact the | 
|  | 321 | * hashtable by choosing a minimum capacity that is smaller than the current | 
|  | 322 | * capacity (such as 0). | 
|  | 323 | * | 
|  | 324 | * minimumCapacity: The desired minimum capacity after rehashing. | 
|  | 325 | * loadFactor: The desired load factor after rehashing. | 
|  | 326 | */ | 
|  | 327 | inline void rehash(size_t minimumCapacity, float loadFactor) { | 
|  | 328 | BasicHashtableImpl::rehash(minimumCapacity, loadFactor); | 
|  | 329 | } | 
|  | 330 |  | 
|  | 331 | protected: | 
|  | 332 | static inline const TEntry& entryFor(const Bucket& bucket) { | 
|  | 333 | return reinterpret_cast<const TEntry&>(bucket.entry); | 
|  | 334 | } | 
|  | 335 |  | 
|  | 336 | static inline TEntry& entryFor(Bucket& bucket) { | 
|  | 337 | return reinterpret_cast<TEntry&>(bucket.entry); | 
|  | 338 | } | 
|  | 339 |  | 
|  | 340 | virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const; | 
|  | 341 | virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const; | 
|  | 342 | virtual void destroyBucketEntry(Bucket& bucket) const; | 
|  | 343 |  | 
|  | 344 | private: | 
|  | 345 | // For dumping the raw contents of a hashtable during testing. | 
|  | 346 | friend class BasicHashtableTest; | 
|  | 347 | inline uint32_t cookieAt(size_t index) const { | 
|  | 348 | return bucketAt(mBuckets, index).cookie; | 
|  | 349 | } | 
|  | 350 | }; | 
|  | 351 |  | 
|  | 352 | template <typename TKey, typename TEntry> | 
|  | 353 | BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) : | 
|  | 354 | BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor, | 
|  | 355 | minimumInitialCapacity, loadFactor) { | 
|  | 356 | } | 
|  | 357 |  | 
|  | 358 | template <typename TKey, typename TEntry> | 
|  | 359 | BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) : | 
|  | 360 | BasicHashtableImpl(other) { | 
|  | 361 | } | 
|  | 362 |  | 
|  | 363 | template <typename TKey, typename TEntry> | 
|  | 364 | BasicHashtable<TKey, TEntry>::~BasicHashtable() { | 
|  | 365 | dispose(); | 
|  | 366 | } | 
|  | 367 |  | 
|  | 368 | template <typename TKey, typename TEntry> | 
|  | 369 | bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket, | 
|  | 370 | const void* __restrict__ key) const { | 
|  | 371 | return entryFor(bucket).getKey() == *static_cast<const TKey*>(key); | 
|  | 372 | } | 
|  | 373 |  | 
|  | 374 | template <typename TKey, typename TEntry> | 
|  | 375 | void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket, | 
|  | 376 | const void* __restrict__ entry) const { | 
|  | 377 | if (!traits<TEntry>::has_trivial_copy) { | 
|  | 378 | new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry))); | 
|  | 379 | } else { | 
|  | 380 | memcpy(&entryFor(bucket), entry, sizeof(TEntry)); | 
|  | 381 | } | 
|  | 382 | } | 
|  | 383 |  | 
|  | 384 | template <typename TKey, typename TEntry> | 
|  | 385 | void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const { | 
|  | 386 | if (!traits<TEntry>::has_trivial_dtor) { | 
|  | 387 | entryFor(bucket).~TEntry(); | 
|  | 388 | } | 
|  | 389 | } | 
|  | 390 |  | 
|  | 391 | }; // namespace android | 
|  | 392 |  | 
|  | 393 | #endif // ANDROID_BASIC_HASHTABLE_H |