Smallhash: refactor and fixes
authorCampbell Barton <ideasman42@gmail.com>
Thu, 30 Jan 2014 09:51:44 +0000 (20:51 +1100)
committerCampbell Barton <ideasman42@gmail.com>
Thu, 30 Jan 2014 10:10:55 +0000 (21:10 +1100)
- BLI_smallhash_remove didnt decrement total entries.
- rename vars to match closer to ghash.
- smallhash_lookup returns NULL when no entry found.
- using a zero value key wasn't supported.
- no need to memset or calloc bucket arrays
- add asserts for unsupported conditions.
- added BLI_smallhash_lookup_p

source/blender/blenlib/BLI_smallhash.h
source/blender/blenlib/intern/smallhash.c

index ec7b27f6651a4ada66fd66713a6dc21ce59706ea..07bd9975467bd364bc754e4bdb75ce05d602787f 100644 (file)
@@ -42,29 +42,31 @@ typedef struct {
 /*how much stack space to use before dynamically allocating memory*/
 #define SMSTACKSIZE 64
 typedef struct SmallHash {
-       SmallHashEntry *table;
-       SmallHashEntry _stacktable[SMSTACKSIZE];
-       SmallHashEntry _copytable[SMSTACKSIZE];
-       SmallHashEntry *stacktable, *copytable;
-       unsigned int used;
-       unsigned int curhash;
-       unsigned int size;
+       SmallHashEntry *buckets;
+       SmallHashEntry *buckets_stack;
+       SmallHashEntry *buckets_copy;
+       SmallHashEntry _buckets_stack[SMSTACKSIZE];
+       SmallHashEntry _buckets_copy[SMSTACKSIZE];
+       unsigned int nbuckets;
+       unsigned int nentries;
+       unsigned int cursize;
 } SmallHash;
 
 typedef struct {
-       SmallHash *hash;
+       SmallHash *sh;
        unsigned int i;
 } SmallHashIter;
 
-void    BLI_smallhash_init(SmallHash *hash) ATTR_NONNULL(1);
-void    BLI_smallhash_release(SmallHash *hash) ATTR_NONNULL(1);
-void    BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item) ATTR_NONNULL(1);
-void    BLI_smallhash_remove(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1);
-void   *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-bool    BLI_smallhash_haskey(SmallHash *hash, uintptr_t key) ATTR_NONNULL(1);
-int     BLI_smallhash_count(SmallHash *hash)  ATTR_NONNULL(1);
+void    BLI_smallhash_init(SmallHash *sh) ATTR_NONNULL(1);
+void    BLI_smallhash_release(SmallHash *sh) ATTR_NONNULL(1);
+void    BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *item) ATTR_NONNULL(1);
+bool    BLI_smallhash_remove(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1);
+void   *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+void  **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+bool    BLI_smallhash_haskey(SmallHash *sh, uintptr_t key) ATTR_NONNULL(1);
+int     BLI_smallhash_count(SmallHash *sh)  ATTR_NONNULL(1);
 void   *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)  ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-void   *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
-/* void BLI_smallhash_print(SmallHash *hash); */ /* UNUSED */
+void   *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT;
+/* void BLI_smallhash_print(SmallHash *sh); */ /* UNUSED */
 
 #endif /* __BLI_SMALLHASH_H__ */
index 3812d2955c98fcfdf817b65ec6d8d65450e71f5d..7f9acab1f37ef67db4f75b0effbbcc34780b5f2b 100644 (file)
@@ -35,6 +35,7 @@
  */
 
 #include <string.h>
+#include <stdlib.h>
 
 #include "MEM_guardedalloc.h"
 #include "BLI_utildefines.h"
@@ -53,6 +54,7 @@
  */
 #define SMHASH_CELL_UNUSED  ((void *)0x7FFFFFFF)
 #define SMHASH_CELL_FREE    ((void *)0x7FFFFFFD)
+#define SMHASH_KEY_UNUSED ((uintptr_t)-1)
 
 /* typically this re-assigns 'h' */
 #define SMHASH_NEXT(h, hoff)  ( \
 
 extern const unsigned int hashsizes[];
 
-void BLI_smallhash_init(SmallHash *hash)
+/**
+ * Check if the number of items in the smallhash is large enough to require more buckets.
+ */
+BLI_INLINE bool smallhash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets)
 {
-       unsigned int i;
-
-       memset(hash, 0, sizeof(*hash));
-
-       hash->table = hash->_stacktable;
-       hash->curhash = 2;
-       hash->size = hashsizes[hash->curhash];
+       return nentries * 3 > nbuckets;
+}
 
-       hash->copytable = hash->_copytable;
-       hash->stacktable = hash->_stacktable;
+BLI_INLINE void smallhash_init_empty(SmallHash *sh)
+{
+       unsigned int i;
 
-       for (i = 0; i < hash->size; i++) {
-               hash->table[i].val = SMHASH_CELL_FREE;
+       for (i = 0; i < sh->nbuckets; i++) {
+               sh->buckets[i].key = SMHASH_KEY_UNUSED;
+               sh->buckets[i].val = SMHASH_CELL_FREE;
        }
 }
 
-/*NOTE: does *not* free *hash itself!  only the direct data!*/
-void BLI_smallhash_release(SmallHash *hash)
+BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *sh, const uintptr_t key)
 {
-       if (hash->table != hash->stacktable) {
-               MEM_freeN(hash->table);
+       SmallHashEntry *e;
+       unsigned int h = (unsigned int)key;
+       unsigned int hoff = 1;
+
+       BLI_assert(key != SMHASH_KEY_UNUSED);
+
+       /* note: there are always more buckets then entries,
+        * so we know there will always be a free bucket if the key isn't found. */
+       for (e = &sh->buckets[h % sh->nbuckets];
+            e->val != SMHASH_CELL_FREE;
+            h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
+       {
+               if (e->key == key) {
+                       /* should never happen because unused keys are zero'd */
+                       BLI_assert(e->val != SMHASH_CELL_UNUSED);
+                       return e;
+               }
        }
+
+       return NULL;
 }
 
-BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *hash, uintptr_t key)
+BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
 {
-       SmallHashEntry *entry;
+       SmallHashEntry *e;
        unsigned int h = (unsigned int)key;
        unsigned int hoff = 1;
 
-       for (entry = &hash->table[h % hash->size];
-            !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
-            h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
+       for (e = &sh->buckets[h % sh->nbuckets];
+            !ELEM(e->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+            h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
        {
-               /* Nothing else to do! */
+               /* pass */
        }
 
-       return entry;
+       return e;
 }
 
-void BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item)
+BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const unsigned int nbuckets)
 {
-       SmallHashEntry *entry;
+       SmallHashEntry *buckets_old = sh->buckets;
+       const unsigned int nbuckets_old = sh->nbuckets;
+       unsigned int i = 0;
 
-       if (hash->size < hash->used * 3) {
-               unsigned int newsize = hashsizes[++hash->curhash];
-               SmallHashEntry *tmp;
-               unsigned int i = 0;
+       BLI_assert(sh->nbuckets != nbuckets);
 
-               if (hash->table != hash->stacktable || newsize > SMSTACKSIZE) {
-                       tmp = MEM_callocN(sizeof(*hash->table) * newsize, __func__);
-               }
-               else {
-                       SWAP(SmallHashEntry *, hash->stacktable, hash->copytable);
-                       tmp = hash->stacktable;
-               }
+       if (buckets_old == sh->buckets_stack && nbuckets <= SMSTACKSIZE) {
+               SWAP(SmallHashEntry *, sh->buckets_stack, sh->buckets_copy);
+               sh->buckets = sh->buckets_stack;
+       }
+       else {
+               sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
+       }
 
-               SWAP(SmallHashEntry *, tmp, hash->table);
+       sh->nbuckets = nbuckets;
 
-               hash->size = newsize;
+       smallhash_init_empty(sh);
 
-               for (i = 0; i < hash->size; i++) {
-                       hash->table[i].val = SMHASH_CELL_FREE;
+       for (i = 0; i < nbuckets_old; i++) {
+               if (!ELEM(buckets_old[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
+                       SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
+                       e->key = buckets_old[i].key;
+                       e->val = buckets_old[i].val;
                }
+       }
 
-               for (i = 0; i < hashsizes[hash->curhash - 1]; i++) {
-                       if (ELEM(tmp[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-                               continue;
-                       }
+       if (buckets_old != sh->buckets_stack && buckets_old != sh->buckets_copy) {
+               MEM_freeN(buckets_old);
+       }
+}
 
-                       entry = smallhash_lookup_first_free(hash, tmp[i].key);
-                       entry->key = tmp[i].key;
-                       entry->val = tmp[i].val;
-               }
+void BLI_smallhash_init(SmallHash *sh)
+{
+       /* assume 'sh' is uninitialized */
 
-               if (tmp != hash->stacktable && tmp != hash->copytable) {
-                       MEM_freeN(tmp);
-               }
-       }
+       sh->nentries = 0;
+       sh->cursize = 2;
+       sh->nbuckets = hashsizes[sh->cursize];
 
-       entry = smallhash_lookup_first_free(hash, key);
-       entry->key = key;
-       entry->val = item;
+       sh->buckets         = sh->_buckets_stack;
+       sh->buckets_copy    = sh->_buckets_copy;
+       sh->buckets_stack   = sh->_buckets_stack;
 
-       hash->used++;
+       smallhash_init_empty(sh);
 }
 
-BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *hash, uintptr_t key)
+/*NOTE: does *not* free *sh itself!  only the direct data!*/
+void BLI_smallhash_release(SmallHash *sh)
 {
-       SmallHashEntry *entry;
-       unsigned int h = (unsigned int)key;
-       unsigned int hoff = 1;
+       if (sh->buckets != sh->buckets_stack) {
+               MEM_freeN(sh->buckets);
+       }
+}
 
-       for (entry = &hash->table[h % hash->size];
-            ((entry->key != key) || (entry->val == SMHASH_CELL_UNUSED)) && (entry->val != SMHASH_CELL_FREE);
-            h = SMHASH_NEXT(h, hoff), entry = &hash->table[h % hash->size])
-       {
-               /* Nothing else to do! */
+void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
+{
+       SmallHashEntry *e;
+
+       BLI_assert(key != SMHASH_KEY_UNUSED);
+       BLI_assert(!ELEM(val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE));
+       BLI_assert(BLI_smallhash_haskey(sh, key) == false);
+
+       if (UNLIKELY(smallhash_test_expand_buckets(++sh->nentries, sh->nbuckets))) {
+               smallhash_resize_buckets(sh, hashsizes[++sh->cursize]);
        }
 
-       return entry;
+       e = smallhash_lookup_first_free(sh, key);
+       e->key = key;
+       e->val = val;
 }
 
-void BLI_smallhash_remove(SmallHash *hash, uintptr_t key)
+bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
 {
-       SmallHashEntry *entry = smallhash_lookup(hash, key);
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       if (entry->val != SMHASH_CELL_FREE) {
-               entry->key = 0;
-               entry->val = SMHASH_CELL_UNUSED;
+       if (e) {
+               e->key = SMHASH_KEY_UNUSED;
+               e->val = SMHASH_CELL_UNUSED;
+               sh->nentries--;
+
+               return true;
+       }
+       else {
+               return false;
        }
 }
 
-void *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key)
+void *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key)
 {
-       SmallHashEntry *entry = smallhash_lookup(hash, key);
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       return ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE) ? NULL : entry->val;
+       return e ? e->val : NULL;
 }
 
+void **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key)
+{
+       SmallHashEntry *e = smallhash_lookup(sh, key);
+
+       return e ? &e->val : NULL;
+}
 
-bool BLI_smallhash_haskey(SmallHash *hash, uintptr_t key)
+bool BLI_smallhash_haskey(SmallHash *sh, uintptr_t key)
 {
-       SmallHashEntry *entry = smallhash_lookup(hash, key);
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       return !ELEM(entry->val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+       return (e != NULL);
 }
 
-int BLI_smallhash_count(SmallHash *hash)
+int BLI_smallhash_count(SmallHash *sh)
 {
-       return (int)hash->used;
+       return (int)sh->nentries;
 }
 
 void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 {
-       while (iter->i < iter->hash->size) {
-               if ((iter->hash->table[iter->i].val != SMHASH_CELL_UNUSED) &&
-                   (iter->hash->table[iter->i].val != SMHASH_CELL_FREE))
+       while (iter->i < iter->sh->nbuckets) {
+               if ((iter->sh->buckets[iter->i].val != SMHASH_CELL_UNUSED) &&
+                   (iter->sh->buckets[iter->i].val != SMHASH_CELL_FREE))
                {
                        if (key) {
-                               *key = iter->hash->table[iter->i].key;
+                               *key = iter->sh->buckets[iter->i].key;
                        }
 
-                       return iter->hash->table[iter->i++].val;
+                       return iter->sh->buckets[iter->i++].val;
                }
 
                iter->i++;
@@ -217,9 +257,9 @@ void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
        return NULL;
 }
 
-void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key)
+void *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
 {
-       iter->hash = hash;
+       iter->sh = sh;
        iter->i = 0;
 
        return BLI_smallhash_iternext(iter, key);
@@ -228,23 +268,23 @@ void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key
 /* note, this was called _print_smhash in knifetool.c
  * it may not be intended for general use - campbell */
 #if 0
-void BLI_smallhash_print(SmallHash *hash)
+void BLI_smallhash_print(SmallHash *sh)
 {
-       int i, linecol = 79, c = 0;
+       unsigned int i, linecol = 79, c = 0;
 
        printf("{");
-       for (i = 0; i < hash->size; i++) {
-               if (hash->table[i].val == SMHASH_CELL_UNUSED) {
+       for (i = 0; i < sh->nbuckets; i++) {
+               if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
                        printf("--u-");
                }
-               else if (hash->table[i].val == SMHASH_CELL_FREE) {
+               else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
                        printf("--f-");
                }
                else {
-                       printf("%2x", (unsigned int)hash->table[i].key);
+                       printf("%2x", (unsigned int)sh->buckets[i].key);
                }
 
-               if (i != hash->size - 1)
+               if (i != sh->nbuckets - 1)
                        printf(", ");
 
                c += 6;