Cleanup: remove redundant doxygen \file argument
[blender.git] / source / blender / blenlib / intern / smallhash.c
index d255b4a9af693e6fe0f95602895b87f47d95c5d9..9c4f60322e68b779041934514a24e41bf4ac5fa4 100644 (file)
@@ -1,6 +1,4 @@
 /*
- * ***** BEGIN GPL LICENSE BLOCK *****
- *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
  * as published by the Free Software Foundation; either version 2
  *
  * The Original Code is Copyright (C) 2008 Blender Foundation.
  * All rights reserved.
+ */
+
+/** \file \ingroup bli
+ *
+ * A light stack-friendly hash library, it uses stack space for relatively small, fixed size hash tables
+ * but falls back to heap memory once the stack limits reached (#SMSTACKSIZE).
+ *
+ * based on a doubling hashing approach (non-chaining) which uses more buckets then entries
+ * stepping over buckets when two keys share the same hash so any key can find a free bucket.
+ *
+ * See: https://en.wikipedia.org/wiki/Double_hashing
+ *
+ * \warning This should _only_ be used for small hashes where allocating a hash every time is unacceptable.
+ * Otherwise #GHash should be used instead.
  *
- * The Original Code is: all of this file.
+ * #SmallHashEntry.key
+ * - ``SMHASH_KEY_UNUSED`` means the key in the cell has not been initialized.
  *
- * Contributor(s): Joseph Eagar.
+ * #SmallHashEntry.val
+ * - ``SMHASH_CELL_UNUSED`` means this cell is inside a key series.
+ * - ``SMHASH_CELL_FREE`` means this cell terminates a key series.
  *
- * ***** END GPL LICENSE BLOCK *****
+ * Note that the values and keys are often pointers or index values,
+ * use the maximum values to avoid real pointers colliding with magic numbers.
  */
 
 #include <string.h>
+#include <stdlib.h>
+
+#include "BLI_sys_types.h"
 
 #include "MEM_guardedalloc.h"
+
 #include "BLI_utildefines.h"
 
 #include "BLI_smallhash.h"
 
-/* SMHASH_CELL_UNUSED means this cell is inside a key series,
- * while SMHASH_CELL_FREE means this cell terminates a key series.
- *
- * no chance of anyone shoving INT32_MAX-2 into a *val pointer, I
- * imagine.  hopefully.
- *
- * note: these have the SMHASH suffix because we may want to make them public.
- */
-#define SMHASH_CELL_UNUSED  ((void *)0x7FFFFFFF)
-#define SMHASH_CELL_FREE    ((void *)0x7FFFFFFD)
-
-#ifdef __GNUC__
-#  pragma GCC diagnostic error "-Wsign-conversion"
-#  if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406  /* gcc4.6+ only */
-#    pragma GCC diagnostic error "-Wsign-compare"
-#    pragma GCC diagnostic error "-Wconversion"
-#  endif
-#endif
+#include "BLI_strict_flags.h"
+
+#define SMHASH_KEY_UNUSED   ((uintptr_t)(UINTPTR_MAX - 0))
+#define SMHASH_CELL_FREE    ((void *)   (UINTPTR_MAX - 1))
+#define SMHASH_CELL_UNUSED  ((void *)   (UINTPTR_MAX - 2))
 
 /* typically this re-assigns 'h' */
 #define SMHASH_NEXT(h, hoff)  ( \
-       CHECK_TYPE_INLINE(&(h),    unsigned int), \
-       CHECK_TYPE_INLINE(&(hoff), unsigned int), \
+       CHECK_TYPE_INLINE(&(h),    uint *), \
+       CHECK_TYPE_INLINE(&(hoff), uint *), \
        ((h) + (((hoff) = ((hoff) * 2) + 1), (hoff))) \
        )
 
-extern unsigned int hashsizes[];
 
-void BLI_smallhash_init(SmallHash *hash)
+/* nothing uses BLI_smallhash_remove yet */
+// #define USE_REMOVE
+
+BLI_INLINE bool smallhash_val_is_used(const void *val)
 {
-       unsigned int i;
+#ifdef USE_REMOVE
+       return !ELEM(val, SMHASH_CELL_FREE, SMHASH_CELL_UNUSED);
+#else
+       return (val != SMHASH_CELL_FREE);
+#endif
+}
 
-       memset(hash, 0, sizeof(*hash));
+extern const uint hashsizes[];
 
-       hash->table = hash->_stacktable;
-       hash->curhash = 2;
-       hash->size = hashsizes[hash->curhash];
+BLI_INLINE uint smallhash_key(const uintptr_t key)
+{
+       return (uint)key;
+}
 
-       hash->copytable = hash->_copytable;
-       hash->stacktable = hash->_stacktable;
+/**
+ * Check if the number of items in the smallhash is large enough to require more buckets.
+ */
+BLI_INLINE bool smallhash_test_expand_buckets(const uint nentries, const uint nbuckets)
+{
+       /* (approx * 1.5) */
+       return (nentries + (nentries >> 1)) > nbuckets;
+}
+
+BLI_INLINE void smallhash_init_empty(SmallHash *sh)
+{
+       uint i;
 
-       for (i = 0; i < hash->size; i++) {
-               hash->table[i].val = SMHASH_CELL_FREE;
+       for (i = 0; i < sh->nbuckets; i++) {
+               sh->buckets[i].key = SMHASH_KEY_UNUSED;
+               sh->buckets[i].val = SMHASH_CELL_FREE;
        }
 }
 
-/*NOTE: does *not* free *hash itself!  only the direct data!*/
-void BLI_smallhash_release(SmallHash *hash)
+/**
+ * Increase initial bucket size to match a reserved amount.
+ */
+BLI_INLINE void smallhash_buckets_reserve(SmallHash *sh, const uint nentries_reserve)
 {
-       if (hash->table != hash->stacktable) {
-               MEM_freeN(hash->table);
+       while (smallhash_test_expand_buckets(nentries_reserve, sh->nbuckets)) {
+               sh->nbuckets = hashsizes[++sh->cursize];
        }
 }
 
-void BLI_smallhash_insert(SmallHash *hash, uintptr_t key, void *item)
+BLI_INLINE SmallHashEntry *smallhash_lookup(const SmallHash *sh, const uintptr_t key)
 {
-       unsigned int h, hoff = 1;
+       SmallHashEntry *e;
+       uint h = smallhash_key(key);
+       uint hoff = 1;
 
-       if (hash->size < hash->used * 3) {
-               unsigned int newsize = hashsizes[++hash->curhash];
-               SmallHashEntry *tmp;
-               unsigned int i = 0;
+       BLI_assert(key != SMHASH_KEY_UNUSED);
 
-               if (hash->table != hash->stacktable || newsize > SMSTACKSIZE) {
-                       tmp = MEM_callocN(sizeof(*hash->table) * newsize, __func__);
-               }
-               else {
-                       SWAP(SmallHashEntry *, hash->stacktable, hash->copytable);
-                       tmp = hash->stacktable;
+       /* note: there are always more buckets than entries,
+        * so we know there will always be a free bucket if the key isn't found. */
+       for (e = &sh->buckets[h % sh->nbuckets];
+            e->val != SMHASH_CELL_FREE;
+            h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
+       {
+               if (e->key == key) {
+                       /* should never happen because unused keys are zero'd */
+                       BLI_assert(e->val != SMHASH_CELL_UNUSED);
+                       return e;
                }
+       }
 
-               SWAP(SmallHashEntry *, tmp, hash->table);
+       return NULL;
+}
 
-               hash->size = newsize;
+BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
+{
+       SmallHashEntry *e;
+       uint h = smallhash_key(key);
+       uint hoff = 1;
 
-               for (i = 0; i < hash->size; i++) {
-                       hash->table[i].val = SMHASH_CELL_FREE;
-               }
+       for (e = &sh->buckets[h % sh->nbuckets];
+            smallhash_val_is_used(e->val);
+            h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
+       {
+               /* pass */
+       }
 
-               for (i = 0; i < hashsizes[hash->curhash - 1]; i++) {
-                       if (ELEM(tmp[i].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-                               continue;
-                       }
+       return e;
+}
 
-                       h = (unsigned int)(tmp[i].key);
-                       hoff = 1;
-                       while (!ELEM(hash->table[h % newsize].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-                               h = SMHASH_NEXT(h, hoff);
-                       }
+BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const uint nbuckets)
+{
+       SmallHashEntry *buckets_old = sh->buckets;
+       const uint nbuckets_old = sh->nbuckets;
+       const bool was_alloc = (buckets_old != sh->buckets_stack);
+       uint i = 0;
+
+       BLI_assert(sh->nbuckets != nbuckets);
+       if (nbuckets <= SMSTACKSIZE) {
+               const size_t size = sizeof(*buckets_old) * nbuckets_old;
+               buckets_old = alloca(size);
+               memcpy(buckets_old, sh->buckets, size);
+
+               sh->buckets = sh->buckets_stack;
+       }
+       else {
+               sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
+       }
 
-                       h %= newsize;
+       sh->nbuckets = nbuckets;
 
-                       hash->table[h].key = tmp[i].key;
-                       hash->table[h].val = tmp[i].val;
-               }
+       smallhash_init_empty(sh);
 
-               if (tmp != hash->stacktable && tmp != hash->copytable) {
-                       MEM_freeN(tmp);
+       for (i = 0; i < nbuckets_old; i++) {
+               if (smallhash_val_is_used(buckets_old[i].val)) {
+                       SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
+                       e->key = buckets_old[i].key;
+                       e->val = buckets_old[i].val;
                }
        }
 
-       h = (unsigned int)(key);
-       hoff = 1;
+       if (was_alloc) {
+               MEM_freeN(buckets_old);
+       }
+}
+
+void BLI_smallhash_init_ex(SmallHash *sh,
+                           const uint nentries_reserve)
+{
+       /* assume 'sh' is uninitialized */
+
+       sh->nentries = 0;
+       sh->cursize = 2;
+       sh->nbuckets = hashsizes[sh->cursize];
 
-       while (!ELEM(hash->table[h % hash->size].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-               h = SMHASH_NEXT(h, hoff);
+       sh->buckets = sh->buckets_stack;
+
+       if (nentries_reserve) {
+               smallhash_buckets_reserve(sh, nentries_reserve);
+
+               if (sh->nbuckets > SMSTACKSIZE) {
+                       sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * sh->nbuckets, __func__);
+               }
        }
 
-       h %= hash->size;
-       hash->table[h].key = key;
-       hash->table[h].val = item;
+       smallhash_init_empty(sh);
+}
 
-       hash->used++;
+void BLI_smallhash_init(SmallHash *sh)
+{
+       BLI_smallhash_init_ex(sh, 0);
 }
 
-void BLI_smallhash_remove(SmallHash *hash, uintptr_t key)
+/* NOTE: does *not* free *sh itself!  only the direct data! */
+void BLI_smallhash_release(SmallHash *sh)
 {
-       unsigned int h, hoff = 1;
+       if (sh->buckets != sh->buckets_stack) {
+               MEM_freeN(sh->buckets);
+       }
+}
 
-       h = (unsigned int)(key);
+void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
+{
+       SmallHashEntry *e;
 
-       while ((hash->table[h % hash->size].key != key) ||
-              (hash->table[h % hash->size].val == SMHASH_CELL_UNUSED))
-       {
-               if (hash->table[h % hash->size].val == SMHASH_CELL_FREE) {
-                       return;
-               }
+       BLI_assert(key != SMHASH_KEY_UNUSED);
+       BLI_assert(smallhash_val_is_used(val));
+       BLI_assert(BLI_smallhash_haskey(sh, key) == false);
 
-               h = SMHASH_NEXT(h, hoff);
+       if (UNLIKELY(smallhash_test_expand_buckets(++sh->nentries, sh->nbuckets))) {
+               smallhash_resize_buckets(sh, hashsizes[++sh->cursize]);
        }
 
-       h %= hash->size;
-       hash->table[h].key = 0;
-       hash->table[h].val = SMHASH_CELL_UNUSED;
+       e = smallhash_lookup_first_free(sh, key);
+       e->key = key;
+       e->val = val;
 }
 
-void *BLI_smallhash_lookup(SmallHash *hash, uintptr_t key)
+/**
+ * Inserts a new value to a key that may already be in ghash.
+ *
+ * Avoids #BLI_smallhash_remove, #BLI_smallhash_insert calls (double lookups)
+ *
+ * \returns true if a new key has been added.
+ */
+bool BLI_smallhash_reinsert(SmallHash *sh, uintptr_t key, void *item)
 {
-       unsigned int h, hoff = 1;
-       void *v;
+       SmallHashEntry *e = smallhash_lookup(sh, key);
+       if (e) {
+               e->val = item;
+               return false;
+       }
+       else {
+               BLI_smallhash_insert(sh, key, item);
+               return true;
+       }
+}
 
-       h = (unsigned int)(key);
+#ifdef USE_REMOVE
+bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
+{
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       while ((hash->table[h % hash->size].key != key) ||
-              (hash->table[h % hash->size].val == SMHASH_CELL_UNUSED))
-       {
-               if (hash->table[h % hash->size].val == SMHASH_CELL_FREE) {
-                       return NULL;
-               }
+       if (e) {
+               e->key = SMHASH_KEY_UNUSED;
+               e->val = SMHASH_CELL_UNUSED;
+               sh->nentries--;
 
-               h = SMHASH_NEXT(h, hoff);
+               return true;
        }
-
-       v = hash->table[h % hash->size].val;
-       if (ELEM(v, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE)) {
-               return NULL;
+       else {
+               return false;
        }
-
-       return v;
 }
+#endif
 
+void *BLI_smallhash_lookup(const SmallHash *sh, uintptr_t key)
+{
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-int BLI_smallhash_haskey(SmallHash *hash, uintptr_t key)
+       return e ? e->val : NULL;
+}
+
+void **BLI_smallhash_lookup_p(const SmallHash *sh, uintptr_t key)
 {
-       unsigned int h = (unsigned int)(key);
-       unsigned int hoff = 1;
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       while ((hash->table[h % hash->size].key != key) ||
-              (hash->table[h % hash->size].val == SMHASH_CELL_UNUSED))
-       {
-               if (hash->table[h % hash->size].val == SMHASH_CELL_FREE) {
-                       return 0;
-               }
+       return e ? &e->val : NULL;
+}
 
-               h = SMHASH_NEXT(h, hoff);
-       }
+bool BLI_smallhash_haskey(const SmallHash *sh, uintptr_t key)
+{
+       SmallHashEntry *e = smallhash_lookup(sh, key);
 
-       return !ELEM(hash->table[h % hash->size].val, SMHASH_CELL_UNUSED, SMHASH_CELL_FREE);
+       return (e != NULL);
 }
 
-int BLI_smallhash_count(SmallHash *hash)
+int BLI_smallhash_len(const SmallHash *sh)
 {
-       return (int)hash->used;
+       return (int)sh->nentries;
 }
 
-void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
+BLI_INLINE SmallHashEntry *smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 {
-       while (iter->i < iter->hash->size) {
-               if ((iter->hash->table[iter->i].val != SMHASH_CELL_UNUSED) &&
-                   (iter->hash->table[iter->i].val != SMHASH_CELL_FREE))
-               {
+       while (iter->i < iter->sh->nbuckets) {
+               if (smallhash_val_is_used(iter->sh->buckets[iter->i].val)) {
                        if (key) {
-                               *key = iter->hash->table[iter->i].key;
+                               *key = iter->sh->buckets[iter->i].key;
                        }
 
-                       iter->i++;
-
-                       return iter->hash->table[iter->i - 1].val;
+                       return &iter->sh->buckets[iter->i++];
                }
 
                iter->i++;
@@ -238,34 +323,60 @@ void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
        return NULL;
 }
 
-void *BLI_smallhash_iternew(SmallHash *hash, SmallHashIter *iter, uintptr_t *key)
+void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
 {
-       iter->hash = hash;
+       SmallHashEntry *e = smallhash_iternext(iter, key);
+
+       return e ? e->val : NULL;
+}
+
+void **BLI_smallhash_iternext_p(SmallHashIter *iter, uintptr_t *key)
+{
+       SmallHashEntry *e = smallhash_iternext(iter, key);
+
+       return e ? &e->val : NULL;
+}
+
+void *BLI_smallhash_iternew(const SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
+{
+       iter->sh = sh;
        iter->i = 0;
 
        return BLI_smallhash_iternext(iter, key);
 }
 
+void **BLI_smallhash_iternew_p(const SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
+{
+       iter->sh = sh;
+       iter->i = 0;
+
+       return BLI_smallhash_iternext_p(iter, key);
+}
+
+
+/** \name Debugging & Introspection
+ * \{ */
+
 /* note, this was called _print_smhash in knifetool.c
  * it may not be intended for general use - campbell */
 #if 0
-void BLI_smallhash_print(SmallHash *hash)
+void BLI_smallhash_print(SmallHash *sh)
 {
-       int i, linecol = 79, c = 0;
+       uint i, linecol = 79, c = 0;
 
        printf("{");
-       for (i = 0; i < hash->size; i++) {
-               if (hash->table[i].val == SMHASH_CELL_UNUSED) {
+       for (i = 0; i < sh->nbuckets; i++) {
+               if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
                        printf("--u-");
                }
-               else if (hash->table[i].val == SMHASH_CELL_FREE) {
+               else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
                        printf("--f-");
                }
                else {
-                       printf("%2x", (unsigned int)hash->table[i].key);
+                       printf("%2x", (uint)sh->buckets[i].key);
                }
 
-               if (i != hash->size - 1)
+               if (i != sh->nbuckets - 1)
                        printf(", ");
 
                c += 6;
@@ -279,3 +390,41 @@ void BLI_smallhash_print(SmallHash *hash)
        fflush(stdout);
 }
 #endif
+
+#ifdef DEBUG
+/**
+ * Measure how well the hash function performs
+ * (1.0 is perfect - no stepping needed).
+ *
+ * Smaller is better!
+ */
+double BLI_smallhash_calc_quality(SmallHash *sh)
+{
+       uint64_t sum = 0;
+       uint i;
+
+       if (sh->nentries == 0)
+               return -1.0;
+
+       for (i = 0; i < sh->nbuckets; i++) {
+               if (sh->buckets[i].key != SMHASH_KEY_UNUSED) {
+                       uint64_t count = 0;
+                       SmallHashEntry *e, *e_final = &sh->buckets[i];
+                       uint h = smallhash_key(e_final->key);
+                       uint hoff = 1;
+
+                       for (e = &sh->buckets[h % sh->nbuckets];
+                            e != e_final;
+                            h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
+                       {
+                               count += 1;
+                       }
+
+                       sum += count;
+               }
+       }
+       return ((double)(sh->nentries + sum) / (double)sh->nentries);
+}
+#endif
+
+/** \} */