Smallhash: BLI_smallhash_calc_quality
[blender.git] / source / blender / blenlib / intern / smallhash.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2008 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Joseph Eagar.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/blenlib/intern/smallhash.c
29  *  \ingroup bli
30  *
31  * A light stack-friendly hash library, it uses stack space for smallish hash tables
32  * but falls back to heap memory once the stack limits reached.
33  *
34  * based on a doubling non-chaining approach  which uses more buckets then entries
35  * stepping over buckets when two keys share the same hash so any key can find a free bucket.
36  *
37  *
38  * #SmallHashEntry.key
39  * - ``SMHASH_KEY_UNUSED`` means the key in the cell has not been initialized.
40  *
41  * #SmallHashEntry.val
42  * - ``SMHASH_CELL_UNUSED`` means this cell is inside a key series.
43  * - ``SMHASH_CELL_FREE`` means this cell terminates a key series.
44  *
45  * Note that the values and keys are often pointers or index values,
46  * use the maximum values to avoid real pointers colliding with magic numbers.
47  */
48
49 #include <string.h>
50 #include <stdlib.h>
51 #include <BLI_sys_types.h>
52
53 #include "MEM_guardedalloc.h"
54
55 #include "BLI_utildefines.h"
56
57 #include "BLI_smallhash.h"
58
59 #include "BLI_strict_flags.h"
60
61 #define SMHASH_KEY_UNUSED   ((uintptr_t)(UINTPTR_MAX - 0))
62 #define SMHASH_CELL_FREE    ((void *)   (UINTPTR_MAX - 1))
63 #define SMHASH_CELL_UNUSED  ((void *)   (UINTPTR_MAX - 2))
64
65 /* typically this re-assigns 'h' */
66 #define SMHASH_NEXT(h, hoff)  ( \
67         CHECK_TYPE_INLINE(&(h),    unsigned int *), \
68         CHECK_TYPE_INLINE(&(hoff), unsigned int *), \
69         ((h) + (((hoff) = ((hoff) * 2) + 1), (hoff))) \
70         )
71
72
73 /* nothing uses BLI_smallhash_remove yet */
74 // #define USE_REMOVE
75
76 BLI_INLINE bool smallhash_val_is_used(const void *val)
77 {
78 #ifdef USE_REMOVE
79         return !ELEM(val, SMHASH_CELL_FREE, SMHASH_CELL_UNUSED);
80 #else
81         return (val != SMHASH_CELL_FREE);
82 #endif
83 }
84
85 extern const unsigned int hashsizes[];
86
87 BLI_INLINE unsigned int smallhash_key(const uintptr_t key)
88 {
89         return (unsigned int)key;
90 }
91
92 /**
93  * Check if the number of items in the smallhash is large enough to require more buckets.
94  */
95 BLI_INLINE bool smallhash_test_expand_buckets(const unsigned int nentries, const unsigned int nbuckets)
96 {
97         /* (approx * 1.5) */
98         return (nentries + (nentries >> 1)) > nbuckets;
99 }
100
101 BLI_INLINE void smallhash_init_empty(SmallHash *sh)
102 {
103         unsigned int i;
104
105         for (i = 0; i < sh->nbuckets; i++) {
106                 sh->buckets[i].key = SMHASH_KEY_UNUSED;
107                 sh->buckets[i].val = SMHASH_CELL_FREE;
108         }
109 }
110
111 /**
112  * Increase initial bucket size to match a reserved amount.
113  */
114 BLI_INLINE void smallhash_buckets_reserve(SmallHash *sh, const unsigned int nentries_reserve)
115 {
116         while (smallhash_test_expand_buckets(nentries_reserve, sh->nbuckets)) {
117                 sh->nbuckets = hashsizes[++sh->cursize];
118         }
119 }
120
121 BLI_INLINE SmallHashEntry *smallhash_lookup(SmallHash *sh, const uintptr_t key)
122 {
123         SmallHashEntry *e;
124         unsigned int h = smallhash_key(key);
125         unsigned int hoff = 1;
126
127         BLI_assert(key != SMHASH_KEY_UNUSED);
128
129         /* note: there are always more buckets then entries,
130          * so we know there will always be a free bucket if the key isn't found. */
131         for (e = &sh->buckets[h % sh->nbuckets];
132              e->val != SMHASH_CELL_FREE;
133              h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
134         {
135                 if (e->key == key) {
136                         /* should never happen because unused keys are zero'd */
137                         BLI_assert(e->val != SMHASH_CELL_UNUSED);
138                         return e;
139                 }
140         }
141
142         return NULL;
143 }
144
145 BLI_INLINE SmallHashEntry *smallhash_lookup_first_free(SmallHash *sh, const uintptr_t key)
146 {
147         SmallHashEntry *e;
148         unsigned int h = smallhash_key(key);
149         unsigned int hoff = 1;
150
151         for (e = &sh->buckets[h % sh->nbuckets];
152              smallhash_val_is_used(e->val);
153              h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
154         {
155                 /* pass */
156         }
157
158         return e;
159 }
160
161 BLI_INLINE void smallhash_resize_buckets(SmallHash *sh, const unsigned int nbuckets)
162 {
163         SmallHashEntry *buckets_old = sh->buckets;
164         const unsigned int nbuckets_old = sh->nbuckets;
165         const bool was_alloc = (buckets_old != sh->buckets_stack);
166         unsigned int i = 0;
167
168         BLI_assert(sh->nbuckets != nbuckets);
169         if (nbuckets <= SMSTACKSIZE) {
170                 const size_t size = sizeof(*buckets_old) * nbuckets_old;
171                 buckets_old = alloca(size);
172                 memcpy(buckets_old, sh->buckets, size);
173
174                 sh->buckets = sh->buckets_stack;
175         }
176         else {
177                 sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * nbuckets, __func__);
178         }
179
180         sh->nbuckets = nbuckets;
181
182         smallhash_init_empty(sh);
183
184         for (i = 0; i < nbuckets_old; i++) {
185                 if (smallhash_val_is_used(buckets_old[i].val)) {
186                         SmallHashEntry *e = smallhash_lookup_first_free(sh, buckets_old[i].key);
187                         e->key = buckets_old[i].key;
188                         e->val = buckets_old[i].val;
189                 }
190         }
191
192         if (was_alloc) {
193                 MEM_freeN(buckets_old);
194         }
195 }
196
197 void BLI_smallhash_init_ex(SmallHash *sh,
198                            const unsigned int nentries_reserve)
199 {
200         /* assume 'sh' is uninitialized */
201
202         sh->nentries = 0;
203         sh->cursize = 2;
204         sh->nbuckets = hashsizes[sh->cursize];
205
206         sh->buckets = sh->buckets_stack;
207
208         if (nentries_reserve) {
209                 smallhash_buckets_reserve(sh, nentries_reserve);
210
211                 if (sh->nbuckets > SMSTACKSIZE) {
212                         sh->buckets = MEM_mallocN(sizeof(*sh->buckets) * sh->nbuckets, __func__);
213                 }
214         }
215
216         smallhash_init_empty(sh);
217 }
218
219 void BLI_smallhash_init(SmallHash *sh)
220 {
221         BLI_smallhash_init_ex(sh, 0);
222 }
223
224 /*NOTE: does *not* free *sh itself!  only the direct data!*/
225 void BLI_smallhash_release(SmallHash *sh)
226 {
227         if (sh->buckets != sh->buckets_stack) {
228                 MEM_freeN(sh->buckets);
229         }
230 }
231
232 void BLI_smallhash_insert(SmallHash *sh, uintptr_t key, void *val)
233 {
234         SmallHashEntry *e;
235
236         BLI_assert(key != SMHASH_KEY_UNUSED);
237         BLI_assert(smallhash_val_is_used(val));
238         BLI_assert(BLI_smallhash_haskey(sh, key) == false);
239
240         if (UNLIKELY(smallhash_test_expand_buckets(++sh->nentries, sh->nbuckets))) {
241                 smallhash_resize_buckets(sh, hashsizes[++sh->cursize]);
242         }
243
244         e = smallhash_lookup_first_free(sh, key);
245         e->key = key;
246         e->val = val;
247 }
248
249 #ifdef USE_REMOVE
250 bool BLI_smallhash_remove(SmallHash *sh, uintptr_t key)
251 {
252         SmallHashEntry *e = smallhash_lookup(sh, key);
253
254         if (e) {
255                 e->key = SMHASH_KEY_UNUSED;
256                 e->val = SMHASH_CELL_UNUSED;
257                 sh->nentries--;
258
259                 return true;
260         }
261         else {
262                 return false;
263         }
264 }
265 #endif
266
267 void *BLI_smallhash_lookup(SmallHash *sh, uintptr_t key)
268 {
269         SmallHashEntry *e = smallhash_lookup(sh, key);
270
271         return e ? e->val : NULL;
272 }
273
274 void **BLI_smallhash_lookup_p(SmallHash *sh, uintptr_t key)
275 {
276         SmallHashEntry *e = smallhash_lookup(sh, key);
277
278         return e ? &e->val : NULL;
279 }
280
281 bool BLI_smallhash_haskey(SmallHash *sh, uintptr_t key)
282 {
283         SmallHashEntry *e = smallhash_lookup(sh, key);
284
285         return (e != NULL);
286 }
287
288 int BLI_smallhash_count(SmallHash *sh)
289 {
290         return (int)sh->nentries;
291 }
292
293 void *BLI_smallhash_iternext(SmallHashIter *iter, uintptr_t *key)
294 {
295         while (iter->i < iter->sh->nbuckets) {
296                 if (smallhash_val_is_used(iter->sh->buckets[iter->i].val)) {
297                         if (key) {
298                                 *key = iter->sh->buckets[iter->i].key;
299                         }
300
301                         return iter->sh->buckets[iter->i++].val;
302                 }
303
304                 iter->i++;
305         }
306
307         return NULL;
308 }
309
310 void *BLI_smallhash_iternew(SmallHash *sh, SmallHashIter *iter, uintptr_t *key)
311 {
312         iter->sh = sh;
313         iter->i = 0;
314
315         return BLI_smallhash_iternext(iter, key);
316 }
317
318 /** \name Debugging & Introspection
319  * \{ */
320
321 /* note, this was called _print_smhash in knifetool.c
322  * it may not be intended for general use - campbell */
323 #if 0
324 void BLI_smallhash_print(SmallHash *sh)
325 {
326         unsigned int i, linecol = 79, c = 0;
327
328         printf("{");
329         for (i = 0; i < sh->nbuckets; i++) {
330                 if (sh->buckets[i].val == SMHASH_CELL_UNUSED) {
331                         printf("--u-");
332                 }
333                 else if (sh->buckets[i].val == SMHASH_CELL_FREE) {
334                         printf("--f-");
335                 }
336                 else {
337                         printf("%2x", (unsigned int)sh->buckets[i].key);
338                 }
339
340                 if (i != sh->nbuckets - 1)
341                         printf(", ");
342
343                 c += 6;
344
345                 if (c >= linecol) {
346                         printf("\n ");
347                         c = 0;
348                 }
349         }
350
351         fflush(stdout);
352 }
353 #endif
354
355 #ifdef DEBUG
356 /**
357  * Measure how well the hash function performs
358  * (1.0 is perfect - no stepping needed).
359  *
360  * Smaller is better!
361  */
362 double BLI_smallhash_calc_quality(SmallHash *sh)
363 {
364         uint64_t sum = 0;
365         unsigned int i;
366
367         if (sh->nentries == 0)
368                 return -1.0;
369
370         for (i = 0; i < sh->nbuckets; i++) {
371                 if (sh->buckets[i].key != SMHASH_KEY_UNUSED) {
372                         uint64_t count = 0;
373                         SmallHashEntry *e, *e_final = &sh->buckets[i];
374                         unsigned int h = smallhash_key(e_final->key);
375                         unsigned int hoff = 1;
376
377                         for (e = &sh->buckets[h % sh->nbuckets];
378                              e != e_final;
379                              h = SMHASH_NEXT(h, hoff), e = &sh->buckets[h % sh->nbuckets])
380                         {
381                                 count += 1;
382                         }
383
384                         sum += count;
385                 }
386         }
387         return ((double)(sh->nentries + sum) / (double)sh->nentries);
388 }
389 #endif
390
391 /** \} */