924c84d72d0ca3b26b7e44859fdf5928164f6eff
[blender.git] / tests / gtests / blenlib / BLI_ghash_performance_test.cc
1 /* Apache License, Version 2.0 */
2
3 #include "testing/testing.h"
4 #include "BLI_ressource_strings.h"
5
6 #define GHASH_INTERNAL_API
7
8 extern "C" {
9 #include "MEM_guardedalloc.h"
10 #include "BLI_utildefines.h"
11 #include "BLI_ghash.h"
12 #include "BLI_rand.h"
13 #include "BLI_string.h"
14 #include "PIL_time_utildefines.h"
15 }
16
17 /* Using http://corpora.uni-leipzig.de/downloads/eng_wikipedia_2010_1M-text.tar.gz
18  * (1 million of words, about 122MB of text) from http://corpora.informatik.uni-leipzig.de/download.html */
19 //#define TEXT_CORPUS_PATH "/path/to/TĂ©lĂ©chargements/eng_wikipedia_2010_1M-text/eng_wikipedia_2010_1M-sentences.txt"
20
21 /* Resizing the hash has a huge cost over global filling operation! */
22 //#define GHASH_RESERVE
23
24 /* Run the longest tests! */
25 //#define GHASH_RUN_BIG
26
27 /* Size of 'small case' ghash (number of entries). */
28 #define TESTCASE_SIZE_SMALL 17
29
30 #define PRINTF_GHASH_STATS(_gh) \
31 { \
32         double q, lf, var, pempty, poverloaded; \
33         int bigb; \
34         q = BLI_ghash_calc_quality_ex((_gh), &lf, &var, &pempty, &poverloaded, &bigb); \
35         printf("GHash stats (%u entries):\n\t" \
36                "Quality (the lower the better): %f\n\tVariance (the lower the better): %f\n\tLoad: %f\n\t" \
37                "Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest bucket: %d)\n", \
38                BLI_ghash_size(_gh), q, var, lf, pempty * 100.0, poverloaded * 100.0, bigb); \
39 } void (0)
40
41 /* Str: whole text, lines and words from a 'corpus' text. */
42
43 static void str_ghash_tests(GHash *ghash, const char *id)
44 {
45         printf("\n========== STARTING %s ==========\n", id);
46
47 #ifdef TEXT_CORPUS_PATH
48         size_t sz = 0;
49         char *data;
50         {
51                 struct stat st;
52                 if (stat(TEXT_CORPUS_PATH, &st) == 0)
53                         sz = st.st_size;
54         }
55         if (sz != 0) {
56                 FILE *f = fopen(TEXT_CORPUS_PATH, "r");
57
58                 data = (char *)MEM_mallocN(sz + 1, __func__);
59                 if (fread(data, sizeof(*data), sz, f) != sz) {
60                         printf("ERROR in reading file %s!", TEXT_CORPUS_PATH);
61                         MEM_freeN(data);
62                         data = BLI_strdup(words10k);
63                 }
64                 data[sz] = '\0';
65                 fclose(f);
66         }
67         else {
68                 data = BLI_strdup(words10k);
69         }
70 #else
71         char *data = BLI_strdup(words10k);
72 #endif
73         char *data_p = BLI_strdup(data);
74         char *data_w = BLI_strdup(data);
75         char *data_bis = BLI_strdup(data);
76
77         {
78                 char *p, *w, *c_p, *c_w;
79
80                 TIMEIT_START(string_insert);
81
82 #ifdef GHASH_RESERVE
83                 BLI_ghash_reserve(ghash, strlen(data) / 32);  /* rough estimation... */
84 #endif
85
86                 BLI_ghash_insert(ghash, data, SET_INT_IN_POINTER(data[0]));
87
88                 for (p = c_p = data_p, w = c_w = data_w; *c_w; c_w++, c_p++) {
89                         if (*c_p == '.') {
90                                 *c_p = *c_w = '\0';
91                                 if (!BLI_ghash_haskey(ghash, p)) {
92                                         BLI_ghash_insert(ghash, p, SET_INT_IN_POINTER(p[0]));
93                                 }
94                                 if (!BLI_ghash_haskey(ghash, w)) {
95                                         BLI_ghash_insert(ghash, w, SET_INT_IN_POINTER(w[0]));
96                                 }
97                                 p = c_p + 1;
98                                 w = c_w + 1;
99                         }
100                         else if (*c_w == ' ') {
101                                 *c_w = '\0';
102                                 if (!BLI_ghash_haskey(ghash, w)) {
103                                         BLI_ghash_insert(ghash, w, SET_INT_IN_POINTER(w[0]));
104                                 }
105                                 w = c_w + 1;
106                         }
107                 }
108
109                 TIMEIT_END(string_insert);
110         }
111
112         PRINTF_GHASH_STATS(ghash);
113
114         {
115                 char *p, *w, *c;
116                 void *v;
117
118                 TIMEIT_START(string_lookup);
119
120                 v = BLI_ghash_lookup(ghash, data_bis);
121                 EXPECT_EQ(GET_INT_FROM_POINTER(v), data_bis[0]);
122
123                 for (p = w = c = data_bis; *c; c++) {
124                         if (*c == '.') {
125                                 *c = '\0';
126                                 v = BLI_ghash_lookup(ghash, w);
127                                 EXPECT_EQ(GET_INT_FROM_POINTER(v), w[0]);
128                                 v = BLI_ghash_lookup(ghash, p);
129                                 EXPECT_EQ(GET_INT_FROM_POINTER(v), p[0]);
130                                 p = w = c + 1;
131                         }
132                         else if (*c == ' ') {
133                                 *c = '\0';
134                                 v = BLI_ghash_lookup(ghash, w);
135                                 EXPECT_EQ(GET_INT_FROM_POINTER(v), w[0]);
136                                 w = c + 1;
137                         }
138                 }
139
140                 TIMEIT_END(string_lookup);
141         }
142
143         BLI_ghash_free(ghash, NULL, NULL);
144         MEM_freeN(data);
145         MEM_freeN(data_p);
146         MEM_freeN(data_w);
147         MEM_freeN(data_bis);
148
149         printf("========== ENDED %s ==========\n\n", id);
150 }
151
152 TEST(ghash, TextGHash)
153 {
154         GHash *ghash = BLI_ghash_new(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, __func__);
155
156         str_ghash_tests(ghash, "StrGHash - GHash");
157 }
158
159 TEST(ghash, TextMurmur2a)
160 {
161         GHash *ghash = BLI_ghash_new(BLI_ghashutil_strhash_p_murmur, BLI_ghashutil_strcmp, __func__);
162
163         str_ghash_tests(ghash, "StrGHash - Murmur");
164 }
165
166
167 /* Int: uniform 100M first integers. */
168
169 static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
170 {
171         printf("\n========== STARTING %s ==========\n", id);
172
173         {
174                 unsigned int i = nbr;
175
176                 TIMEIT_START(int_insert);
177
178 #ifdef GHASH_RESERVE
179                 BLI_ghash_reserve(ghash, nbr);
180 #endif
181
182                 while (i--) {
183                         BLI_ghash_insert(ghash, SET_UINT_IN_POINTER(i), SET_UINT_IN_POINTER(i));
184                 }
185
186                 TIMEIT_END(int_insert);
187         }
188
189         PRINTF_GHASH_STATS(ghash);
190
191         {
192                 unsigned int i = nbr;
193
194                 TIMEIT_START(int_lookup);
195
196                 while (i--) {
197                         void *v = BLI_ghash_lookup(ghash, SET_UINT_IN_POINTER(i));
198                         EXPECT_EQ(GET_UINT_FROM_POINTER(v), i);
199                 }
200
201                 TIMEIT_END(int_lookup);
202         }
203
204         {
205                 void *k, *v;
206
207                 TIMEIT_START(int_pop);
208
209                 GHashIterState pop_state = {0};
210
211                 while (BLI_ghash_pop(ghash, &pop_state, &k, &v)) {
212                         EXPECT_EQ(k, v);
213                 }
214
215                 TIMEIT_END(int_pop);
216         }
217         EXPECT_EQ(BLI_ghash_size(ghash), 0);
218
219         BLI_ghash_free(ghash, NULL, NULL);
220
221         printf("========== ENDED %s ==========\n\n", id);
222 }
223
224 TEST(ghash, IntGHash12000)
225 {
226         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
227
228         int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
229 }
230
231 #ifdef GHASH_RUN_BIG
232 TEST(ghash, IntGHash100000000)
233 {
234         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
235
236         int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
237 }
238 #endif
239
240 TEST(ghash, IntMurmur2a12000)
241 {
242         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
243
244         int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
245 }
246
247 #ifdef GHASH_RUN_BIG
248 TEST(ghash, IntMurmur2a100000000)
249 {
250         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
251
252         int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
253 }
254 #endif
255
256 /* Int: random 50M integers. */
257
258 static void randint_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
259 {
260         printf("\n========== STARTING %s ==========\n", id);
261
262         unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
263         unsigned int *dt;
264         unsigned int i;
265
266         {
267                 RNG *rng = BLI_rng_new(0);
268                 for (i = nbr, dt = data; i--; dt++) {
269                         *dt = BLI_rng_get_uint(rng);
270                 }
271                 BLI_rng_free(rng);
272         }
273
274         {
275                 TIMEIT_START(int_insert);
276
277 #ifdef GHASH_RESERVE
278                 BLI_ghash_reserve(ghash, nbr);
279 #endif
280
281                 for (i = nbr, dt = data; i--; dt++) {
282                         BLI_ghash_insert(ghash, SET_UINT_IN_POINTER(*dt), SET_UINT_IN_POINTER(*dt));
283                 }
284
285                 TIMEIT_END(int_insert);
286         }
287
288         PRINTF_GHASH_STATS(ghash);
289
290         {
291                 TIMEIT_START(int_lookup);
292
293                 for (i = nbr, dt = data; i--; dt++) {
294                         void *v = BLI_ghash_lookup(ghash, SET_UINT_IN_POINTER(*dt));
295                         EXPECT_EQ(GET_UINT_FROM_POINTER(v), *dt);
296                 }
297
298                 TIMEIT_END(int_lookup);
299         }
300
301         BLI_ghash_free(ghash, NULL, NULL);
302
303         printf("========== ENDED %s ==========\n\n", id);
304 }
305
306 TEST(ghash, IntRandGHash12000)
307 {
308         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
309
310         randint_ghash_tests(ghash, "RandIntGHash - GHash - 12000", 12000);
311 }
312
313 #ifdef GHASH_RUN_BIG
314 TEST(ghash, IntRandGHash50000000)
315 {
316         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
317
318         randint_ghash_tests(ghash, "RandIntGHash - GHash - 50000000", 50000000);
319 }
320 #endif
321
322 TEST(ghash, IntRandMurmur2a12000)
323 {
324         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
325
326         randint_ghash_tests(ghash, "RandIntGHash - Murmur - 12000", 12000);
327 }
328
329 #ifdef GHASH_RUN_BIG
330 TEST(ghash, IntRandMurmur2a50000000)
331 {
332         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
333
334         randint_ghash_tests(ghash, "RandIntGHash - Murmur - 50000000", 50000000);
335 }
336 #endif
337
338 static unsigned int ghashutil_tests_nohash_p(const void *p)
339 {
340         return GET_UINT_FROM_POINTER(p);
341 }
342
343 static bool ghashutil_tests_cmp_p(const void *a, const void *b)
344 {
345         return a != b;
346 }
347
348 TEST(ghash, Int4NoHash12000)
349 {
350         GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
351
352         randint_ghash_tests(ghash, "RandIntGHash - No Hash - 12000", 12000);
353 }
354
355 #ifdef GHASH_RUN_BIG
356 TEST(ghash, Int4NoHash50000000)
357 {
358         GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
359
360         randint_ghash_tests(ghash, "RandIntGHash - No Hash - 50000000", 50000000);
361 }
362 #endif
363
364 /* Int_v4: 20M of randomly-generated integer vectors. */
365
366 static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
367 {
368         printf("\n========== STARTING %s ==========\n", id);
369
370         void *data_v = MEM_mallocN(sizeof(unsigned int[4]) * (size_t)nbr, __func__);
371         unsigned int (*data)[4] = (unsigned int (*)[4])data_v;
372         unsigned int (*dt)[4];
373         unsigned int i, j;
374
375         {
376                 RNG *rng = BLI_rng_new(0);
377                 for (i = nbr, dt = data; i--; dt++) {
378                         for (j = 4; j--; ) {
379                                 (*dt)[j] = BLI_rng_get_uint(rng);
380                         }
381                 }
382                 BLI_rng_free(rng);
383         }
384
385         {
386                 TIMEIT_START(int_v4_insert);
387
388 #ifdef GHASH_RESERVE
389                 BLI_ghash_reserve(ghash, nbr);
390 #endif
391
392                 for (i = nbr, dt = data; i--; dt++) {
393                         BLI_ghash_insert(ghash, *dt, SET_UINT_IN_POINTER(i));
394                 }
395
396                 TIMEIT_END(int_v4_insert);
397         }
398
399         PRINTF_GHASH_STATS(ghash);
400
401         {
402                 TIMEIT_START(int_v4_lookup);
403
404                 for (i = nbr, dt = data; i--; dt++) {
405                         void *v = BLI_ghash_lookup(ghash, (void *)(*dt));
406                         EXPECT_EQ(GET_UINT_FROM_POINTER(v), i);
407                 }
408
409                 TIMEIT_END(int_v4_lookup);
410         }
411
412         BLI_ghash_free(ghash, NULL, NULL);
413         MEM_freeN(data);
414
415         printf("========== ENDED %s ==========\n\n", id);
416 }
417
418 TEST(ghash, Int4GHash2000)
419 {
420         GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
421
422         int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
423 }
424
425 #ifdef GHASH_RUN_BIG
426 TEST(ghash, Int4GHash20000000)
427 {
428         GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
429
430         int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
431 }
432 #endif
433
434 TEST(ghash, Int4Murmur2a2000)
435 {
436         GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
437
438         int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
439 }
440
441 #ifdef GHASH_RUN_BIG
442 TEST(ghash, Int4Murmur2a20000000)
443 {
444         GHash *ghash = BLI_ghash_new(BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
445
446         int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
447 }
448 #endif
449
450 /* MultiSmall: create and manipulate a lot of very small ghashes (90% < 10 items, 9% < 100 items, 1% < 1000 items). */
451
452 static void multi_small_ghash_tests_one(GHash *ghash, RNG *rng, const unsigned int nbr)
453 {
454         unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
455         unsigned int *dt;
456         unsigned int i;
457
458         for (i = nbr, dt = data; i--; dt++) {
459                 *dt = BLI_rng_get_uint(rng);
460         }
461
462 #ifdef GHASH_RESERVE
463         BLI_ghash_reserve(ghash, nbr);
464 #endif
465
466         for (i = nbr, dt = data; i--; dt++) {
467                 BLI_ghash_insert(ghash, SET_UINT_IN_POINTER(*dt), SET_UINT_IN_POINTER(*dt));
468         }
469
470         for (i = nbr, dt = data; i--; dt++) {
471                 void *v = BLI_ghash_lookup(ghash, SET_UINT_IN_POINTER(*dt));
472                 EXPECT_EQ(GET_UINT_FROM_POINTER(v), *dt);
473         }
474
475         BLI_ghash_clear(ghash, NULL, NULL);
476 }
477
478 static void multi_small_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
479 {
480         printf("\n========== STARTING %s ==========\n", id);
481
482         RNG *rng = BLI_rng_new(0);
483
484         TIMEIT_START(multi_small_ghash);
485
486         unsigned int i = nbr;
487         while (i--) {
488                 const int nbr = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) * (!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
489                 multi_small_ghash_tests_one(ghash, rng, nbr);
490         }
491
492         TIMEIT_END(multi_small_ghash);
493
494         TIMEIT_START(multi_small2_ghash);
495
496         unsigned int i = nbr;
497         while (i--) {
498                 const int nbr = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) / 2 * (!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
499                 multi_small_ghash_tests_one(ghash, rng, nbr);
500         }
501
502         TIMEIT_END(multi_small2_ghash);
503
504         BLI_ghash_free(ghash, NULL, NULL);
505         BLI_rng_free(rng);
506
507         printf("========== ENDED %s ==========\n\n", id);
508 }
509
510 TEST(ghash, MultiRandIntGHash2000)
511 {
512         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
513
514         multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 2000", 2000);
515 }
516
517 TEST(ghash, MultiRandIntGHash200000)
518 {
519         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
520
521         multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 200000", 200000);
522 }
523
524 TEST(ghash, MultiRandIntMurmur2a2000)
525 {
526         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
527
528         multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 2000", 2000);
529 }
530
531 TEST(ghash, MultiRandIntMurmur2a200000)
532 {
533         GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
534
535         multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 200000", 200000);
536 }