Cleanup: warning & whitespace
[blender.git] / source / blender / blenlib / intern / BLI_kdopbvh.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Daniel Genrich, Andre Pinto
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/blenlib/intern/BLI_kdopbvh.c
29  *  \ingroup bli
30  *  \brief BVH-tree implementation.
31  *
32  * KD-Overlap-BVH, implements a bvh-tree structure with support for:
33  *
34  * - Ray-cast:
35  *   #BLI_bvhtree_ray_cast, #BVHRayCastData
36  * - Nearest point on surface:
37  *   #BLI_bvhtree_find_nearest, #BVHNearestData
38  * - Overlapping 2 trees:
39  *   #BLI_bvhtree_overlap, #BVHOverlapData_Shared, #BVHOverlapData_Thread
40  */
41
42 #include <assert.h>
43
44 #include "MEM_guardedalloc.h"
45
46 #include "BLI_utildefines.h"
47 #include "BLI_alloca.h"
48 #include "BLI_stack.h"
49 #include "BLI_kdopbvh.h"
50 #include "BLI_math.h"
51 #include "BLI_strict_flags.h"
52
53 #ifdef _OPENMP
54 #include <omp.h>
55 #endif
56
57 /* used for iterative_raycast */
58 // #define USE_SKIP_LINKS
59
60 #define MAX_TREETYPE 32
61
62 /* Setting zero so we can catch bugs in OpenMP/KDOPBVH.
63  * TODO(sergey): Deduplicate the limits with PBVH from BKE.
64  */
65 #ifdef _OPENMP
66 #  ifdef DEBUG
67 #    define KDOPBVH_OMP_LIMIT 0
68 #  else
69 #    define KDOPBVH_OMP_LIMIT 1024
70 #  endif
71 #endif
72
73 typedef unsigned char axis_t;
74
/* One node of the tree; the same struct is used for branches and for leafs. */
75 typedef struct BVHNode {
76         struct BVHNode **children; /* child pointers; node_join scans up to tree_type entries and stops at the first NULL */
77         struct BVHNode *parent; /* some user defined traversals need that */
78 #ifdef USE_SKIP_LINKS
79         struct BVHNode *skip[2]; /* set by build_skip_links: [0] = left, [1] = right */
80 #endif
81         float *bv;      /* Bounding volume: a (min, max) pair per axis, bv[2 * axis] = min, bv[2 * axis + 1] = max; max 13 axes */
82         int index;      /* face, edge, vertex index */
83         char totnode;   /* how many children are used, used for speedup */
84         char main_axis; /* Axis used to split this node (get_largest_axis() / 2) */
85 } BVHNode;
86
87 /* keep this struct small for speed purposes (the size limit is enforced by the static assert below) */
88 struct BVHTree {
89         BVHNode **nodes;
90         BVHNode *nodearray;     /* pre-alloc branch nodes */
91         BVHNode **nodechild;    /* pre-alloc childs for nodes */
92         float   *nodebv;        /* pre-alloc bounding-volumes for nodes */
93         float epsilon;          /* epsilon is used for inflation of the k-dop      */
94         int totleaf;            /* number of leafs */
95         int totbranch;          /* number of branches */
96         axis_t start_axis, stop_axis;  /* KDOP_AXES array indices according to axis; loops cover [start_axis, stop_axis) */
97         axis_t axis;                   /* kdop type (6 => OBB, 7 => AABB, ...) */
98         char tree_type;                /* type of tree: max children per node (4 => quadtree) */
99 };
100
101 /* optimization, ensure we stay small */
102 BLI_STATIC_ASSERT((sizeof(void *) == 8 && sizeof(BVHTree) <= 48) ||
103                   (sizeof(void *) == 4 && sizeof(BVHTree) <= 32),
104                   "over sized")
105
106 /* Overlap data referenced through BVHOverlapData_Thread.shared,
 * kept here to avoid duplicating these vars in every BVHOverlapData_Thread */
107 typedef struct BVHOverlapData_Shared {
108         const BVHTree *tree1, *tree2;
109         axis_t start_axis, stop_axis; /* NOTE(review): presumably the axis range common to both trees - confirm at the call site */
110
111         /* used for callbacks */
112         BVHTree_OverlapCallback callback;
113         void *userdata;
114 } BVHOverlapData_Shared;
115
/* Overlap state that is kept separately from the shared data (see the `shared` pointer). */
116 typedef struct BVHOverlapData_Thread {
117         BVHOverlapData_Shared *shared; /* the vars that are not duplicated per instance */
118         struct BLI_Stack *overlap;  /* store BVHTreeOverlap */
119         /* used for callbacks */
120         int thread;
121 } BVHOverlapData_Thread;
122
/* State for a nearest-point query (see #BLI_bvhtree_find_nearest in the file header). */
123 typedef struct BVHNearestData {
124         BVHTree *tree;
125         const float *co;        /* NOTE(review): presumably the query point - confirm in the code that fills this struct */
126         BVHTree_NearestPointCallback callback;
127         void    *userdata;
128         float proj[13];         /* coordinates projection over axis (one slot per possible KDOP axis) */
129         BVHTreeNearest nearest;
130
131 } BVHNearestData;
132
/* State for a ray-cast query (see #BLI_bvhtree_ray_cast in the file header). */
133 typedef struct BVHRayCastData {
134         BVHTree *tree;
135
136         BVHTree_RayCastCallback callback;
137         void    *userdata;
138
139
140         BVHTreeRay ray;
141
142 #ifdef USE_KDOPBVH_WATERTIGHT
143         struct IsectRayPrecalc isect_precalc;
144 #endif
145
146         /* the three arrays below are initialized by bvhtree_ray_cast_data_precalc */
147         float ray_dot_axis[13];
148         float idot_axis[13];
149         int index[6];
150
151         BVHTreeRayHit hit;
152 } BVHRayCastData;
153
154
155 /**
156  * Bounding Volume Hierarchy Definition
157  *
158  * Notes: From OBB until 26-DOP --> all bounding volumes possible, just choose type below
159  * Notes: You have to choose the type at compile time ITM
160  * Notes: You can choose the tree type --> binary, quad, octree, choose below
161  */
162
/* Projection directions of the k-DOP, one row per axis (the vectors are not normalized). */
static const float KDOP_AXES[13][3] = {
	/* directions with a single non-zero component */
	{1.0f, 0.0f, 0.0f},
	{0.0f, 1.0f, 0.0f},
	{0.0f, 0.0f, 1.0f},
	/* directions with three non-zero components */
	{1.0f, 1.0f, 1.0f},
	{1.0f, -1.0f, 1.0f},
	{1.0f, 1.0f, -1.0f},
	{1.0f, -1.0f, -1.0f},
	/* directions with two non-zero components */
	{1.0f, 1.0f, 0.0f},
	{1.0f, 0.0f, 1.0f},
	{0.0f, 1.0f, 1.0f},
	{1.0f, -1.0f, 0.0f},
	{1.0f, 0.0f, -1.0f},
	{0.0f, 1.0f, -1.0f},
};
168
169 MINLINE axis_t min_axis(axis_t a, axis_t b)
170 {
171         return (a < b) ? a : b;
172 }
173 #if 0
174 MINLINE axis_t max_axis(axis_t a, axis_t b)
175 {
176         return (b < a) ? a : b;
177 }
178 #endif
179
180 #if 0
181
182 /*
183  * Generic push and pop heap
184  */
/* Sift-up: takes the element stored in the last slot (heap[heap_size - 1]) and moves it towards
 * index 0 for as long as PRIORITY(element, parent) holds, shifting the parents down on the way.
 * Parent of slot N is slot (N - 1) / 2. The macro does not change heap_size itself. */
185 #define PUSH_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                  \
186         {                                                                         \
187                 HEAP_TYPE element = heap[heap_size - 1];                              \
188                 int child = heap_size - 1;                                            \
189                 while (child != 0) {                                                  \
190                         int parent = (child - 1) / 2;                                     \
191                         if (PRIORITY(element, heap[parent])) {                            \
192                                 heap[child] = heap[parent];                                   \
193                                 child = parent;                                               \
194                         }                                                                 \
195                         else {                                                            \
196                                 break;                                                        \
197                         }                                                                 \
198                 }                                                                     \
199                 heap[child] = element;                                                \
200         } (void)0
201
/* Sift-down: takes the element stored in the last slot (heap[heap_size - 1]) and re-inserts it
 * starting from index 0 (whose previous content is overwritten). At every step the child preferred
 * by PRIORITY is moved up, until PRIORITY(element, child) holds or the bottom is reached.
 * The macro does not change heap_size itself. */
202 #define POP_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                   \
203         {                                                                         \
204                 HEAP_TYPE element = heap[heap_size - 1];                              \
205                 int parent = 0;                                                       \
206                 while (parent < (heap_size - 1) / 2) {                                \
207                         int child2 = (parent + 1) * 2;                                    \
208                         if (PRIORITY(heap[child2 - 1], heap[child2])) {                   \
209                                 child2--;                                                     \
210                         }                                                                 \
211                         if (PRIORITY(element, heap[child2])) {                            \
212                                 break;                                                        \
213                         }                                                                 \
214                         heap[parent] = heap[child2];                                      \
215                         parent = child2;                                                  \
216                 }                                                                     \
217                 heap[parent] = element;                                               \
218         } (void)0
219
/**
 * Make sure a growable buffer can hold at least \a new_size items.
 *
 * \param local_memblock: Buffer that must never be passed to realloc; while \a *memblock still
 * points at it, growing allocates a fresh heap block and copies the old items over.
 * (presumably a caller-owned stack buffer - confirm at the call sites).
 * \param memblock: In/out pointer to the current buffer.
 * \param new_size: Number of items that have to fit.
 * \param max_size: In/out capacity of \a *memblock, in items.
 * \param size_per_item: Size of one item in bytes.
 * \return true when \a *memblock can hold \a new_size items, false when allocation failed
 * (then \a *memblock and \a *max_size are left untouched and remain valid).
 *
 * \note The capacity is doubled until the request fits; no check is made against `int` overflow,
 * so capacities are expected to stay below half of the `int` range.
 */
static bool ADJUST_MEMORY(void *local_memblock, void **memblock, int new_size, int *max_size, int size_per_item)
{
	const int old_max_size = (*max_size > 0) ? *max_size : 0;
	int new_max_size;
	void *new_memblock;

	if (new_size <= *max_size) {
		return true;
	}

	/* A single doubling is not always enough (and never grows a capacity of zero),
	 * keep doubling until the request fits. */
	new_max_size = (old_max_size > 0) ? old_max_size : 1;
	while (new_max_size < new_size) {
		new_max_size *= 2;
	}

	if (*memblock == local_memblock) {
		/* the local buffer can't be realloc'ed: move its content to the heap */
		new_memblock = malloc((size_t)size_per_item * (size_t)new_max_size);
		if (new_memblock && old_max_size > 0) {
			memcpy(new_memblock, *memblock, (size_t)size_per_item * (size_t)old_max_size);
		}
	}
	else {
		new_memblock = realloc(*memblock, (size_t)size_per_item * (size_t)new_max_size);
	}

	if (new_memblock == NULL) {
		return false;
	}

	*memblock = new_memblock;
	*max_size = new_max_size;
	return true;
}
246 #endif
247
248 /**
249  * Introsort
250  * derived with permission from the following Java code:
251  * http://ralphunden.net/content/tutorials/a-guide-to-introsort/
252  * and he derived it from the SUN STL
253  */
254
255 //static int size_threshold = 16;
256
257 #if 0
258 /**
259  * Common methods for all algorithms
260  */
/**
 * Integer base-2 logarithm, rounded down.
 *
 * Computed with integer shifts only, so the result is exact for every positive \a a
 * and does not depend on floating point rounding.
 *
 * \return floor(log2(a)) for a >= 1, and 0 for a <= 0 (where the logarithm is undefined).
 */
static int floor_lg(int a)
{
	int result = 0;

	while (a > 1) {
		a >>= 1;
		result++;
	}
	return result;
}
265 #endif
266
267 static void node_minmax_init(const BVHTree *tree, BVHNode *node)
268 {
269         axis_t axis_iter;
270         float (*bv)[2] = (float (*)[2])node->bv;
271
272         for (axis_iter = tree->start_axis; axis_iter != tree->stop_axis; axis_iter++) {
273                 bv[axis_iter][0] =  FLT_MAX;
274                 bv[axis_iter][1] = -FLT_MAX;
275         }
276 }
277
278 /**
279  * Insertion sort algorithm
280  */
281 static void bvh_insertionsort(BVHNode **a, int lo, int hi, int axis)
282 {
283         int i, j;
284         BVHNode *t;
285         for (i = lo; i < hi; i++) {
286                 j = i;
287                 t = a[i];
288                 while ((j != lo) && (t->bv[axis] < (a[j - 1])->bv[axis])) {
289                         a[j] = a[j - 1];
290                         j--;
291                 }
292                 a[j] = t;
293         }
294 }
295
296 static int bvh_partition(BVHNode **a, int lo, int hi, BVHNode *x, int axis)
297 {
298         int i = lo, j = hi;
299         while (1) {
300                 while ((a[i])->bv[axis] < x->bv[axis]) i++;
301                 j--;
302                 while (x->bv[axis] < (a[j])->bv[axis]) j--;
303                 if (!(i < j))
304                         return i;
305                 SWAP(BVHNode *, a[i], a[j]);
306                 i++;
307         }
308 }
309
310 #if 0
311 /**
312  * Heapsort algorithm
313  */
314 static void bvh_downheap(BVHNode **a, int i, int n, int lo, int axis)
315 {
316         BVHNode *d = a[lo + i - 1];
317         int child;
318         while (i <= n / 2) {
319                 child = 2 * i;
320                 if ((child < n) && ((a[lo + child - 1])->bv[axis] < (a[lo + child])->bv[axis])) {
321                         child++;
322                 }
323                 if (!(d->bv[axis] < (a[lo + child - 1])->bv[axis])) break;
324                 a[lo + i - 1] = a[lo + child - 1];
325                 i = child;
326         }
327         a[lo + i - 1] = d;
328 }
329
330 static void bvh_heapsort(BVHNode **a, int lo, int hi, int axis)
331 {
332         int n = hi - lo, i;
333         for (i = n / 2; i >= 1; i = i - 1) {
334                 bvh_downheap(a, i, n, lo, axis);
335         }
336         for (i = n; i > 1; i = i - 1) {
337                 SWAP(BVHNode *, a[lo], a[lo + i - 1]);
338                 bvh_downheap(a, 1, i - 1, lo, axis);
339         }
340 }
341 #endif
342
343 static BVHNode *bvh_medianof3(BVHNode **a, int lo, int mid, int hi, int axis)  /* returns Sortable */
344 {
345         if ((a[mid])->bv[axis] < (a[lo])->bv[axis]) {
346                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
347                         return a[mid];
348                 else {
349                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
350                                 return a[hi];
351                         else
352                                 return a[lo];
353                 }
354         }
355         else {
356                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis]) {
357                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
358                                 return a[lo];
359                         else
360                                 return a[hi];
361                 }
362                 else
363                         return a[mid];
364         }
365 }
366
367 #if 0
368 /*
369  * Quicksort algorithm modified for Introsort
370  */
371 static void bvh_introsort_loop(BVHNode **a, int lo, int hi, int depth_limit, int axis)
372 {
373         int p;
374
375         while (hi - lo > size_threshold) {
376                 if (depth_limit == 0) {
377                         bvh_heapsort(a, lo, hi, axis);
378                         return;
379                 }
380                 depth_limit = depth_limit - 1;
381                 p = bvh_partition(a, lo, hi, bvh_medianof3(a, lo, lo + ((hi - lo) / 2) + 1, hi - 1, axis), axis);
382                 bvh_introsort_loop(a, p, hi, depth_limit, axis);
383                 hi = p;
384         }
385 }
386
387 static void sort(BVHNode **a0, int begin, int end, int axis)
388 {
389         if (begin < end) {
390                 BVHNode **a = a0;
391                 bvh_introsort_loop(a, begin, end, 2 * floor_lg(end - begin), axis);
392                 bvh_insertionsort(a, begin, end, axis);
393         }
394 }
395
396 static void sort_along_axis(BVHTree *tree, int start, int end, int axis)
397 {
398         sort(tree->nodes, start, end, axis);
399 }
400 #endif
401
402 /**
403  * \note after a call to this function you can expect one of:
404  * - every node to left of a[n] are smaller or equal to it
405  * - every node to the right of a[n] are greater or equal to it */
406 static int partition_nth_element(BVHNode **a, int _begin, int _end, int n, int axis)
407 {
408         int begin = _begin, end = _end, cut;
409         while (end - begin > 3) {
410                 cut = bvh_partition(a, begin, end, bvh_medianof3(a, begin, (begin + end) / 2, end - 1, axis), axis);
411                 if (cut <= n)
412                         begin = cut;
413                 else
414                         end = cut;
415         }
416         bvh_insertionsort(a, begin, end, axis);
417
418         return n;
419 }
420
421 #ifdef USE_SKIP_LINKS
422 static void build_skip_links(BVHTree *tree, BVHNode *node, BVHNode *left, BVHNode *right)
423 {
424         int i;
425         
426         node->skip[0] = left;
427         node->skip[1] = right;
428         
429         for (i = 0; i < node->totnode; i++) {
430                 if (i + 1 < node->totnode)
431                         build_skip_links(tree, node->children[i], left, node->children[i + 1]);
432                 else
433                         build_skip_links(tree, node->children[i], left, right);
434
435                 left = node->children[i];
436         }
437 }
438 #endif
439
440 /*
441  * BVHTree bounding volumes functions
442  */
443 static void create_kdop_hull(BVHTree *tree, BVHNode *node, const float *co, int numpoints, int moving)
444 {
445         float newminmax;
446         float *bv = node->bv;
447         int k;
448         axis_t axis_iter;
449         
450         /* don't init boudings for the moving case */
451         if (!moving) {
452                 node_minmax_init(tree, node);
453         }
454
455         for (k = 0; k < numpoints; k++) {
456                 /* for all Axes. */
457                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
458                         newminmax = dot_v3v3(&co[k * 3], KDOP_AXES[axis_iter]);
459                         if (newminmax < bv[2 * axis_iter])
460                                 bv[2 * axis_iter] = newminmax;
461                         if (newminmax > bv[(2 * axis_iter) + 1])
462                                 bv[(2 * axis_iter) + 1] = newminmax;
463                 }
464         }
465 }
466
467 /**
468  * \note depends on the fact that the BVH's for each face is already build
469  */
470 static void refit_kdop_hull(BVHTree *tree, BVHNode *node, int start, int end)
471 {
472         float newmin, newmax;
473         float *bv = node->bv;
474         int j;
475         axis_t axis_iter;
476
477         node_minmax_init(tree, node);
478
479         for (j = start; j < end; j++) {
480                 /* for all Axes. */
481                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
482                         newmin = tree->nodes[j]->bv[(2 * axis_iter)];
483                         if ((newmin < bv[(2 * axis_iter)]))
484                                 bv[(2 * axis_iter)] = newmin;
485
486                         newmax = tree->nodes[j]->bv[(2 * axis_iter) + 1];
487                         if ((newmax > bv[(2 * axis_iter) + 1]))
488                                 bv[(2 * axis_iter) + 1] = newmax;
489                 }
490         }
491
492 }
493
/**
 * Find the axis along which the bounding volume \a bv is the widest.
 *
 * Only supports the x, y, z axes at the moment,
 * but we should use a plain and simple function here for speed sake.
 *
 * \return the index in \a bv of the max value of the widest axis: 1 (x), 3 (y) or 5 (z).
 * Ties are resolved towards the later axis.
 */
static char get_largest_axis(const float *bv)
{
	const float size_x = bv[1] - bv[0];
	const float size_y = bv[3] - bv[2];
	const float size_z = bv[5] - bv[4];

	if (size_x > size_y) {
		return (size_x > size_z) ? 1 : 5;
	}
	return (size_y > size_z) ? 3 : 5;
}
517
518 /**
519  * bottom-up update of bvh node BV
520  * join the children on the parent BV */
521 static void node_join(BVHTree *tree, BVHNode *node)
522 {
523         int i;
524         axis_t axis_iter;
525
526         node_minmax_init(tree, node);
527         
528         for (i = 0; i < tree->tree_type; i++) {
529                 if (node->children[i]) {
530                         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
531                                 /* update minimum */
532                                 if (node->children[i]->bv[(2 * axis_iter)] < node->bv[(2 * axis_iter)])
533                                         node->bv[(2 * axis_iter)] = node->children[i]->bv[(2 * axis_iter)];
534
535                                 /* update maximum */
536                                 if (node->children[i]->bv[(2 * axis_iter) + 1] > node->bv[(2 * axis_iter) + 1])
537                                         node->bv[(2 * axis_iter) + 1] = node->children[i]->bv[(2 * axis_iter) + 1];
538                         }
539                 }
540                 else
541                         break;
542         }
543 }
544
545 /*
546  * Debug and information functions
547  */
548 #if 0
549 static void bvhtree_print_tree(BVHTree *tree, BVHNode *node, int depth)
550 {
551         int i;
552         axis_t axis_iter;
553
554         for (i = 0; i < depth; i++) printf(" ");
555         printf(" - %d (%ld): ", node->index, (long int)(node - tree->nodearray));
556         for (axis_iter = (axis_t)(2 * tree->start_axis);
557              axis_iter < (axis_t)(2 * tree->stop_axis);
558              axis_iter++)
559         {
560                 printf("%.3f ", node->bv[axis_iter]);
561         }
562         printf("\n");
563
564         for (i = 0; i < tree->tree_type; i++)
565                 if (node->children[i])
566                         bvhtree_print_tree(tree, node->children[i], depth + 1);
567 }
568
569 static void bvhtree_info(BVHTree *tree)
570 {
571         printf("BVHTree info\n");
572         printf("tree_type = %d, axis = %d, epsilon = %f\n", tree->tree_type, tree->axis, tree->epsilon);
573         printf("nodes = %d, branches = %d, leafs = %d\n", tree->totbranch + tree->totleaf,  tree->totbranch, tree->totleaf);
574         printf("Memory per node = %ldbytes\n", sizeof(BVHNode) + sizeof(BVHNode *) * tree->tree_type + sizeof(float) * tree->axis);
575         printf("BV memory = %dbytes\n", (int)MEM_allocN_len(tree->nodebv));
576
577         printf("Total memory = %ldbytes\n", sizeof(BVHTree) +
578                MEM_allocN_len(tree->nodes) +
579                MEM_allocN_len(tree->nodearray) +
580                MEM_allocN_len(tree->nodechild) +
581                MEM_allocN_len(tree->nodebv));
582
583 //      bvhtree_print_tree(tree, tree->nodes[tree->totleaf], 0);
584 }
585 #endif
586
587 #if 0
588
589
590 static void verify_tree(BVHTree *tree)
591 {
592         int i, j, check = 0;
593         
594         /* check the pointer list */
595         for (i = 0; i < tree->totleaf; i++) {
596                 if (tree->nodes[i]->parent == NULL) {
597                         printf("Leaf has no parent: %d\n", i);
598                 }
599                 else {
600                         for (j = 0; j < tree->tree_type; j++) {
601                                 if (tree->nodes[i]->parent->children[j] == tree->nodes[i])
602                                         check = 1;
603                         }
604                         if (!check) {
605                                 printf("Parent child relationship doesn't match: %d\n", i);
606                         }
607                         check = 0;
608                 }
609         }
610         
611         /* check the leaf list */
612         for (i = 0; i < tree->totleaf; i++) {
613                 if (tree->nodearray[i].parent == NULL) {
614                         printf("Leaf has no parent: %d\n", i);
615                 }
616                 else {
617                         for (j = 0; j < tree->tree_type; j++) {
618                                 if (tree->nodearray[i].parent->children[j] == &tree->nodearray[i])
619                                         check = 1;
620                         }
621                         if (!check) {
622                                 printf("Parent child relationship doesn't match: %d\n", i);
623                         }
624                         check = 0;
625                 }
626         }
627         
628         printf("branches: %d, leafs: %d, total: %d\n", tree->totbranch, tree->totleaf, tree->totbranch + tree->totleaf);
629 }
630 #endif
631
632 /* Helper data and structures to build a min-leaf generalized implicit tree.
633  * This code can be easily reduced (basically this is only a method to calculate pow(k, n) in O(1).. and stuff like that) */
/* Values precomputed by build_implicit_tree_helper and read by implicit_leafs_index. */
634 typedef struct BVHBuildHelper {
635         int tree_type;              /* copy of tree->tree_type */
636         int totleafs;               /* copy of tree->totleaf */
637
638         int leafs_per_child[32];    /* Min number of leafs that are achievable from a node at depth N;
                                     * [0] is the smallest tree_type^n >= totleafs, every next entry is the previous one / tree_type */
639         int branches_on_level[32];  /* Number of nodes at depth N (tree_type^N) */
640
641         int remain_leafs;           /* Number of leafs that are placed on the level that is not 100% filled */
642
643 } BVHBuildHelper;
644
645 static void build_implicit_tree_helper(BVHTree *tree, BVHBuildHelper *data)
646 {
647         int depth = 0;
648         int remain;
649         int nnodes;
650
651         data->totleafs = tree->totleaf;
652         data->tree_type = tree->tree_type;
653
654         /* Calculate the smallest tree_type^n such that tree_type^n >= num_leafs */
655         for (data->leafs_per_child[0] = 1;
656              data->leafs_per_child[0] <  data->totleafs;
657              data->leafs_per_child[0] *= data->tree_type)
658         {
659                 /* pass */
660         }
661
662         data->branches_on_level[0] = 1;
663
664         for (depth = 1; (depth < 32) && data->leafs_per_child[depth - 1]; depth++) {
665                 data->branches_on_level[depth] = data->branches_on_level[depth - 1] * data->tree_type;
666                 data->leafs_per_child[depth] = data->leafs_per_child[depth - 1] / data->tree_type;
667         }
668
669         remain = data->totleafs - data->leafs_per_child[1];
670         nnodes = (remain + data->tree_type - 2) / (data->tree_type - 1);
671         data->remain_leafs = remain + nnodes;
672 }
673
674 // return the min index of all the leafs archivable with the given branch
675 static int implicit_leafs_index(BVHBuildHelper *data, int depth, int child_index)
676 {
677         int min_leaf_index = child_index * data->leafs_per_child[depth - 1];
678         if (min_leaf_index <= data->remain_leafs)
679                 return min_leaf_index;
680         else if (data->leafs_per_child[depth])
681                 return data->totleafs - (data->branches_on_level[depth - 1] - child_index) * data->leafs_per_child[depth];
682         else
683                 return data->remain_leafs;
684 }
685
686 /**
687  * Generalized implicit tree build
688  *
689  * An implicit tree is a tree where its structure is implied, thus there is no need to store child pointers or indices.
690  * It's possible to find the position of the child or the parent with simple maths (multiplication and addition). This type
691  * of tree is for example used on heaps.. where node N has its childs at indices N*2 and N*2+1.
692  *
693  * Although in this case the tree type is general.. and not know until runtime.
694  * tree_type stands for the maximum number of childs that a tree node can have.
695  * All tree types >= 2 are supported.
696  *
697  * Advantages of the used trees include:
698  *  - No need to store child/parent relations (they are implicit);
699  *  - Any node child always has an index greater than the parent;
700  *  - Brother nodes are sequential in memory;
701  *
702  *
703  * Some math relations derived for general implicit trees:
704  *
705  *   K = tree_type, ( 2 <= K )
706  *   ROOT = 1
707  *   N child of node A = A * K + (2 - K) + N, (0 <= N < K)
708  *
709  * Util methods:
710  *   TODO...
711  *    (looping elements, knowing if its a leaf or not.. etc...)
712  */
713
/* Return the number of branches needed to have the requested number of leafs (never less than 1). */
static int implicit_needed_branches(int tree_type, int leafs)
{
	const int branches = (leafs + tree_type - 3) / (tree_type - 1);

	return max_ii(1, branches);
}
719
720 /**
721  * This function handles the problem of "sorting" the leafs (along the split_axis).
722  *
723  * It arranges the elements in the given partitions such that:
724  *  - any element in partition N is less or equal to any element in partition N+1.
725  *  - if all elements are different all partition will get the same subset of elements
726  *    as if the array was sorted.
727  *
728  * partition P is described as the elements in the range ( nth[P], nth[P+1] ]
729  *
730  * TODO: This can be optimized a bit by doing a specialized nth_element instead of K nth_elements
731  */
732 static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int split_axis)
733 {
734         int i;
735         for (i = 0; i < partitions - 1; i++) {
736                 if (nth[i] >= nth[partitions])
737                         break;
738
739                 partition_nth_element(leafs_array, nth[i], nth[partitions], nth[i + 1], split_axis);
740         }
741 }
742
743 /**
744  * This functions builds an optimal implicit tree from the given leafs.
745  * Where optimal stands for:
746  *  - The resulting tree will have the smallest number of branches;
747  *  - At most only one branch will have NULL childs;
748  *  - All leafs will be stored at level N or N+1.
749  *
750  * This function creates an implicit tree on branches_array, the leafs are given on the leafs_array.
751  *
752  * The tree is built per depth levels. First branches at depth 1.. then branches at depth 2.. etc..
753  * The reason is that we can build level N+1 from level N without any data dependencies.. thus it allows
754  * to use multithread building.
755  *
756  * To achieve this it is necessary to find how many leafs are accessible from a certain branch; BVHBuildHelper,
757  * implicit_needed_branches and implicit_leafs_index are auxiliary helpers to solve that "optimal-split".
758  */
759 static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array, BVHNode **leafs_array, int num_leafs)
760 {
761         int i;
762
763         const int tree_type   = tree->tree_type;
764         const int tree_offset = 2 - tree->tree_type; /* this value is 0 (on binary trees) and negative on the others */
765         const int num_branches = implicit_needed_branches(tree_type, num_leafs);
766
767         BVHBuildHelper data;
768         int depth;
769         
770         /* set parent from root node to NULL */
771         BVHNode *tmp = branches_array + 0;
772         tmp->parent = NULL;
773
774         /* Most of bvhtree code relies on 1-leaf trees having at least one branch
775          * We handle that special case here */
776         if (num_leafs == 1) {
777                 BVHNode *root = branches_array + 0;
778                 refit_kdop_hull(tree, root, 0, num_leafs);
779                 root->main_axis = get_largest_axis(root->bv) / 2;
780                 root->totnode = 1;
781                 root->children[0] = leafs_array[0];
782                 root->children[0]->parent = root;
783                 return;
784         }
785
786         branches_array--;  /* Implicit trees use 1-based indexs */
787
788         build_implicit_tree_helper(tree, &data);
789
790         /* Loop tree levels (log N) loops */
791         for (i = 1, depth = 1; i <= num_branches; i = i * tree_type + tree_offset, depth++) {
792                 const int first_of_next_level = i * tree_type + tree_offset;
793                 const int end_j = min_ii(first_of_next_level, num_branches + 1);  /* index of last branch on this level */
794                 int j;
795
796                 /* Loop all branches on this level */
797
798 #pragma omp parallel for private(j) schedule(static) if (num_leafs > KDOPBVH_OMP_LIMIT)
799                 for (j = i; j < end_j; j++) {
800                         int k;
801                         const int parent_level_index = j - i;
802                         BVHNode *parent = branches_array + j;
803                         int nth_positions[MAX_TREETYPE + 1];
804                         char split_axis;
805
806                         int parent_leafs_begin = implicit_leafs_index(&data, depth, parent_level_index);
807                         int parent_leafs_end   = implicit_leafs_index(&data, depth, parent_level_index + 1);
808
809                         /* This calculates the bounding box of this branch
810                          * and chooses the largest axis as the axis to divide leafs */
811                         refit_kdop_hull(tree, parent, parent_leafs_begin, parent_leafs_end);
812                         split_axis = get_largest_axis(parent->bv);
813
814                         /* Save split axis (this can be used on raytracing to speedup the query time) */
815                         parent->main_axis = split_axis / 2;
816
817                         /* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
818                          * Only to assure that the elements are partitioned on a way that each child takes the elements
819                          * it would take in case the whole array was sorted.
820                          * Split_leafs takes care of that "sort" problem. */
821                         nth_positions[0] = parent_leafs_begin;
822                         nth_positions[tree_type] = parent_leafs_end;
823                         for (k = 1; k < tree_type; k++) {
824                                 int child_index = j * tree_type + tree_offset + k;
825                                 int child_level_index = child_index - first_of_next_level; /* child level index */
826                                 nth_positions[k] = implicit_leafs_index(&data, depth + 1, child_level_index);
827                         }
828
829                         split_leafs(leafs_array, nth_positions, tree_type, split_axis);
830
831
832                         /* Setup children and totnode counters
833                          * Not really needed but currently most of BVH code relies on having an explicit children structure */
834                         for (k = 0; k < tree_type; k++) {
835                                 int child_index = j * tree_type + tree_offset + k;
836                                 int child_level_index = child_index - first_of_next_level; /* child level index */
837
838                                 int child_leafs_begin = implicit_leafs_index(&data, depth + 1, child_level_index);
839                                 int child_leafs_end   = implicit_leafs_index(&data, depth + 1, child_level_index + 1);
840
841                                 if (child_leafs_end - child_leafs_begin > 1) {
842                                         parent->children[k] = branches_array + child_index;
843                                         parent->children[k]->parent = parent;
844                                 }
845                                 else if (child_leafs_end - child_leafs_begin == 1) {
846                                         parent->children[k] = leafs_array[child_leafs_begin];
847                                         parent->children[k]->parent = parent;
848                                 }
849                                 else {
850                                         break;
851                                 }
852
853                                 parent->totnode = (char)(k + 1);
854                         }
855                 }
856         }
857 }
858
859 /* -------------------------------------------------------------------- */
860 /* BLI_bvhtree api */
861
862 /**
863  * \note many callers don't check for ``NULL`` return.
864  */
865 BVHTree *BLI_bvhtree_new(int maxsize, float epsilon, char tree_type, char axis)
866 {
867         BVHTree *tree;
868         int numnodes, i;
869
870         BLI_assert(tree_type >= 2 && tree_type <= MAX_TREETYPE);
871
872         tree = MEM_callocN(sizeof(BVHTree), "BVHTree");
873
874         /* tree epsilon must be >= FLT_EPSILON
875          * so that tangent rays can still hit a bounding volume..
876          * this bug would show up when casting a ray aligned with a kdop-axis and with an edge of 2 faces */
877         epsilon = max_ff(FLT_EPSILON, epsilon);
878
879         if (tree) {
880                 tree->epsilon = epsilon;
881                 tree->tree_type = tree_type;
882                 tree->axis = axis;
883
884                 if (axis == 26) {
885                         tree->start_axis = 0;
886                         tree->stop_axis = 13;
887                 }
888                 else if (axis == 18) {
889                         tree->start_axis = 7;
890                         tree->stop_axis = 13;
891                 }
892                 else if (axis == 14) {
893                         tree->start_axis = 0;
894                         tree->stop_axis = 7;
895                 }
896                 else if (axis == 8) { /* AABB */
897                         tree->start_axis = 0;
898                         tree->stop_axis = 4;
899                 }
900                 else if (axis == 6) { /* OBB */
901                         tree->start_axis = 0;
902                         tree->stop_axis = 3;
903                 }
904                 else {
905                         /* should never happen! */
906                         BLI_assert(0);
907
908                         goto fail;
909                 }
910
911
912                 /* Allocate arrays */
913                 numnodes = maxsize + implicit_needed_branches(tree_type, maxsize) + tree_type;
914
915                 tree->nodes = MEM_callocN(sizeof(BVHNode *) * (size_t)numnodes, "BVHNodes");
916                 tree->nodebv = MEM_callocN(sizeof(float) * (size_t)(axis * numnodes), "BVHNodeBV");
917                 tree->nodechild = MEM_callocN(sizeof(BVHNode *) * (size_t)(tree_type * numnodes), "BVHNodeBV");
918                 tree->nodearray = MEM_callocN(sizeof(BVHNode) * (size_t)numnodes, "BVHNodeArray");
919                 
920                 if (UNLIKELY((!tree->nodes) ||
921                              (!tree->nodebv) ||
922                              (!tree->nodechild) ||
923                              (!tree->nodearray)))
924                 {
925                         goto fail;
926                 }
927
928                 /* link the dynamic bv and child links */
929                 for (i = 0; i < numnodes; i++) {
930                         tree->nodearray[i].bv = &tree->nodebv[i * axis];
931                         tree->nodearray[i].children = &tree->nodechild[i * tree_type];
932                 }
933                 
934         }
935         return tree;
936
937
938 fail:
939         MEM_SAFE_FREE(tree->nodes);
940         MEM_SAFE_FREE(tree->nodebv);
941         MEM_SAFE_FREE(tree->nodechild);
942         MEM_SAFE_FREE(tree->nodearray);
943
944         MEM_freeN(tree);
945
946         return NULL;
947 }
948
949 void BLI_bvhtree_free(BVHTree *tree)
950 {
951         if (tree) {
952                 MEM_freeN(tree->nodes);
953                 MEM_freeN(tree->nodearray);
954                 MEM_freeN(tree->nodebv);
955                 MEM_freeN(tree->nodechild);
956                 MEM_freeN(tree);
957         }
958 }
959
960 void BLI_bvhtree_balance(BVHTree *tree)
961 {
962         int i;
963
964         BVHNode *branches_array = tree->nodearray + tree->totleaf;
965         BVHNode **leafs_array    = tree->nodes;
966
967         /* This function should only be called once (some big bug goes here if its being called more than once per tree) */
968         BLI_assert(tree->totbranch == 0);
969
970         /* Build the implicit tree */
971         non_recursive_bvh_div_nodes(tree, branches_array, leafs_array, tree->totleaf);
972
973         /* current code expects the branches to be linked to the nodes array
974          * we perform that linkage here */
975         tree->totbranch = implicit_needed_branches(tree->tree_type, tree->totleaf);
976         for (i = 0; i < tree->totbranch; i++)
977                 tree->nodes[tree->totleaf + i] = branches_array + i;
978
979 #ifdef USE_SKIP_LINKS
980         build_skip_links(tree, tree->nodes[tree->totleaf], NULL, NULL);
981 #endif
982
983         /* bvhtree_info(tree); */
984 }
985
986 void BLI_bvhtree_insert(BVHTree *tree, int index, const float co[3], int numpoints)
987 {
988         axis_t axis_iter;
989         BVHNode *node = NULL;
990
991         /* insert should only possible as long as tree->totbranch is 0 */
992         BLI_assert(tree->totbranch <= 0);
993         BLI_assert((size_t)tree->totleaf < MEM_allocN_len(tree->nodes) / sizeof(*(tree->nodes)));
994
995         node = tree->nodes[tree->totleaf] = &(tree->nodearray[tree->totleaf]);
996         tree->totleaf++;
997
998         create_kdop_hull(tree, node, co, numpoints, 0);
999         node->index = index;
1000
1001         /* inflate the bv with some epsilon */
1002         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1003                 node->bv[(2 * axis_iter)] -= tree->epsilon; /* minimum */
1004                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1005         }
1006 }
1007
1008
1009 /* call before BLI_bvhtree_update_tree() */
1010 bool BLI_bvhtree_update_node(BVHTree *tree, int index, const float co[3], const float co_moving[3], int numpoints)
1011 {
1012         BVHNode *node = NULL;
1013         axis_t axis_iter;
1014         
1015         /* check if index exists */
1016         if (index > tree->totleaf)
1017                 return false;
1018         
1019         node = tree->nodearray + index;
1020         
1021         create_kdop_hull(tree, node, co, numpoints, 0);
1022         
1023         if (co_moving)
1024                 create_kdop_hull(tree, node, co_moving, numpoints, 1);
1025         
1026         /* inflate the bv with some epsilon */
1027         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1028                 node->bv[(2 * axis_iter)]     -= tree->epsilon; /* minimum */
1029                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1030         }
1031
1032         return true;
1033 }
1034
1035 /* call BLI_bvhtree_update_node() first for every node/point/triangle */
1036 void BLI_bvhtree_update_tree(BVHTree *tree)
1037 {
1038         /* Update bottom=>top
1039          * TRICKY: the way we build the tree all the childs have an index greater than the parent
1040          * This allows us todo a bottom up update by starting on the bigger numbered branch */
1041
1042         BVHNode **root  = tree->nodes + tree->totleaf;
1043         BVHNode **index = tree->nodes + tree->totleaf + tree->totbranch - 1;
1044
1045         for (; index >= root; index--)
1046                 node_join(tree, *index);
1047 }
1048
1049 float BLI_bvhtree_getepsilon(const BVHTree *tree)
1050 {
1051         return tree->epsilon;
1052 }
1053
1054
1055 /* -------------------------------------------------------------------- */
1056 /* BLI_bvhtree_overlap */
1057
1058 /**
1059  * overlap - is it possible for 2 bv's to collide ?
1060  */
1061 static bool tree_overlap_test(const BVHNode *node1, const BVHNode *node2, axis_t start_axis, axis_t stop_axis)
1062 {
1063         const float *bv1     = node1->bv + (start_axis << 1);
1064         const float *bv2     = node2->bv + (start_axis << 1);
1065         const float *bv1_end = node1->bv + (stop_axis  << 1);
1066         
1067         /* test all axis if min + max overlap */
1068         for (; bv1 != bv1_end; bv1 += 2, bv2 += 2) {
1069                 if ((bv1[0] > bv2[1]) || (bv2[0] > bv1[1])) {
1070                         return 0;
1071                 }
1072         }
1073
1074         return 1;
1075 }
1076
1077 static void tree_overlap_traverse(
1078         BVHOverlapData_Thread *data_thread,
1079         const BVHNode *node1, const BVHNode *node2)
1080 {
1081         BVHOverlapData_Shared *data = data_thread->shared;
1082         int j;
1083
1084         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1085                 /* check if node1 is a leaf */
1086                 if (!node1->totnode) {
1087                         /* check if node2 is a leaf */
1088                         if (!node2->totnode) {
1089                                 BVHTreeOverlap *overlap;
1090
1091                                 if (UNLIKELY(node1 == node2)) {
1092                                         return;
1093                                 }
1094
1095                                 /* both leafs, insert overlap! */
1096                                 overlap = BLI_stack_push_r(data_thread->overlap);
1097                                 overlap->indexA = node1->index;
1098                                 overlap->indexB = node2->index;
1099                         }
1100                         else {
1101                                 for (j = 0; j < data->tree2->tree_type; j++) {
1102                                         if (node2->children[j]) {
1103                                                 tree_overlap_traverse(data_thread, node1, node2->children[j]);
1104                                         }
1105                                 }
1106                         }
1107                 }
1108                 else {
1109                         for (j = 0; j < data->tree2->tree_type; j++) {
1110                                 if (node1->children[j]) {
1111                                         tree_overlap_traverse(data_thread, node1->children[j], node2);
1112                                 }
1113                         }
1114                 }
1115         }
1116 }
1117
1118 /**
1119  * a version of #tree_overlap_traverse that runs a callback to check if the nodes really intersect.
1120  */
1121 static void tree_overlap_traverse_cb(
1122         BVHOverlapData_Thread *data_thread,
1123         const BVHNode *node1, const BVHNode *node2)
1124 {
1125         BVHOverlapData_Shared *data = data_thread->shared;
1126         int j;
1127
1128         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1129                 /* check if node1 is a leaf */
1130                 if (!node1->totnode) {
1131                         /* check if node2 is a leaf */
1132                         if (!node2->totnode) {
1133                                 BVHTreeOverlap *overlap;
1134
1135                                 if (UNLIKELY(node1 == node2)) {
1136                                         return;
1137                                 }
1138
1139                                 /* only difference to tree_overlap_traverse! */
1140                                 if (data->callback(data->userdata, node1->index, node2->index, data_thread->thread)) {
1141                                         /* both leafs, insert overlap! */
1142                                         overlap = BLI_stack_push_r(data_thread->overlap);
1143                                         overlap->indexA = node1->index;
1144                                         overlap->indexB = node2->index;
1145                                 }
1146                         }
1147                         else {
1148                                 for (j = 0; j < data->tree2->tree_type; j++) {
1149                                         if (node2->children[j]) {
1150                                                 tree_overlap_traverse_cb(data_thread, node1, node2->children[j]);
1151                                         }
1152                                 }
1153                         }
1154                 }
1155                 else {
1156                         for (j = 0; j < data->tree2->tree_type; j++) {
1157                                 if (node1->children[j]) {
1158                                         tree_overlap_traverse_cb(data_thread, node1->children[j], node2);
1159                                 }
1160                         }
1161                 }
1162         }
1163 }
1164
1165 /**
1166  * Use to check the total number of threads #BLI_bvhtree_overlap will use.
1167  *
1168  * \warning Must be the first tree passed to #BLI_bvhtree_overlap!
1169  */
1170 int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
1171 {
1172         return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
1173 }
1174
1175 BVHTreeOverlap *BLI_bvhtree_overlap(
1176         const BVHTree *tree1, const BVHTree *tree2, unsigned int *r_overlap_tot,
1177         /* optional callback to test the overlap before adding (must be thread-safe!) */
1178         BVHTree_OverlapCallback callback, void *userdata)
1179 {
1180         const int thread_num = BLI_bvhtree_overlap_thread_num(tree1);
1181         int j;
1182         size_t total = 0;
1183         BVHTreeOverlap *overlap = NULL, *to = NULL;
1184         BVHOverlapData_Shared data_shared;
1185         BVHOverlapData_Thread *data = BLI_array_alloca(data, (size_t)thread_num);
1186         axis_t start_axis, stop_axis;
1187         
1188         /* check for compatibility of both trees (can't compare 14-DOP with 18-DOP) */
1189         if (UNLIKELY((tree1->axis != tree2->axis) &&
1190                      (tree1->axis == 14 || tree2->axis == 14) &&
1191                      (tree1->axis == 18 || tree2->axis == 18)))
1192         {
1193                 BLI_assert(0);
1194                 return NULL;
1195         }
1196
1197         start_axis = min_axis(tree1->start_axis, tree2->start_axis);
1198         stop_axis  = min_axis(tree1->stop_axis,  tree2->stop_axis);
1199         
1200         /* fast check root nodes for collision before doing big splitting + traversal */
1201         if (!tree_overlap_test(tree1->nodes[tree1->totleaf], tree2->nodes[tree2->totleaf], start_axis, stop_axis)) {
1202                 return NULL;
1203         }
1204
1205         data_shared.tree1 = tree1;
1206         data_shared.tree2 = tree2;
1207         data_shared.start_axis = start_axis;
1208         data_shared.stop_axis = stop_axis;
1209
1210         /* can be NULL */
1211         data_shared.callback = callback;
1212         data_shared.userdata = userdata;
1213
1214         for (j = 0; j < thread_num; j++) {
1215                 /* init BVHOverlapData_Thread */
1216                 data[j].shared = &data_shared;
1217                 data[j].overlap = BLI_stack_new(sizeof(BVHTreeOverlap), __func__);
1218
1219                 /* for callback */
1220                 data[j].thread = j;
1221         }
1222
1223 #pragma omp parallel for private(j) schedule(static)  if (tree1->totleaf > KDOPBVH_OMP_LIMIT)
1224         for (j = 0; j < thread_num; j++) {
1225                 if (callback) {
1226                         tree_overlap_traverse_cb(&data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
1227                 }
1228                 else {
1229                         tree_overlap_traverse(&data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
1230                 }
1231         }
1232         
1233         for (j = 0; j < thread_num; j++)
1234                 total += BLI_stack_count(data[j].overlap);
1235         
1236         to = overlap = MEM_mallocN(sizeof(BVHTreeOverlap) * total, "BVHTreeOverlap");
1237         
1238         for (j = 0; j < thread_num; j++) {
1239                 unsigned int count = (unsigned int)BLI_stack_count(data[j].overlap);
1240                 BLI_stack_pop_n(data[j].overlap, to, count);
1241                 BLI_stack_free(data[j].overlap);
1242                 to += count;
1243         }
1244
1245         *r_overlap_tot = (unsigned int)total;
1246         return overlap;
1247 }
1248
1249 /* Determines the nearest point of the given node BV. Returns the squared distance to that point. */
1250 static float calc_nearest_point_squared(const float proj[3], BVHNode *node, float nearest[3])
1251 {
1252         int i;
1253         const float *bv = node->bv;
1254
1255         /* nearest on AABB hull */
1256         for (i = 0; i != 3; i++, bv += 2) {
1257                 if (bv[0] > proj[i])
1258                         nearest[i] = bv[0];
1259                 else if (bv[1] < proj[i])
1260                         nearest[i] = bv[1];
1261                 else
1262                         nearest[i] = proj[i]; 
1263         }
1264
1265 #if 0
1266         /* nearest on a general hull */
1267         copy_v3_v3(nearest, data->co);
1268         for (i = data->tree->start_axis; i != data->tree->stop_axis; i++, bv += 2) {
1269                 float proj = dot_v3v3(nearest, KDOP_AXES[i]);
1270                 float dl = bv[0] - proj;
1271                 float du = bv[1] - proj;
1272
1273                 if (dl > 0) {
1274                         madd_v3_v3fl(nearest, KDOP_AXES[i], dl);
1275                 }
1276                 else if (du < 0) {
1277                         madd_v3_v3fl(nearest, KDOP_AXES[i], du);
1278                 }
1279         }
1280 #endif
1281
1282         return len_squared_v3v3(proj, nearest);
1283 }
1284
1285 /* TODO: use a priority queue to reduce the number of nodes looked on */
1286 static void dfs_find_nearest_dfs(BVHNearestData *data, BVHNode *node)
1287 {
1288         if (node->totnode == 0) {
1289                 if (data->callback)
1290                         data->callback(data->userdata, node->index, data->co, &data->nearest);
1291                 else {
1292                         data->nearest.index = node->index;
1293                         data->nearest.dist_sq = calc_nearest_point_squared(data->proj, node, data->nearest.co);
1294                 }
1295         }
1296         else {
1297                 /* Better heuristic to pick the closest node to dive on */
1298                 int i;
1299                 float nearest[3];
1300
1301                 if (data->proj[node->main_axis] <= node->children[0]->bv[node->main_axis * 2 + 1]) {
1302
1303                         for (i = 0; i != node->totnode; i++) {
1304                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1305                                         continue;
1306                                 dfs_find_nearest_dfs(data, node->children[i]);
1307                         }
1308                 }
1309                 else {
1310                         for (i = node->totnode - 1; i >= 0; i--) {
1311                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1312                                         continue;
1313                                 dfs_find_nearest_dfs(data, node->children[i]);
1314                         }
1315                 }
1316         }
1317 }
1318
1319 static void dfs_find_nearest_begin(BVHNearestData *data, BVHNode *node)
1320 {
1321         float nearest[3], dist_sq;
1322         dist_sq = calc_nearest_point_squared(data->proj, node, nearest);
1323         if (dist_sq >= data->nearest.dist_sq) {
1324                 return;
1325         }
1326         dfs_find_nearest_dfs(data, node);
1327 }
1328
1329
/* Disabled: heap based nearest search, kept for reference only.
 * NOTE(review): the code below looks stale, it calls 'calc_nearest_point' and reads
 * 'data->nearest.dist' while the enabled code in this file uses
 * 'calc_nearest_point_squared' and 'nearest.dist_sq'. PUSH_HEAP_BODY, POP_HEAP_BODY and
 * ADJUST_MEMORY are not defined in the visible part of the file -- confirm before re-enabling. */
#if 0

/* Heap entry: a node together with the distance computed for it. */
typedef struct NodeDistance {
        BVHNode *node;
        float dist;

} NodeDistance;

/* Number of entries of the on-stack heap used before growing it. */
#define DEFAULT_FIND_NEAREST_HEAP_SIZE 1024

/* Heap ordering: 'a' comes before 'b' when its distance is smaller. */
#define NodeDistance_priority(a, b) ((a).dist < (b).dist)

static void NodeDistance_push_heap(NodeDistance *heap, int heap_size)
PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)

static void NodeDistance_pop_heap(NodeDistance *heap, int heap_size)
POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)

/* Nearest-neighbor search that uses a heap. This function leads to an optimal number of
 * min-distance computations, but for normal tri-faces and a 6-DOP bounding volume a simple DFS
 * with local heuristics (as implemented in source/blender/blenkernel/intern/shrinkwrap.c)
 * works faster.
 *
 * It may make sense to use this function if the callback queries are very slow, or if it is
 * impossible to get a nice heuristic.
 *
 * Leaf children are handed to dfs_find_nearest_dfs right away, branch children are pushed on
 * the heap unless their distance is not smaller than the best distance found so far.
 * The search continues with the entry taken from the top of the heap.
 *
 * This function uses "malloc/free" instead of the MEM_* functions because it is intended to be
 * OpenMP safe. */
static void bfs_find_nearest(BVHNearestData *data, BVHNode *node)
{
        int i;
        NodeDistance default_heap[DEFAULT_FIND_NEAREST_HEAP_SIZE];
        NodeDistance *heap = default_heap, current;
        int heap_size = 0, max_heap_size = sizeof(default_heap) / sizeof(default_heap[0]);
        float nearest[3];

        /* statistics, only used by the commented debug print at the end */
        int callbacks = 0, push_heaps = 0;

        /* the root is a leaf: nothing to put on a heap */
        if (node->totnode == 0) {
                dfs_find_nearest_dfs(data, node);
                return;
        }

        current.node = node;
        current.dist = calc_nearest_point(data->proj, node, nearest);

        while (current.dist < data->nearest.dist) {
//              printf("%f : %f\n", current.dist, data->nearest.dist);
                for (i = 0; i < current.node->totnode; i++) {
                        BVHNode *child = current.node->children[i];
                        if (child->totnode == 0) {
                                callbacks++;
                                dfs_find_nearest_dfs(data, child);
                        }
                        else {
                                /* adjust heap size */
                                if ((heap_size >= max_heap_size) &&
                                    ADJUST_MEMORY(default_heap, (void **)&heap, heap_size + 1, &max_heap_size, sizeof(heap[0])) == false)
                                {
                                        printf("WARNING: bvh_find_nearest got out of memory\n");

                                        if (heap != default_heap)
                                                free(heap);

                                        return;
                                }

                                /* write the candidate into the next free slot,
                                 * it only becomes part of the heap when 'heap_size' is increased */
                                heap[heap_size].node = current.node->children[i];
                                heap[heap_size].dist = calc_nearest_point(data->proj, current.node->children[i], nearest);

                                if (heap[heap_size].dist >= data->nearest.dist) continue;
                                heap_size++;

                                NodeDistance_push_heap(heap, heap_size);
                                //                      PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
                                push_heaps++;
                        }
                }

                /* no branches left to expand */
                if (heap_size == 0) break;

                current = heap[0];
                NodeDistance_pop_heap(heap, heap_size);
//              POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
                heap_size--;
        }

//      printf("hsize=%d, callbacks=%d, pushs=%d\n", heap_size, callbacks, push_heaps);

        /* the heap was replaced by a malloc'ed one when it outgrew the stack buffer */
        if (heap != default_heap)
                free(heap);
}
#endif
1421
1422
1423 int BLI_bvhtree_find_nearest(BVHTree *tree, const float co[3], BVHTreeNearest *nearest,
1424                              BVHTree_NearestPointCallback callback, void *userdata)
1425 {
1426         axis_t axis_iter;
1427
1428         BVHNearestData data;
1429         BVHNode *root = tree->nodes[tree->totleaf];
1430
1431         /* init data to search */
1432         data.tree = tree;
1433         data.co = co;
1434
1435         data.callback = callback;
1436         data.userdata = userdata;
1437
1438         for (axis_iter = data.tree->start_axis; axis_iter != data.tree->stop_axis; axis_iter++) {
1439                 data.proj[axis_iter] = dot_v3v3(data.co, KDOP_AXES[axis_iter]);
1440         }
1441
1442         if (nearest) {
1443                 memcpy(&data.nearest, nearest, sizeof(*nearest));
1444         }
1445         else {
1446                 data.nearest.index = -1;
1447                 data.nearest.dist_sq = FLT_MAX;
1448         }
1449
1450         /* dfs search */
1451         if (root)
1452                 dfs_find_nearest_begin(&data, root);
1453
1454         /* copy back results */
1455         if (nearest) {
1456                 memcpy(nearest, &data.nearest, sizeof(*nearest));
1457         }
1458
1459         return data.nearest.index;
1460 }
1461
1462
1463 /**
1464  * Raycast - BLI_bvhtree_ray_cast
1465  *
1466  * raycast is done by performing a DFS on the BVHTree and saving the closest hit
1467  */
1468
1469
1470 /* Determines the distance that the ray must travel to hit the bounding volume of the given node */
1471 static float ray_nearest_hit(BVHRayCastData *data, const float bv[6])
1472 {
1473         int i;
1474
1475         float low = 0, upper = data->hit.dist;
1476
1477         for (i = 0; i != 3; i++, bv += 2) {
1478                 if (data->ray_dot_axis[i] == 0.0f) {
1479                         /* axis aligned ray */
1480                         if (data->ray.origin[i] < bv[0] - data->ray.radius ||
1481                             data->ray.origin[i] > bv[1] + data->ray.radius)
1482                         {
1483                                 return FLT_MAX;
1484                         }
1485                 }
1486                 else {
1487                         float ll = (bv[0] - data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1488                         float lu = (bv[1] + data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1489
1490                         if (data->ray_dot_axis[i] > 0.0f) {
1491                                 if (ll > low) low = ll;
1492                                 if (lu < upper) upper = lu;
1493                         }
1494                         else {
1495                                 if (lu > low) low = lu;
1496                                 if (ll < upper) upper = ll;
1497                         }
1498         
1499                         if (low > upper) return FLT_MAX;
1500                 }
1501         }
1502         return low;
1503 }
1504
1505 /**
1506  * Determines the distance that the ray must travel to hit the bounding volume of the given node
1507  * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
1508  * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
1509  *
1510  * TODO this doesn't take data->ray.radius into consideration */
1511 static float fast_ray_nearest_hit(const BVHRayCastData *data, const BVHNode *node)
1512 {
1513         const float *bv = node->bv;
1514         
1515         float t1x = (bv[data->index[0]] - data->ray.origin[0]) * data->idot_axis[0];
1516         float t2x = (bv[data->index[1]] - data->ray.origin[0]) * data->idot_axis[0];
1517         float t1y = (bv[data->index[2]] - data->ray.origin[1]) * data->idot_axis[1];
1518         float t2y = (bv[data->index[3]] - data->ray.origin[1]) * data->idot_axis[1];
1519         float t1z = (bv[data->index[4]] - data->ray.origin[2]) * data->idot_axis[2];
1520         float t2z = (bv[data->index[5]] - data->ray.origin[2]) * data->idot_axis[2];
1521
1522         if ((t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) ||
1523             (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) ||
1524             (t1x > data->hit.dist || t1y > data->hit.dist || t1z > data->hit.dist))
1525         {
1526                 return FLT_MAX;
1527         }
1528         else {
1529                 return max_fff(t1x, t1y, t1z);
1530         }
1531 }
1532
1533 static void dfs_raycast(BVHRayCastData *data, BVHNode *node)
1534 {
1535         int i;
1536
1537         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1538          * before calling the ray-primitive functions */
1539         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1540         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1541         if (dist >= data->hit.dist) return;
1542
1543         if (node->totnode == 0) {
1544                 if (data->callback) {
1545                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1546                 }
1547                 else {
1548                         data->hit.index = node->index;
1549                         data->hit.dist  = dist;
1550                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1551                 }
1552         }
1553         else {
1554                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1555                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1556                         for (i = 0; i != node->totnode; i++) {
1557                                 dfs_raycast(data, node->children[i]);
1558                         }
1559                 }
1560                 else {
1561                         for (i = node->totnode - 1; i >= 0; i--) {
1562                                 dfs_raycast(data, node->children[i]);
1563                         }
1564                 }
1565         }
1566 }
1567
1568 static void dfs_raycast_all(BVHRayCastData *data, BVHNode *node)
1569 {
1570         int i;
1571
1572         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1573          * before calling the ray-primitive functions */
1574         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1575         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1576
1577         if (node->totnode == 0) {
1578                 if (data->callback) {
1579                         data->hit.index = -1;
1580                         data->hit.dist = FLT_MAX;
1581                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1582                 }
1583                 else {
1584                         data->hit.index = node->index;
1585                         data->hit.dist  = dist;
1586                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1587                 }
1588         }
1589         else {
1590                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1591                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1592                         for (i = 0; i != node->totnode; i++) {
1593                                 dfs_raycast_all(data, node->children[i]);
1594                         }
1595                 }
1596                 else {
1597                         for (i = node->totnode - 1; i >= 0; i--) {
1598                                 dfs_raycast_all(data, node->children[i]);
1599                         }
1600                 }
1601         }
1602 }
1603
#if 0
/**
 * Non-recursive variant of #dfs_raycast (currently disabled).
 * Moves on through node->skip[1] once a node is rejected or a leaf has been handled,
 * and descends into node->children[0] otherwise.
 */
static void iterative_raycast(BVHRayCastData *data, BVHNode *node)
{
	while (node) {
		const float dist = fast_ray_nearest_hit(data, node);

		if (!(dist >= data->hit.dist)) {
			if (node->totnode != 0) {
				/* inner node: keep descending */
				node = node->children[0];
				continue;
			}

			/* leaf */
			if (data->callback) {
				data->callback(data->userdata, node->index, &data->ray, &data->hit);
			}
			else {
				data->hit.index = node->index;
				data->hit.dist  = dist;
				madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
			}
		}

		/* rejected node or finished leaf */
		node = node->skip[1];
	}
}
#endif
1632
1633 static void bvhtree_ray_cast_data_precalc(BVHRayCastData *data, int flag)
1634 {
1635         int i;
1636
1637         for (i = 0; i < 3; i++) {
1638                 data->ray_dot_axis[i] = dot_v3v3(data->ray.direction, KDOP_AXES[i]);
1639                 data->idot_axis[i] = 1.0f / data->ray_dot_axis[i];
1640
1641                 if (fabsf(data->ray_dot_axis[i]) < FLT_EPSILON) {
1642                         data->ray_dot_axis[i] = 0.0;
1643                 }
1644                 data->index[2 * i] = data->idot_axis[i] < 0.0f ? 1 : 0;
1645                 data->index[2 * i + 1] = 1 - data->index[2 * i];
1646                 data->index[2 * i]   += 2 * i;
1647                 data->index[2 * i + 1] += 2 * i;
1648         }
1649
1650 #ifdef USE_KDOPBVH_WATERTIGHT
1651         if (flag & BVH_RAYCAST_WATERTIGHT) {
1652                 isect_ray_tri_watertight_v3_precalc(&data->isect_precalc, data->ray.direction);
1653                 data->ray.isect_precalc = &data->isect_precalc;
1654         }
1655         else {
1656                 data->ray.isect_precalc = NULL;
1657         }
1658 #else
1659         UNUSED_VARS(flag);
1660 #endif
1661 }
1662
1663 int BLI_bvhtree_ray_cast_ex(
1664         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1665         BVHTree_RayCastCallback callback, void *userdata,
1666         int flag)
1667 {
1668         BVHRayCastData data;
1669         BVHNode *root = tree->nodes[tree->totleaf];
1670
1671         BLI_ASSERT_UNIT_V3(dir);
1672
1673         data.tree = tree;
1674
1675         data.callback = callback;
1676         data.userdata = userdata;
1677
1678         copy_v3_v3(data.ray.origin,    co);
1679         copy_v3_v3(data.ray.direction, dir);
1680         data.ray.radius = radius;
1681
1682         bvhtree_ray_cast_data_precalc(&data, flag);
1683
1684         if (hit) {
1685                 memcpy(&data.hit, hit, sizeof(*hit));
1686         }
1687         else {
1688                 data.hit.index = -1;
1689                 data.hit.dist = FLT_MAX;
1690         }
1691
1692         if (root) {
1693                 dfs_raycast(&data, root);
1694 //              iterative_raycast(&data, root);
1695         }
1696
1697
1698         if (hit)
1699                 memcpy(hit, &data.hit, sizeof(*hit));
1700
1701         return data.hit.index;
1702 }
1703
1704 int BLI_bvhtree_ray_cast(
1705         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1706         BVHTree_RayCastCallback callback, void *userdata)
1707 {
1708         return BLI_bvhtree_ray_cast_ex(tree, co, dir, radius, hit, callback, userdata, BVH_RAYCAST_DEFAULT);
1709 }
1710
1711 float BLI_bvhtree_bb_raycast(const float bv[6], const float light_start[3], const float light_end[3], float pos[3])
1712 {
1713         BVHRayCastData data;
1714         float dist;
1715
1716         data.hit.dist = FLT_MAX;
1717         
1718         /* get light direction */
1719         sub_v3_v3v3(data.ray.direction, light_end, light_start);
1720         
1721         data.ray.radius = 0.0;
1722         
1723         copy_v3_v3(data.ray.origin, light_start);
1724
1725         normalize_v3(data.ray.direction);
1726         copy_v3_v3(data.ray_dot_axis, data.ray.direction);
1727         
1728         dist = ray_nearest_hit(&data, bv);
1729
1730         madd_v3_v3v3fl(pos, light_start, data.ray.direction, dist);
1731
1732         return dist;
1733         
1734 }
1735
1736 /**
1737  * Calls the callback for every ray intersection
1738  */
1739 int BLI_bvhtree_ray_cast_all_ex(
1740         BVHTree *tree, const float co[3], const float dir[3], float radius,
1741         BVHTree_RayCastCallback callback, void *userdata,
1742         int flag)
1743 {
1744         BVHRayCastData data;
1745         BVHNode *root = tree->nodes[tree->totleaf];
1746
1747         BLI_ASSERT_UNIT_V3(dir);
1748
1749         data.tree = tree;
1750
1751         data.callback = callback;
1752         data.userdata = userdata;
1753
1754         copy_v3_v3(data.ray.origin,    co);
1755         copy_v3_v3(data.ray.direction, dir);
1756         data.ray.radius = radius;
1757
1758         bvhtree_ray_cast_data_precalc(&data, flag);
1759
1760         data.hit.index = -1;
1761         data.hit.dist = FLT_MAX;
1762
1763         if (root) {
1764                 dfs_raycast_all(&data, root);
1765         }
1766
1767         return data.hit.index;
1768 }
1769
1770 int BLI_bvhtree_ray_cast_all(
1771         BVHTree *tree, const float co[3], const float dir[3], float radius,
1772         BVHTree_RayCastCallback callback, void *userdata)
1773 {
1774         return BLI_bvhtree_ray_cast_all_ex(tree, co, dir, radius, callback, userdata, BVH_RAYCAST_DEFAULT);
1775 }
1776
1777 /**
1778  * Range Query - as request by broken :P
1779  *
1780  * Allocs and fills an array with the indexs of node that are on the given spherical range (center, radius) 
1781  * Returns the size of the array.
1782  */
1783 typedef struct RangeQueryData {
1784         BVHTree *tree;
1785         const float *center;
1786         float radius_sq;  /* squared radius */
1787
1788         int hits;
1789
1790         BVHTree_RangeQuery callback;
1791         void *userdata;
1792 } RangeQueryData;
1793
1794
1795 static void dfs_range_query(RangeQueryData *data, BVHNode *node)
1796 {
1797         if (node->totnode == 0) {
1798 #if 0   /*UNUSED*/
1799                 /* Calculate the node min-coords (if the node was a point then this is the point coordinates) */
1800                 float co[3];
1801                 co[0] = node->bv[0];
1802                 co[1] = node->bv[2];
1803                 co[2] = node->bv[4];
1804 #endif
1805         }
1806         else {
1807                 int i;
1808                 for (i = 0; i != node->totnode; i++) {
1809                         float nearest[3];
1810                         float dist_sq = calc_nearest_point_squared(data->center, node->children[i], nearest);
1811                         if (dist_sq < data->radius_sq) {
1812                                 /* Its a leaf.. call the callback */
1813                                 if (node->children[i]->totnode == 0) {
1814                                         data->hits++;
1815                                         data->callback(data->userdata, node->children[i]->index, dist_sq);
1816                                 }
1817                                 else
1818                                         dfs_range_query(data, node->children[i]);
1819                         }
1820                 }
1821         }
1822 }
1823
1824 int BLI_bvhtree_range_query(BVHTree *tree, const float co[3], float radius, BVHTree_RangeQuery callback, void *userdata)
1825 {
1826         BVHNode *root = tree->nodes[tree->totleaf];
1827
1828         RangeQueryData data;
1829         data.tree = tree;
1830         data.center = co;
1831         data.radius_sq = radius * radius;
1832         data.hits = 0;
1833
1834         data.callback = callback;
1835         data.userdata = userdata;
1836
1837         if (root != NULL) {
1838                 float nearest[3];
1839                 float dist_sq = calc_nearest_point_squared(data.center, root, nearest);
1840                 if (dist_sq < data.radius_sq) {
1841                         /* Its a leaf.. call the callback */
1842                         if (root->totnode == 0) {
1843                                 data.hits++;
1844                                 data.callback(data.userdata, root->index, dist_sq);
1845                         }
1846                         else
1847                                 dfs_range_query(&data, root);
1848                 }
1849         }
1850
1851         return data.hits;
1852 }