Cleanup: comment blocks
[blender.git] / source / blender / blenlib / intern / BLI_kdopbvh.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Daniel Genrich, Andre Pinto
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/blenlib/intern/BLI_kdopbvh.c
29  *  \ingroup bli
30  *  \brief BVH-tree implementation.
31  *
32  * k-DOP BVH (Discrete Oriented Polytope, Bounding Volume Hierarchy).
33  * A k-DOP is represented as k/2 pairs of min , max values for k/2 directions (intervals, "slabs").
34  *
35  * See: http://www.gris.uni-tuebingen.de/people/staff/jmezger/papers/bvh.pdf
36  *
37  * implements a bvh-tree structure with support for:
38  *
39  * - Ray-cast:
40  *   #BLI_bvhtree_ray_cast, #BVHRayCastData
41  * - Nearest point on surface:
42  *   #BLI_bvhtree_find_nearest, #BVHNearestData
43  * - Overlapping 2 trees:
44  *   #BLI_bvhtree_overlap, #BVHOverlapData_Shared, #BVHOverlapData_Thread
45  * - Range Query:
46  *   #BLI_bvhtree_range_query
47  */
48
49 #include <assert.h>
50
51 #include "MEM_guardedalloc.h"
52
53 #include "BLI_utildefines.h"
54 #include "BLI_alloca.h"
55 #include "BLI_stack.h"
56 #include "BLI_kdopbvh.h"
57 #include "BLI_math.h"
58 #include "BLI_strict_flags.h"
59 #include "BLI_task.h"
60
61 /* used for iterative_raycast */
62 // #define USE_SKIP_LINKS
63
/* Upper bound for BVHTree.tree_type (children per branch).
 * Sizes the fixed nth_positions[MAX_TREETYPE + 1] stack array used while
 * dividing the leafs of a branch. */
#define MAX_TREETYPE 32

/* Setting zero so we can catch bugs in BLI_task/KDOPBVH.
 * TODO(sergey): Deduplicate the limits with PBVH from BKE.
 *
 * NOTE(review): the users of this threshold are outside the visible part of
 * the file; presumably it is a leaf count deciding when to use threading - confirm.
 */
#ifdef DEBUG
#  define KDOPBVH_THREAD_LEAF_THRESHOLD 0
#else
#  define KDOPBVH_THREAD_LEAF_THRESHOLD 1024
#endif
74
75
76 /* -------------------------------------------------------------------- */
77
78 /** \name Struct Definitions
79  * \{ */
80
/* Small unsigned type used for indices into bvhtree_kdop_axes
 * (see start_axis / stop_axis in BVHTree). */
typedef unsigned char axis_t;
82
/* A single node of the tree; used for branches as well as for leafs. */
typedef struct BVHNode {
	/* child pointers, only the first `totnode` entries are in use */
	struct BVHNode **children;
	/* some user defined traversals need the parent */
	struct BVHNode *parent;
#ifdef USE_SKIP_LINKS
	struct BVHNode *skip[2];
#endif
	/* Bounding volume of all nodes, max 13 axis
	 * (stored as a min, max pair per axis) */
	float *bv;
	/* face, edge, vertex index */
	int index;
	/* how many nodes are used, used for speedup */
	char totnode;
	/* Axis used to split this node */
	char main_axis;
} BVHNode;
94
95 /* keep under 26 bytes for speed purposes */
96 struct BVHTree {
97         BVHNode **nodes;
98         BVHNode *nodearray;     /* pre-alloc branch nodes */
99         BVHNode **nodechild;    /* pre-alloc childs for nodes */
100         float   *nodebv;        /* pre-alloc bounding-volumes for nodes */
101         float epsilon;          /* epslion is used for inflation of the k-dop      */
102         int totleaf;            /* leafs */
103         int totbranch;
104         axis_t start_axis, stop_axis;  /* bvhtree_kdop_axes array indices according to axis */
105         axis_t axis;                   /* kdop type (6 => OBB, 7 => AABB, ...) */
106         char tree_type;                /* type of tree (4 => quadtree) */
107 };
108
109 /* optimization, ensure we stay small */
110 BLI_STATIC_ASSERT((sizeof(void *) == 8 && sizeof(BVHTree) <= 48) ||
111                   (sizeof(void *) == 4 && sizeof(BVHTree) <= 32),
112                   "over sized")
113
114 /* avoid duplicating vars in BVHOverlapData_Thread */
115 typedef struct BVHOverlapData_Shared {
116         const BVHTree *tree1, *tree2;
117         axis_t start_axis, stop_axis;
118
119         /* use for callbacks */
120         BVHTree_OverlapCallback callback;
121         void *userdata;
122 } BVHOverlapData_Shared;
123
124 typedef struct BVHOverlapData_Thread {
125         BVHOverlapData_Shared *shared;
126         struct BLI_Stack *overlap;  /* store BVHTreeOverlap */
127         /* use for callbacks */
128         int thread;
129 } BVHOverlapData_Thread;
130
131 typedef struct BVHNearestData {
132         BVHTree *tree;
133         const float *co;
134         BVHTree_NearestPointCallback callback;
135         void    *userdata;
136         float proj[13];         /* coordinates projection over axis */
137         BVHTreeNearest nearest;
138
139 } BVHNearestData;
140
141 typedef struct BVHRayCastData {
142         BVHTree *tree;
143
144         BVHTree_RayCastCallback callback;
145         void    *userdata;
146
147
148         BVHTreeRay ray;
149
150 #ifdef USE_KDOPBVH_WATERTIGHT
151         struct IsectRayPrecalc isect_precalc;
152 #endif
153
154         /* initialized by bvhtree_ray_cast_data_precalc */
155         float ray_dot_axis[13];
156         float idot_axis[13];
157         int index[6];
158
159         BVHTreeRayHit hit;
160 } BVHRayCastData;
161
162 typedef struct BVHNearestRayData {
163         BVHTree *tree;
164         BVHTree_NearestToRayCallback callback;
165         void    *userdata;
166
167         struct {
168                 bool sign[3];
169                 float origin[3];
170                 float direction[3];
171
172                 float direction_scaled_square[3];
173                 float inv_dir[3];
174
175                 float cdot_axis[3];
176         } ray;
177
178         bool pick_smallest[3];
179
180         BVHTreeNearest nearest;
181
182         float scale[3];
183 } BVHNearestRayData;
184
185 /** \} */
186
187
/**
 * Bounding Volume Hierarchy Definition
 *
 * Table of the 13 directions a k-DOP can be measured along,
 * a tree uses the entries from its start_axis up to (excluding) its stop_axis.
 *
 * Notes:
 * - From OBB until 26-DOP, all bounding volumes are possible, just choose the type below.
 * - You have to choose the type at compile time at the moment.
 * - You can choose the tree type (binary, quad, octree), choose below.
 */

const float bvhtree_kdop_axes[13][3] = {
	{1.0f,  0.0f,  0.0f},
	{0.0f,  1.0f,  0.0f},
	{0.0f,  0.0f,  1.0f},
	{1.0f,  1.0f,  1.0f},
	{1.0f, -1.0f,  1.0f},
	{1.0f,  1.0f, -1.0f},
	{1.0f, -1.0f, -1.0f},
	{1.0f,  1.0f,  0.0f},
	{1.0f,  0.0f,  1.0f},
	{0.0f,  1.0f,  1.0f},
	{1.0f, -1.0f,  0.0f},
	{1.0f,  0.0f, -1.0f},
	{0.0f,  1.0f, -1.0f},
};
201
202
203 /* -------------------------------------------------------------------- */
204
205 /** \name Utility Functions
206  * \{ */
207
208 MINLINE axis_t min_axis(axis_t a, axis_t b)
209 {
210         return (a < b) ? a : b;
211 }
212 #if 0
213 MINLINE axis_t max_axis(axis_t a, axis_t b)
214 {
215         return (b < a) ? a : b;
216 }
217 #endif
218
219 #if 0
220
/*
 * Generic push and pop heap
 *
 * Both macros work on an array backed binary heap and expand to a single
 * statement (`do { ... } while (0)`), so they can be used safely as the body
 * of an unbraced `if` / `else`.
 *
 * - HEAP_TYPE: element type of the array.
 * - PRIORITY(a, b): must evaluate to true when `a` has to be placed closer to the root than `b`.
 * - heap: the array.
 * - heap_size: number of elements, see each macro below.
 *
 * NOTE: `heap` and `heap_size` are evaluated more than once,
 * don't pass expressions with side effects.
 */

/* Sift the element stored last (heap[heap_size - 1]) up to its place. */
#define PUSH_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size) \
	do { \
		HEAP_TYPE element = (heap)[(heap_size) - 1]; \
		int child = (heap_size) - 1; \
		while (child != 0) { \
			int parent = (child - 1) / 2; \
			if (PRIORITY(element, (heap)[parent])) { \
				(heap)[child] = (heap)[parent]; \
				child = parent; \
			} \
			else { \
				break; \
			} \
		} \
		(heap)[child] = element; \
	} while (0)

/* Overwrite the root: the last element (heap[heap_size - 1]) is sifted down from index 0.
 * `heap_size` itself is not modified by the macro. */
#define POP_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size) \
	do { \
		HEAP_TYPE element = (heap)[(heap_size) - 1]; \
		int parent = 0; \
		while (parent < ((heap_size) - 1) / 2) { \
			int child2 = (parent + 1) * 2; \
			if (PRIORITY((heap)[child2 - 1], (heap)[child2])) { \
				child2--; \
			} \
			if (PRIORITY(element, (heap)[child2])) { \
				break; \
			} \
			(heap)[parent] = (heap)[child2]; \
			parent = child2; \
		} \
		(heap)[parent] = element; \
	} while (0)
258
/**
 * Make sure `*memblock` can hold at least `new_size` items.
 *
 * \param local_memblock: Buffer not owned by the heap (typically on the stack).
 * While `*memblock` still points to it, growing allocates a new heap block and
 * copies the old items, otherwise the heap block is reallocated.
 * \param memblock: In/out pointer to the buffer currently in use.
 * \param new_size: Number of items that must fit.
 * \param max_size: In/out capacity of `*memblock` (in items).
 * \param size_per_item: Size of one item in bytes.
 * \return true when the buffer is large enough on return,
 * false when the allocation failed (`*memblock` and `*max_size` are left untouched).
 */
static bool ADJUST_MEMORY(void *local_memblock, void **memblock, int new_size, int *max_size, int size_per_item)
{
	int new_max_size;
	void *new_memblock = NULL;

	if (new_size <= *max_size) {
		return true;
	}

	/* Grow by doubling, but never end up below the requested size
	 * (doubling once isn't enough when `new_size > 2 * max_size`, or when `max_size` is zero). */
	new_max_size = *max_size * 2;
	if (new_max_size < new_size) {
		new_max_size = new_size;
	}

	if (*memblock == local_memblock) {
		new_memblock = malloc((size_t)size_per_item * (size_t)new_max_size);
		/* only copy when the allocation succeeded */
		if (new_memblock) {
			memcpy(new_memblock, *memblock, (size_t)size_per_item * (size_t)*max_size);
		}
	}
	else {
		new_memblock = realloc(*memblock, (size_t)size_per_item * (size_t)new_max_size);
	}

	if (new_memblock == NULL) {
		return false;
	}

	*memblock = new_memblock;
	*max_size = new_max_size;
	return true;
}
285 #endif
286
287 /**
288  * Introsort
 * derived with permission from the following Java code:
290  * http://ralphunden.net/content/tutorials/a-guide-to-introsort/
291  * and he derived it from the SUN STL
292  */
293
294 //static int size_threshold = 16;
295
296 #if 0
/**
 * Common methods for all algorithms
 */

/**
 * Integer base 2 logarithm, rounded down.
 *
 * Uses integer halving only, so the result doesn't depend on floating point rounding.
 *
 * \return floor(log2(a)) for `a >= 1`, zero for `a <= 0` (where the logarithm isn't defined).
 */
static int floor_lg(int a)
{
	int result = 0;

	while (a > 1) {
		a /= 2;
		result++;
	}
	return result;
}
304 #endif
305
306 static void node_minmax_init(const BVHTree *tree, BVHNode *node)
307 {
308         axis_t axis_iter;
309         float (*bv)[2] = (float (*)[2])node->bv;
310
311         for (axis_iter = tree->start_axis; axis_iter != tree->stop_axis; axis_iter++) {
312                 bv[axis_iter][0] =  FLT_MAX;
313                 bv[axis_iter][1] = -FLT_MAX;
314         }
315 }
316
317 /** \} */
318
319
320 /* -------------------------------------------------------------------- */
321
322 /** \name Balance Utility Functions
323  * \{ */
324
325 /**
326  * Insertion sort algorithm
327  */
328 static void bvh_insertionsort(BVHNode **a, int lo, int hi, int axis)
329 {
330         int i, j;
331         BVHNode *t;
332         for (i = lo; i < hi; i++) {
333                 j = i;
334                 t = a[i];
335                 while ((j != lo) && (t->bv[axis] < (a[j - 1])->bv[axis])) {
336                         a[j] = a[j - 1];
337                         j--;
338                 }
339                 a[j] = t;
340         }
341 }
342
343 static int bvh_partition(BVHNode **a, int lo, int hi, BVHNode *x, int axis)
344 {
345         int i = lo, j = hi;
346         while (1) {
347                 while ((a[i])->bv[axis] < x->bv[axis]) i++;
348                 j--;
349                 while (x->bv[axis] < (a[j])->bv[axis]) j--;
350                 if (!(i < j))
351                         return i;
352                 SWAP(BVHNode *, a[i], a[j]);
353                 i++;
354         }
355 }
356
357 #if 0
358 /**
359  * Heapsort algorithm
360  */
361 static void bvh_downheap(BVHNode **a, int i, int n, int lo, int axis)
362 {
363         BVHNode *d = a[lo + i - 1];
364         int child;
365         while (i <= n / 2) {
366                 child = 2 * i;
367                 if ((child < n) && ((a[lo + child - 1])->bv[axis] < (a[lo + child])->bv[axis])) {
368                         child++;
369                 }
370                 if (!(d->bv[axis] < (a[lo + child - 1])->bv[axis])) break;
371                 a[lo + i - 1] = a[lo + child - 1];
372                 i = child;
373         }
374         a[lo + i - 1] = d;
375 }
376
377 static void bvh_heapsort(BVHNode **a, int lo, int hi, int axis)
378 {
379         int n = hi - lo, i;
380         for (i = n / 2; i >= 1; i = i - 1) {
381                 bvh_downheap(a, i, n, lo, axis);
382         }
383         for (i = n; i > 1; i = i - 1) {
384                 SWAP(BVHNode *, a[lo], a[lo + i - 1]);
385                 bvh_downheap(a, 1, i - 1, lo, axis);
386         }
387 }
388 #endif
389
390 static BVHNode *bvh_medianof3(BVHNode **a, int lo, int mid, int hi, int axis)  /* returns Sortable */
391 {
392         if ((a[mid])->bv[axis] < (a[lo])->bv[axis]) {
393                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
394                         return a[mid];
395                 else {
396                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
397                                 return a[hi];
398                         else
399                                 return a[lo];
400                 }
401         }
402         else {
403                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis]) {
404                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
405                                 return a[lo];
406                         else
407                                 return a[hi];
408                 }
409                 else
410                         return a[mid];
411         }
412 }
413
414 #if 0
415 /*
416  * Quicksort algorithm modified for Introsort
417  */
418 static void bvh_introsort_loop(BVHNode **a, int lo, int hi, int depth_limit, int axis)
419 {
420         int p;
421
422         while (hi - lo > size_threshold) {
423                 if (depth_limit == 0) {
424                         bvh_heapsort(a, lo, hi, axis);
425                         return;
426                 }
427                 depth_limit = depth_limit - 1;
428                 p = bvh_partition(a, lo, hi, bvh_medianof3(a, lo, lo + ((hi - lo) / 2) + 1, hi - 1, axis), axis);
429                 bvh_introsort_loop(a, p, hi, depth_limit, axis);
430                 hi = p;
431         }
432 }
433
434 static void sort(BVHNode **a0, int begin, int end, int axis)
435 {
436         if (begin < end) {
437                 BVHNode **a = a0;
438                 bvh_introsort_loop(a, begin, end, 2 * floor_lg(end - begin), axis);
439                 bvh_insertionsort(a, begin, end, axis);
440         }
441 }
442
443 static void sort_along_axis(BVHTree *tree, int start, int end, int axis)
444 {
445         sort(tree->nodes, start, end, axis);
446 }
447 #endif
448
449 /**
450  * \note after a call to this function you can expect one of:
451  * - every node to left of a[n] are smaller or equal to it
452  * - every node to the right of a[n] are greater or equal to it */
453 static int partition_nth_element(BVHNode **a, int _begin, int _end, int n, int axis)
454 {
455         int begin = _begin, end = _end, cut;
456         while (end - begin > 3) {
457                 cut = bvh_partition(a, begin, end, bvh_medianof3(a, begin, (begin + end) / 2, end - 1, axis), axis);
458                 if (cut <= n)
459                         begin = cut;
460                 else
461                         end = cut;
462         }
463         bvh_insertionsort(a, begin, end, axis);
464
465         return n;
466 }
467
468 #ifdef USE_SKIP_LINKS
469 static void build_skip_links(BVHTree *tree, BVHNode *node, BVHNode *left, BVHNode *right)
470 {
471         int i;
472         
473         node->skip[0] = left;
474         node->skip[1] = right;
475         
476         for (i = 0; i < node->totnode; i++) {
477                 if (i + 1 < node->totnode)
478                         build_skip_links(tree, node->children[i], left, node->children[i + 1]);
479                 else
480                         build_skip_links(tree, node->children[i], left, right);
481
482                 left = node->children[i];
483         }
484 }
485 #endif
486
487 /*
488  * BVHTree bounding volumes functions
489  */
490 static void create_kdop_hull(BVHTree *tree, BVHNode *node, const float *co, int numpoints, int moving)
491 {
492         float newminmax;
493         float *bv = node->bv;
494         int k;
495         axis_t axis_iter;
496         
497         /* don't init boudings for the moving case */
498         if (!moving) {
499                 node_minmax_init(tree, node);
500         }
501
502         for (k = 0; k < numpoints; k++) {
503                 /* for all Axes. */
504                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
505                         newminmax = dot_v3v3(&co[k * 3], bvhtree_kdop_axes[axis_iter]);
506                         if (newminmax < bv[2 * axis_iter])
507                                 bv[2 * axis_iter] = newminmax;
508                         if (newminmax > bv[(2 * axis_iter) + 1])
509                                 bv[(2 * axis_iter) + 1] = newminmax;
510                 }
511         }
512 }
513
514 /**
515  * \note depends on the fact that the BVH's for each face is already build
516  */
517 static void refit_kdop_hull(BVHTree *tree, BVHNode *node, int start, int end)
518 {
519         float newmin, newmax;
520         float *bv = node->bv;
521         int j;
522         axis_t axis_iter;
523
524         node_minmax_init(tree, node);
525
526         for (j = start; j < end; j++) {
527                 /* for all Axes. */
528                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
529                         newmin = tree->nodes[j]->bv[(2 * axis_iter)];
530                         if ((newmin < bv[(2 * axis_iter)]))
531                                 bv[(2 * axis_iter)] = newmin;
532
533                         newmax = tree->nodes[j]->bv[(2 * axis_iter) + 1];
534                         if ((newmax > bv[(2 * axis_iter) + 1]))
535                                 bv[(2 * axis_iter) + 1] = newmax;
536                 }
537         }
538
539 }
540
/**
 * Find the axis along which the bounding volume has the largest extent.
 * Only supports the x, y, z axes at the moment,
 * kept plain and simple for speed's sake.
 *
 * \param bv: (min, max) pairs, the first 6 values (x, y, z) are read.
 * \return the index of the `max` value of the widest axis: 1 (x), 3 (y) or 5 (z).
 */
static char get_largest_axis(const float *bv)
{
	const float extent_x = bv[1] - bv[0];
	const float extent_y = bv[3] - bv[2];
	const float extent_z = bv[5] - bv[4];

	if (extent_x > extent_y) {
		/* x or z */
		return (extent_x > extent_z) ? 1 : 5;
	}
	/* y or z */
	return (extent_y > extent_z) ? 3 : 5;
}
564
565 /**
566  * bottom-up update of bvh node BV
567  * join the children on the parent BV */
568 static void node_join(BVHTree *tree, BVHNode *node)
569 {
570         int i;
571         axis_t axis_iter;
572
573         node_minmax_init(tree, node);
574         
575         for (i = 0; i < tree->tree_type; i++) {
576                 if (node->children[i]) {
577                         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
578                                 /* update minimum */
579                                 if (node->children[i]->bv[(2 * axis_iter)] < node->bv[(2 * axis_iter)])
580                                         node->bv[(2 * axis_iter)] = node->children[i]->bv[(2 * axis_iter)];
581
582                                 /* update maximum */
583                                 if (node->children[i]->bv[(2 * axis_iter) + 1] > node->bv[(2 * axis_iter) + 1])
584                                         node->bv[(2 * axis_iter) + 1] = node->children[i]->bv[(2 * axis_iter) + 1];
585                         }
586                 }
587                 else
588                         break;
589         }
590 }
591
592 /*
593  * Debug and information functions
594  */
595 #if 0
596 static void bvhtree_print_tree(BVHTree *tree, BVHNode *node, int depth)
597 {
598         int i;
599         axis_t axis_iter;
600
601         for (i = 0; i < depth; i++) printf(" ");
602         printf(" - %d (%ld): ", node->index, (long int)(node - tree->nodearray));
603         for (axis_iter = (axis_t)(2 * tree->start_axis);
604              axis_iter < (axis_t)(2 * tree->stop_axis);
605              axis_iter++)
606         {
607                 printf("%.3f ", node->bv[axis_iter]);
608         }
609         printf("\n");
610
611         for (i = 0; i < tree->tree_type; i++)
612                 if (node->children[i])
613                         bvhtree_print_tree(tree, node->children[i], depth + 1);
614 }
615
616 static void bvhtree_info(BVHTree *tree)
617 {
618         printf("BVHTree info\n");
619         printf("tree_type = %d, axis = %d, epsilon = %f\n", tree->tree_type, tree->axis, tree->epsilon);
620         printf("nodes = %d, branches = %d, leafs = %d\n", tree->totbranch + tree->totleaf,  tree->totbranch, tree->totleaf);
621         printf("Memory per node = %ldbytes\n", sizeof(BVHNode) + sizeof(BVHNode *) * tree->tree_type + sizeof(float) * tree->axis);
622         printf("BV memory = %dbytes\n", (int)MEM_allocN_len(tree->nodebv));
623
624         printf("Total memory = %ldbytes\n", sizeof(BVHTree) +
625                MEM_allocN_len(tree->nodes) +
626                MEM_allocN_len(tree->nodearray) +
627                MEM_allocN_len(tree->nodechild) +
628                MEM_allocN_len(tree->nodebv));
629
630 //      bvhtree_print_tree(tree, tree->nodes[tree->totleaf], 0);
631 }
632 #endif
633
634 #if 0
635
636
637 static void verify_tree(BVHTree *tree)
638 {
639         int i, j, check = 0;
640         
641         /* check the pointer list */
642         for (i = 0; i < tree->totleaf; i++) {
643                 if (tree->nodes[i]->parent == NULL) {
644                         printf("Leaf has no parent: %d\n", i);
645                 }
646                 else {
647                         for (j = 0; j < tree->tree_type; j++) {
648                                 if (tree->nodes[i]->parent->children[j] == tree->nodes[i])
649                                         check = 1;
650                         }
651                         if (!check) {
652                                 printf("Parent child relationship doesn't match: %d\n", i);
653                         }
654                         check = 0;
655                 }
656         }
657         
658         /* check the leaf list */
659         for (i = 0; i < tree->totleaf; i++) {
660                 if (tree->nodearray[i].parent == NULL) {
661                         printf("Leaf has no parent: %d\n", i);
662                 }
663                 else {
664                         for (j = 0; j < tree->tree_type; j++) {
665                                 if (tree->nodearray[i].parent->children[j] == &tree->nodearray[i])
666                                         check = 1;
667                         }
668                         if (!check) {
669                                 printf("Parent child relationship doesn't match: %d\n", i);
670                         }
671                         check = 0;
672                 }
673         }
674         
675         printf("branches: %d, leafs: %d, total: %d\n", tree->totbranch, tree->totleaf, tree->totbranch + tree->totleaf);
676 }
677 #endif
678
/* Helper data and structures to build a min-leaf generalized implicit tree.
 * This code can be easily reduced
 * (basically this is only a method to calculate pow(k, n) in O(1).. and stuff like that) */
typedef struct BVHBuildHelper {
	/* copy of the tree's tree_type */
	int tree_type;
	/* copy of the tree's totleaf */
	int totleafs;

	/* Min number of leafs that are achievable from a node at depth N */
	int leafs_per_child[32];
	/* Number of nodes at depth N (tree_type^N) */
	int branches_on_level[32];

	/* Number of leafs that are placed on the level that is not 100% filled */
	int remain_leafs;
} BVHBuildHelper;
691
692 static void build_implicit_tree_helper(BVHTree *tree, BVHBuildHelper *data)
693 {
694         int depth = 0;
695         int remain;
696         int nnodes;
697
698         data->totleafs = tree->totleaf;
699         data->tree_type = tree->tree_type;
700
701         /* Calculate the smallest tree_type^n such that tree_type^n >= num_leafs */
702         for (data->leafs_per_child[0] = 1;
703              data->leafs_per_child[0] <  data->totleafs;
704              data->leafs_per_child[0] *= data->tree_type)
705         {
706                 /* pass */
707         }
708
709         data->branches_on_level[0] = 1;
710
711         for (depth = 1; (depth < 32) && data->leafs_per_child[depth - 1]; depth++) {
712                 data->branches_on_level[depth] = data->branches_on_level[depth - 1] * data->tree_type;
713                 data->leafs_per_child[depth] = data->leafs_per_child[depth - 1] / data->tree_type;
714         }
715
716         remain = data->totleafs - data->leafs_per_child[1];
717         nnodes = (remain + data->tree_type - 2) / (data->tree_type - 1);
718         data->remain_leafs = remain + nnodes;
719 }
720
721 // return the min index of all the leafs archivable with the given branch
722 static int implicit_leafs_index(BVHBuildHelper *data, int depth, int child_index)
723 {
724         int min_leaf_index = child_index * data->leafs_per_child[depth - 1];
725         if (min_leaf_index <= data->remain_leafs)
726                 return min_leaf_index;
727         else if (data->leafs_per_child[depth])
728                 return data->totleafs - (data->branches_on_level[depth - 1] - child_index) * data->leafs_per_child[depth];
729         else
730                 return data->remain_leafs;
731 }
732
733 /**
734  * Generalized implicit tree build
735  *
 * An implicit tree is a tree whose structure is implied, so there is no need to store child pointers or indices.
 * It is possible to find the position of a child or the parent with simple maths (multiplication and addition).
 * This type of tree is used, for example, by heaps, where node N has its children at indices N*2 and N*2+1.
 *
 * In this case, however, the tree type is general and not known until runtime.
741  * tree_type stands for the maximum number of childs that a tree node can have.
742  * All tree types >= 2 are supported.
743  *
744  * Advantages of the used trees include:
745  *  - No need to store child/parent relations (they are implicit);
746  *  - Any node child always has an index greater than the parent;
747  *  - Brother nodes are sequential in memory;
748  *
749  *
750  * Some math relations derived for general implicit trees:
751  *
752  *   K = tree_type, ( 2 <= K )
753  *   ROOT = 1
754  *   N child of node A = A * K + (2 - K) + N, (0 <= N < K)
755  *
756  * Util methods:
757  *   TODO...
758  *    (looping elements, knowing if its a leaf or not.. etc...)
759  */
760
/* Return the number of branches needed to have the requested number of leafs,
 * the result is never less than one. */
static int implicit_needed_branches(int tree_type, int leafs)
{
	const int branches = (leafs + tree_type - 3) / (tree_type - 1);

	return max_ii(1, branches);
}
766
767 /**
768  * This function handles the problem of "sorting" the leafs (along the split_axis).
769  *
770  * It arranges the elements in the given partitions such that:
771  *  - any element in partition N is less or equal to any element in partition N+1.
772  *  - if all elements are different all partition will get the same subset of elements
773  *    as if the array was sorted.
774  *
775  * partition P is described as the elements in the range ( nth[P], nth[P+1] ]
776  *
777  * TODO: This can be optimized a bit by doing a specialized nth_element instead of K nth_elements
778  */
779 static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int split_axis)
780 {
781         int i;
782         for (i = 0; i < partitions - 1; i++) {
783                 if (nth[i] >= nth[partitions])
784                         break;
785
786                 partition_nth_element(leafs_array, nth[i], nth[partitions], nth[i + 1], split_axis);
787         }
788 }
789
790 typedef struct BVHDivNodesData {
791         BVHTree *tree;
792         BVHNode *branches_array;
793         BVHNode **leafs_array;
794
795         int tree_type;
796         int tree_offset;
797
798         BVHBuildHelper *data;
799
800         int depth;
801         int i;
802         int first_of_next_level;
803 } BVHDivNodesData;
804
805 static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j)
806 {
807         BVHDivNodesData *data = userdata;
808
809         int k;
810         const int parent_level_index = j - data->i;
811         BVHNode *parent = data->branches_array + j;
812         int nth_positions[MAX_TREETYPE + 1];
813         char split_axis;
814
815         int parent_leafs_begin = implicit_leafs_index(data->data, data->depth, parent_level_index);
816         int parent_leafs_end   = implicit_leafs_index(data->data, data->depth, parent_level_index + 1);
817
818         /* This calculates the bounding box of this branch
819          * and chooses the largest axis as the axis to divide leafs */
820         refit_kdop_hull(data->tree, parent, parent_leafs_begin, parent_leafs_end);
821         split_axis = get_largest_axis(parent->bv);
822
823         /* Save split axis (this can be used on raytracing to speedup the query time) */
824         parent->main_axis = split_axis / 2;
825
826         /* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
827          * Only to assure that the elements are partitioned on a way that each child takes the elements
828          * it would take in case the whole array was sorted.
829          * Split_leafs takes care of that "sort" problem. */
830         nth_positions[0] = parent_leafs_begin;
831         nth_positions[data->tree_type] = parent_leafs_end;
832         for (k = 1; k < data->tree_type; k++) {
833                 const int child_index = j * data->tree_type + data->tree_offset + k;
834                 const int child_level_index = child_index - data->first_of_next_level; /* child level index */
835                 nth_positions[k] = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
836         }
837
838         split_leafs(data->leafs_array, nth_positions, data->tree_type, split_axis);
839
840         /* Setup children and totnode counters
841          * Not really needed but currently most of BVH code relies on having an explicit children structure */
842         for (k = 0; k < data->tree_type; k++) {
843                 const int child_index = j * data->tree_type + data->tree_offset + k;
844                 const int child_level_index = child_index - data->first_of_next_level; /* child level index */
845
846                 const int child_leafs_begin = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
847                 const int child_leafs_end   = implicit_leafs_index(data->data, data->depth + 1, child_level_index + 1);
848
849                 if (child_leafs_end - child_leafs_begin > 1) {
850                         parent->children[k] = data->branches_array + child_index;
851                         parent->children[k]->parent = parent;
852                 }
853                 else if (child_leafs_end - child_leafs_begin == 1) {
854                         parent->children[k] = data->leafs_array[child_leafs_begin];
855                         parent->children[k]->parent = parent;
856                 }
857                 else {
858                         break;
859                 }
860
861                 parent->totnode = (char)(k + 1);
862         }
863 }
864
865 /**
866  * This functions builds an optimal implicit tree from the given leafs.
867  * Where optimal stands for:
868  *  - The resulting tree will have the smallest number of branches;
869  *  - At most only one branch will have NULL childs;
870  *  - All leafs will be stored at level N or N+1.
871  *
872  * This function creates an implicit tree on branches_array, the leafs are given on the leafs_array.
873  *
874  * The tree is built per depth levels. First branches at depth 1.. then branches at depth 2.. etc..
875  * The reason is that we can build level N+1 from level N without any data dependencies.. thus it allows
876  * to use multithread building.
877  *
878  * To archive this is necessary to find how much leafs are accessible from a certain branch, BVHBuildHelper
879  * implicit_needed_branches and implicit_leafs_index are auxiliary functions to solve that "optimal-split".
880  */
881 static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array, BVHNode **leafs_array, int num_leafs)
882 {
883         int i;
884
885         const int tree_type   = tree->tree_type;
886         const int tree_offset = 2 - tree->tree_type; /* this value is 0 (on binary trees) and negative on the others */
887         const int num_branches = implicit_needed_branches(tree_type, num_leafs);
888
889         BVHBuildHelper data;
890         int depth;
891         
892         /* set parent from root node to NULL */
893         BVHNode *tmp = branches_array + 0;
894         tmp->parent = NULL;
895
896         /* Most of bvhtree code relies on 1-leaf trees having at least one branch
897          * We handle that special case here */
898         if (num_leafs == 1) {
899                 BVHNode *root = branches_array + 0;
900                 refit_kdop_hull(tree, root, 0, num_leafs);
901                 root->main_axis = get_largest_axis(root->bv) / 2;
902                 root->totnode = 1;
903                 root->children[0] = leafs_array[0];
904                 root->children[0]->parent = root;
905                 return;
906         }
907
908         branches_array--;  /* Implicit trees use 1-based indexs */
909
910         build_implicit_tree_helper(tree, &data);
911
912         BVHDivNodesData cb_data = {
913                 .tree = tree, .branches_array = branches_array, .leafs_array = leafs_array,
914                 .tree_type = tree_type, .tree_offset = tree_offset, .data = &data,
915                 .first_of_next_level = 0, .depth = 0, .i = 0,
916         };
917
918         /* Loop tree levels (log N) loops */
919         for (i = 1, depth = 1; i <= num_branches; i = i * tree_type + tree_offset, depth++) {
920                 const int first_of_next_level = i * tree_type + tree_offset;
921                 const int end_j = min_ii(first_of_next_level, num_branches + 1);  /* index of last branch on this level */
922
923                 /* Loop all branches on this level */
924                 cb_data.first_of_next_level = first_of_next_level;
925                 cb_data.i = i;
926                 cb_data.depth = depth;
927
928                 BLI_task_parallel_range(
929                             i, end_j, &cb_data, non_recursive_bvh_div_nodes_task_cb,
930                             num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
931         }
932 }
933
934 /** \} */
935
936
937 /* -------------------------------------------------------------------- */
938
939 /** \name BLI_bvhtree API
940  * \{ */
941
942 /**
943  * \note many callers don't check for ``NULL`` return.
944  */
945 BVHTree *BLI_bvhtree_new(int maxsize, float epsilon, char tree_type, char axis)
946 {
947         BVHTree *tree;
948         int numnodes, i;
949
950         BLI_assert(tree_type >= 2 && tree_type <= MAX_TREETYPE);
951
952         tree = MEM_callocN(sizeof(BVHTree), "BVHTree");
953
954         /* tree epsilon must be >= FLT_EPSILON
955          * so that tangent rays can still hit a bounding volume..
956          * this bug would show up when casting a ray aligned with a kdop-axis and with an edge of 2 faces */
957         epsilon = max_ff(FLT_EPSILON, epsilon);
958
959         if (tree) {
960                 tree->epsilon = epsilon;
961                 tree->tree_type = tree_type;
962                 tree->axis = axis;
963
964                 if (axis == 26) {
965                         tree->start_axis = 0;
966                         tree->stop_axis = 13;
967                 }
968                 else if (axis == 18) {
969                         tree->start_axis = 7;
970                         tree->stop_axis = 13;
971                 }
972                 else if (axis == 14) {
973                         tree->start_axis = 0;
974                         tree->stop_axis = 7;
975                 }
976                 else if (axis == 8) { /* AABB */
977                         tree->start_axis = 0;
978                         tree->stop_axis = 4;
979                 }
980                 else if (axis == 6) { /* OBB */
981                         tree->start_axis = 0;
982                         tree->stop_axis = 3;
983                 }
984                 else {
985                         /* should never happen! */
986                         BLI_assert(0);
987
988                         goto fail;
989                 }
990
991
992                 /* Allocate arrays */
993                 numnodes = maxsize + implicit_needed_branches(tree_type, maxsize) + tree_type;
994
995                 tree->nodes = MEM_callocN(sizeof(BVHNode *) * (size_t)numnodes, "BVHNodes");
996                 tree->nodebv = MEM_callocN(sizeof(float) * (size_t)(axis * numnodes), "BVHNodeBV");
997                 tree->nodechild = MEM_callocN(sizeof(BVHNode *) * (size_t)(tree_type * numnodes), "BVHNodeBV");
998                 tree->nodearray = MEM_callocN(sizeof(BVHNode) * (size_t)numnodes, "BVHNodeArray");
999                 
1000                 if (UNLIKELY((!tree->nodes) ||
1001                              (!tree->nodebv) ||
1002                              (!tree->nodechild) ||
1003                              (!tree->nodearray)))
1004                 {
1005                         goto fail;
1006                 }
1007
1008                 /* link the dynamic bv and child links */
1009                 for (i = 0; i < numnodes; i++) {
1010                         tree->nodearray[i].bv = &tree->nodebv[i * axis];
1011                         tree->nodearray[i].children = &tree->nodechild[i * tree_type];
1012                 }
1013                 
1014         }
1015         return tree;
1016
1017
1018 fail:
1019         MEM_SAFE_FREE(tree->nodes);
1020         MEM_SAFE_FREE(tree->nodebv);
1021         MEM_SAFE_FREE(tree->nodechild);
1022         MEM_SAFE_FREE(tree->nodearray);
1023
1024         MEM_freeN(tree);
1025
1026         return NULL;
1027 }
1028
1029 void BLI_bvhtree_free(BVHTree *tree)
1030 {
1031         if (tree) {
1032                 MEM_freeN(tree->nodes);
1033                 MEM_freeN(tree->nodearray);
1034                 MEM_freeN(tree->nodebv);
1035                 MEM_freeN(tree->nodechild);
1036                 MEM_freeN(tree);
1037         }
1038 }
1039
1040 void BLI_bvhtree_balance(BVHTree *tree)
1041 {
1042         int i;
1043
1044         BVHNode *branches_array = tree->nodearray + tree->totleaf;
1045         BVHNode **leafs_array    = tree->nodes;
1046
1047         /* This function should only be called once (some big bug goes here if its being called more than once per tree) */
1048         BLI_assert(tree->totbranch == 0);
1049
1050         /* Build the implicit tree */
1051         non_recursive_bvh_div_nodes(tree, branches_array, leafs_array, tree->totleaf);
1052
1053         /* current code expects the branches to be linked to the nodes array
1054          * we perform that linkage here */
1055         tree->totbranch = implicit_needed_branches(tree->tree_type, tree->totleaf);
1056         for (i = 0; i < tree->totbranch; i++)
1057                 tree->nodes[tree->totleaf + i] = branches_array + i;
1058
1059 #ifdef USE_SKIP_LINKS
1060         build_skip_links(tree, tree->nodes[tree->totleaf], NULL, NULL);
1061 #endif
1062
1063         /* bvhtree_info(tree); */
1064 }
1065
1066 void BLI_bvhtree_insert(BVHTree *tree, int index, const float co[3], int numpoints)
1067 {
1068         axis_t axis_iter;
1069         BVHNode *node = NULL;
1070
1071         /* insert should only possible as long as tree->totbranch is 0 */
1072         BLI_assert(tree->totbranch <= 0);
1073         BLI_assert((size_t)tree->totleaf < MEM_allocN_len(tree->nodes) / sizeof(*(tree->nodes)));
1074
1075         node = tree->nodes[tree->totleaf] = &(tree->nodearray[tree->totleaf]);
1076         tree->totleaf++;
1077
1078         create_kdop_hull(tree, node, co, numpoints, 0);
1079         node->index = index;
1080
1081         /* inflate the bv with some epsilon */
1082         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1083                 node->bv[(2 * axis_iter)] -= tree->epsilon; /* minimum */
1084                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1085         }
1086 }
1087
1088
1089 /* call before BLI_bvhtree_update_tree() */
1090 bool BLI_bvhtree_update_node(BVHTree *tree, int index, const float co[3], const float co_moving[3], int numpoints)
1091 {
1092         BVHNode *node = NULL;
1093         axis_t axis_iter;
1094         
1095         /* check if index exists */
1096         if (index > tree->totleaf)
1097                 return false;
1098         
1099         node = tree->nodearray + index;
1100         
1101         create_kdop_hull(tree, node, co, numpoints, 0);
1102         
1103         if (co_moving)
1104                 create_kdop_hull(tree, node, co_moving, numpoints, 1);
1105         
1106         /* inflate the bv with some epsilon */
1107         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1108                 node->bv[(2 * axis_iter)]     -= tree->epsilon; /* minimum */
1109                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1110         }
1111
1112         return true;
1113 }
1114
1115 /* call BLI_bvhtree_update_node() first for every node/point/triangle */
1116 void BLI_bvhtree_update_tree(BVHTree *tree)
1117 {
1118         /* Update bottom=>top
1119          * TRICKY: the way we build the tree all the childs have an index greater than the parent
1120          * This allows us todo a bottom up update by starting on the bigger numbered branch */
1121
1122         BVHNode **root  = tree->nodes + tree->totleaf;
1123         BVHNode **index = tree->nodes + tree->totleaf + tree->totbranch - 1;
1124
1125         for (; index >= root; index--)
1126                 node_join(tree, *index);
1127 }
1128 /**
1129  * Number of times #BLI_bvhtree_insert has been called.
1130  * mainly useful for asserts functions to check we added the correct number.
1131  */
1132 int BLI_bvhtree_get_size(const BVHTree *tree)
1133 {
1134         return tree->totleaf;
1135 }
1136
1137 float BLI_bvhtree_get_epsilon(const BVHTree *tree)
1138 {
1139         return tree->epsilon;
1140 }
1141
1142 /** \} */
1143
1144
1145 /* -------------------------------------------------------------------- */
1146
1147 /** \name BLI_bvhtree_overlap
1148  * \{ */
1149
1150 /**
1151  * overlap - is it possible for 2 bv's to collide ?
1152  */
1153 static bool tree_overlap_test(const BVHNode *node1, const BVHNode *node2, axis_t start_axis, axis_t stop_axis)
1154 {
1155         const float *bv1     = node1->bv + (start_axis << 1);
1156         const float *bv2     = node2->bv + (start_axis << 1);
1157         const float *bv1_end = node1->bv + (stop_axis  << 1);
1158         
1159         /* test all axis if min + max overlap */
1160         for (; bv1 != bv1_end; bv1 += 2, bv2 += 2) {
1161                 if ((bv1[0] > bv2[1]) || (bv2[0] > bv1[1])) {
1162                         return 0;
1163                 }
1164         }
1165
1166         return 1;
1167 }
1168
1169 static void tree_overlap_traverse(
1170         BVHOverlapData_Thread *data_thread,
1171         const BVHNode *node1, const BVHNode *node2)
1172 {
1173         BVHOverlapData_Shared *data = data_thread->shared;
1174         int j;
1175
1176         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1177                 /* check if node1 is a leaf */
1178                 if (!node1->totnode) {
1179                         /* check if node2 is a leaf */
1180                         if (!node2->totnode) {
1181                                 BVHTreeOverlap *overlap;
1182
1183                                 if (UNLIKELY(node1 == node2)) {
1184                                         return;
1185                                 }
1186
1187                                 /* both leafs, insert overlap! */
1188                                 overlap = BLI_stack_push_r(data_thread->overlap);
1189                                 overlap->indexA = node1->index;
1190                                 overlap->indexB = node2->index;
1191                         }
1192                         else {
1193                                 for (j = 0; j < data->tree2->tree_type; j++) {
1194                                         if (node2->children[j]) {
1195                                                 tree_overlap_traverse(data_thread, node1, node2->children[j]);
1196                                         }
1197                                 }
1198                         }
1199                 }
1200                 else {
1201                         for (j = 0; j < data->tree2->tree_type; j++) {
1202                                 if (node1->children[j]) {
1203                                         tree_overlap_traverse(data_thread, node1->children[j], node2);
1204                                 }
1205                         }
1206                 }
1207         }
1208 }
1209
1210 /**
1211  * a version of #tree_overlap_traverse that runs a callback to check if the nodes really intersect.
1212  */
1213 static void tree_overlap_traverse_cb(
1214         BVHOverlapData_Thread *data_thread,
1215         const BVHNode *node1, const BVHNode *node2)
1216 {
1217         BVHOverlapData_Shared *data = data_thread->shared;
1218         int j;
1219
1220         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1221                 /* check if node1 is a leaf */
1222                 if (!node1->totnode) {
1223                         /* check if node2 is a leaf */
1224                         if (!node2->totnode) {
1225                                 BVHTreeOverlap *overlap;
1226
1227                                 if (UNLIKELY(node1 == node2)) {
1228                                         return;
1229                                 }
1230
1231                                 /* only difference to tree_overlap_traverse! */
1232                                 if (data->callback(data->userdata, node1->index, node2->index, data_thread->thread)) {
1233                                         /* both leafs, insert overlap! */
1234                                         overlap = BLI_stack_push_r(data_thread->overlap);
1235                                         overlap->indexA = node1->index;
1236                                         overlap->indexB = node2->index;
1237                                 }
1238                         }
1239                         else {
1240                                 for (j = 0; j < data->tree2->tree_type; j++) {
1241                                         if (node2->children[j]) {
1242                                                 tree_overlap_traverse_cb(data_thread, node1, node2->children[j]);
1243                                         }
1244                                 }
1245                         }
1246                 }
1247                 else {
1248                         for (j = 0; j < data->tree2->tree_type; j++) {
1249                                 if (node1->children[j]) {
1250                                         tree_overlap_traverse_cb(data_thread, node1->children[j], node2);
1251                                 }
1252                         }
1253                 }
1254         }
1255 }
1256
1257 /**
1258  * Use to check the total number of threads #BLI_bvhtree_overlap will use.
1259  *
1260  * \warning Must be the first tree passed to #BLI_bvhtree_overlap!
1261  */
1262 int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
1263 {
1264         return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
1265 }
1266
1267 static void bvhtree_overlap_task_cb(void *userdata, const int j)
1268 {
1269         BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j];
1270         BVHOverlapData_Shared *data_shared = data->shared;
1271
1272         if (data_shared->callback) {
1273                 tree_overlap_traverse_cb(
1274                             data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
1275                             data_shared->tree2->nodes[data_shared->tree2->totleaf]);
1276         }
1277         else {
1278                 tree_overlap_traverse(
1279                             data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
1280                             data_shared->tree2->nodes[data_shared->tree2->totleaf]);
1281         }
1282 }
1283
1284 BVHTreeOverlap *BLI_bvhtree_overlap(
1285         const BVHTree *tree1, const BVHTree *tree2, unsigned int *r_overlap_tot,
1286         /* optional callback to test the overlap before adding (must be thread-safe!) */
1287         BVHTree_OverlapCallback callback, void *userdata)
1288 {
1289         const int thread_num = BLI_bvhtree_overlap_thread_num(tree1);
1290         int j;
1291         size_t total = 0;
1292         BVHTreeOverlap *overlap = NULL, *to = NULL;
1293         BVHOverlapData_Shared data_shared;
1294         BVHOverlapData_Thread *data = BLI_array_alloca(data, (size_t)thread_num);
1295         axis_t start_axis, stop_axis;
1296         
1297         /* check for compatibility of both trees (can't compare 14-DOP with 18-DOP) */
1298         if (UNLIKELY((tree1->axis != tree2->axis) &&
1299                      (tree1->axis == 14 || tree2->axis == 14) &&
1300                      (tree1->axis == 18 || tree2->axis == 18)))
1301         {
1302                 BLI_assert(0);
1303                 return NULL;
1304         }
1305
1306         start_axis = min_axis(tree1->start_axis, tree2->start_axis);
1307         stop_axis  = min_axis(tree1->stop_axis,  tree2->stop_axis);
1308         
1309         /* fast check root nodes for collision before doing big splitting + traversal */
1310         if (!tree_overlap_test(tree1->nodes[tree1->totleaf], tree2->nodes[tree2->totleaf], start_axis, stop_axis)) {
1311                 return NULL;
1312         }
1313
1314         data_shared.tree1 = tree1;
1315         data_shared.tree2 = tree2;
1316         data_shared.start_axis = start_axis;
1317         data_shared.stop_axis = stop_axis;
1318
1319         /* can be NULL */
1320         data_shared.callback = callback;
1321         data_shared.userdata = userdata;
1322
1323         for (j = 0; j < thread_num; j++) {
1324                 /* init BVHOverlapData_Thread */
1325                 data[j].shared = &data_shared;
1326                 data[j].overlap = BLI_stack_new(sizeof(BVHTreeOverlap), __func__);
1327
1328                 /* for callback */
1329                 data[j].thread = j;
1330         }
1331
1332         BLI_task_parallel_range(
1333                     0, thread_num, data, bvhtree_overlap_task_cb,
1334                     tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
1335         
1336         for (j = 0; j < thread_num; j++)
1337                 total += BLI_stack_count(data[j].overlap);
1338         
1339         to = overlap = MEM_mallocN(sizeof(BVHTreeOverlap) * total, "BVHTreeOverlap");
1340         
1341         for (j = 0; j < thread_num; j++) {
1342                 unsigned int count = (unsigned int)BLI_stack_count(data[j].overlap);
1343                 BLI_stack_pop_n(data[j].overlap, to, count);
1344                 BLI_stack_free(data[j].overlap);
1345                 to += count;
1346         }
1347
1348         *r_overlap_tot = (unsigned int)total;
1349         return overlap;
1350 }
1351
1352 /** \} */
1353
1354
1355 /* -------------------------------------------------------------------- */
1356
1357 /** \name BLI_bvhtree_find_nearest
1358  * \{ */
1359
1360 /* Determines the nearest point of the given node BV. Returns the squared distance to that point. */
1361 static float calc_nearest_point_squared(const float proj[3], BVHNode *node, float nearest[3])
1362 {
1363         int i;
1364         const float *bv = node->bv;
1365
1366         /* nearest on AABB hull */
1367         for (i = 0; i != 3; i++, bv += 2) {
1368                 if (bv[0] > proj[i])
1369                         nearest[i] = bv[0];
1370                 else if (bv[1] < proj[i])
1371                         nearest[i] = bv[1];
1372                 else
1373                         nearest[i] = proj[i]; 
1374         }
1375
1376 #if 0
1377         /* nearest on a general hull */
1378         copy_v3_v3(nearest, data->co);
1379         for (i = data->tree->start_axis; i != data->tree->stop_axis; i++, bv += 2) {
1380                 float proj = dot_v3v3(nearest, bvhtree_kdop_axes[i]);
1381                 float dl = bv[0] - proj;
1382                 float du = bv[1] - proj;
1383
1384                 if (dl > 0) {
1385                         madd_v3_v3fl(nearest, bvhtree_kdop_axes[i], dl);
1386                 }
1387                 else if (du < 0) {
1388                         madd_v3_v3fl(nearest, bvhtree_kdop_axes[i], du);
1389                 }
1390         }
1391 #endif
1392
1393         return len_squared_v3v3(proj, nearest);
1394 }
1395
1396 /* TODO: use a priority queue to reduce the number of nodes looked on */
1397 static void dfs_find_nearest_dfs(BVHNearestData *data, BVHNode *node)
1398 {
1399         if (node->totnode == 0) {
1400                 if (data->callback)
1401                         data->callback(data->userdata, node->index, data->co, &data->nearest);
1402                 else {
1403                         data->nearest.index = node->index;
1404                         data->nearest.dist_sq = calc_nearest_point_squared(data->proj, node, data->nearest.co);
1405                 }
1406         }
1407         else {
1408                 /* Better heuristic to pick the closest node to dive on */
1409                 int i;
1410                 float nearest[3];
1411
1412                 if (data->proj[node->main_axis] <= node->children[0]->bv[node->main_axis * 2 + 1]) {
1413
1414                         for (i = 0; i != node->totnode; i++) {
1415                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1416                                         continue;
1417                                 dfs_find_nearest_dfs(data, node->children[i]);
1418                         }
1419                 }
1420                 else {
1421                         for (i = node->totnode - 1; i >= 0; i--) {
1422                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1423                                         continue;
1424                                 dfs_find_nearest_dfs(data, node->children[i]);
1425                         }
1426                 }
1427         }
1428 }
1429
1430 static void dfs_find_nearest_begin(BVHNearestData *data, BVHNode *node)
1431 {
1432         float nearest[3], dist_sq;
1433         dist_sq = calc_nearest_point_squared(data->proj, node, nearest);
1434         if (dist_sq >= data->nearest.dist_sq) {
1435                 return;
1436         }
1437         dfs_find_nearest_dfs(data, node);
1438 }
1439
1440
#if 0

/* NOTE: everything up to the matching #endif is compiled out.
 * NOTE(review): this code calls calc_nearest_point() and reads data->nearest.dist, while the
 * active code above uses calc_nearest_point_squared() and data->nearest.dist_sq, so it
 * presumably needs updating before it can be enabled again - confirm before reviving. */

/* A node paired with the distance computed for it, the element type of the search heap. */
typedef struct NodeDistance {
        BVHNode *node;
        float dist;

} NodeDistance;

/* Number of heap entries kept in the on-stack array of bfs_find_nearest. */
#define DEFAULT_FIND_NEAREST_HEAP_SIZE 1024

/* Priority predicate handed to the heap macros: compares entries by distance. */
#define NodeDistance_priority(a, b) ((a).dist < (b).dist)

static void NodeDistance_push_heap(NodeDistance *heap, int heap_size)
PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)

static void NodeDistance_pop_heap(NodeDistance *heap, int heap_size)
POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)

/* Nearest-neighbor function that uses a heap. This function leads to an optimal number of
 * min-distance queries, but for normal tri-faces and BV 6-dop a simple dfs with local heuristics
 * (as implemented in source/blender/blenkernel/intern/shrinkwrap.c) works faster.
 *
 * It may make sense to use this function if the callback queries are very slow, or if it is
 * impossible to get a nice heuristic.
 *
 * This function uses "malloc/free" instead of the MEM_* functions because it intends to be
 * thread safe. */
static void bfs_find_nearest(BVHNearestData *data, BVHNode *node)
{
        int i;
        /* the heap starts out in this stack array, `heap` only points elsewhere after growing */
        NodeDistance default_heap[DEFAULT_FIND_NEAREST_HEAP_SIZE];
        NodeDistance *heap = default_heap, current;
        int heap_size = 0, max_heap_size = sizeof(default_heap) / sizeof(default_heap[0]);
        float nearest[3];

        /* statistics, only read by the commented-out printf at the end */
        int callbacks = 0, push_heaps = 0;

        /* a leaf is handled directly by the depth-first code */
        if (node->totnode == 0) {
                dfs_find_nearest_dfs(data, node);
                return;
        }

        current.node = node;
        current.dist = calc_nearest_point(data->proj, node, nearest);

        /* keep expanding while the current node is closer than the best result so far */
        while (current.dist < data->nearest.dist) {
//              printf("%f : %f\n", current.dist, data->nearest.dist);
                for (i = 0; i < current.node->totnode; i++) {
                        BVHNode *child = current.node->children[i];
                        if (child->totnode == 0) {
                                /* leaf child: evaluate it right away */
                                callbacks++;
                                dfs_find_nearest_dfs(data, child);
                        }
                        else {
                                /* adjust heap size */
                                if ((heap_size >= max_heap_size) &&
                                    ADJUST_MEMORY(default_heap, (void **)&heap, heap_size + 1, &max_heap_size, sizeof(heap[0])) == false)
                                {
                                        printf("WARNING: bvh_find_nearest got out of memory\n");

                                        if (heap != default_heap)
                                                free(heap);

                                        return;
                                }

                                /* branch child: write it into the next free slot ... */
                                heap[heap_size].node = current.node->children[i];
                                heap[heap_size].dist = calc_nearest_point(data->proj, current.node->children[i], nearest);

                                /* ... and only keep it when it can still improve on the best result */
                                if (heap[heap_size].dist >= data->nearest.dist) continue;
                                heap_size++;

                                NodeDistance_push_heap(heap, heap_size);
                                //                      PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
                                push_heaps++;
                        }
                }
                
                /* nothing left to expand */
                if (heap_size == 0) break;

                /* continue with the entry at the front of the heap */
                current = heap[0];
                NodeDistance_pop_heap(heap, heap_size);
//              POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
                heap_size--;
        }

//      printf("hsize=%d, callbacks=%d, pushs=%d\n", heap_size, callbacks, push_heaps);

        /* release the heap if it was moved off the stack array */
        if (heap != default_heap)
                free(heap);
}
#endif
1532
1533
1534 int BLI_bvhtree_find_nearest(
1535         BVHTree *tree, const float co[3], BVHTreeNearest *nearest,
1536         BVHTree_NearestPointCallback callback, void *userdata)
1537 {
1538         axis_t axis_iter;
1539
1540         BVHNearestData data;
1541         BVHNode *root = tree->nodes[tree->totleaf];
1542
1543         /* init data to search */
1544         data.tree = tree;
1545         data.co = co;
1546
1547         data.callback = callback;
1548         data.userdata = userdata;
1549
1550         for (axis_iter = data.tree->start_axis; axis_iter != data.tree->stop_axis; axis_iter++) {
1551                 data.proj[axis_iter] = dot_v3v3(data.co, bvhtree_kdop_axes[axis_iter]);
1552         }
1553
1554         if (nearest) {
1555                 memcpy(&data.nearest, nearest, sizeof(*nearest));
1556         }
1557         else {
1558                 data.nearest.index = -1;
1559                 data.nearest.dist_sq = FLT_MAX;
1560         }
1561
1562         /* dfs search */
1563         if (root)
1564                 dfs_find_nearest_begin(&data, root);
1565
1566         /* copy back results */
1567         if (nearest) {
1568                 memcpy(nearest, &data.nearest, sizeof(*nearest));
1569         }
1570
1571         return data.nearest.index;
1572 }
1573
1574 /** \} */
1575
1576
1577 /* -------------------------------------------------------------------- */
1578
1579 /** \name BLI_bvhtree_ray_cast
1580  *
1581  * raycast is done by performing a DFS on the BVHTree and saving the closest hit.
1582  *
1583  * \{ */
1584
1585
1586 /* Determines the distance that the ray must travel to hit the bounding volume of the given node */
1587 static float ray_nearest_hit(const BVHRayCastData *data, const float bv[6])
1588 {
1589         int i;
1590
1591         float low = 0, upper = data->hit.dist;
1592
1593         for (i = 0; i != 3; i++, bv += 2) {
1594                 if (data->ray_dot_axis[i] == 0.0f) {
1595                         /* axis aligned ray */
1596                         if (data->ray.origin[i] < bv[0] - data->ray.radius ||
1597                             data->ray.origin[i] > bv[1] + data->ray.radius)
1598                         {
1599                                 return FLT_MAX;
1600                         }
1601                 }
1602                 else {
1603                         float ll = (bv[0] - data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1604                         float lu = (bv[1] + data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1605
1606                         if (data->ray_dot_axis[i] > 0.0f) {
1607                                 if (ll > low) low = ll;
1608                                 if (lu < upper) upper = lu;
1609                         }
1610                         else {
1611                                 if (lu > low) low = lu;
1612                                 if (ll < upper) upper = ll;
1613                         }
1614         
1615                         if (low > upper) return FLT_MAX;
1616                 }
1617         }
1618         return low;
1619 }
1620
1621 /**
1622  * Determines the distance that the ray must travel to hit the bounding volume of the given node
1623  * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
1624  * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
1625  *
1626  * TODO this doesn't take data->ray.radius into consideration */
1627 static float fast_ray_nearest_hit(const BVHRayCastData *data, const BVHNode *node)
1628 {
1629         const float *bv = node->bv;
1630         
1631         float t1x = (bv[data->index[0]] - data->ray.origin[0]) * data->idot_axis[0];
1632         float t2x = (bv[data->index[1]] - data->ray.origin[0]) * data->idot_axis[0];
1633         float t1y = (bv[data->index[2]] - data->ray.origin[1]) * data->idot_axis[1];
1634         float t2y = (bv[data->index[3]] - data->ray.origin[1]) * data->idot_axis[1];
1635         float t1z = (bv[data->index[4]] - data->ray.origin[2]) * data->idot_axis[2];
1636         float t2z = (bv[data->index[5]] - data->ray.origin[2]) * data->idot_axis[2];
1637
1638         if ((t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) ||
1639             (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) ||
1640             (t1x > data->hit.dist || t1y > data->hit.dist || t1z > data->hit.dist))
1641         {
1642                 return FLT_MAX;
1643         }
1644         else {
1645                 return max_fff(t1x, t1y, t1z);
1646         }
1647 }
1648
1649 static void dfs_raycast(BVHRayCastData *data, BVHNode *node)
1650 {
1651         int i;
1652
1653         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1654          * before calling the ray-primitive functions */
1655         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1656         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1657         if (dist >= data->hit.dist) {
1658                 return;
1659         }
1660
1661         if (node->totnode == 0) {
1662                 if (data->callback) {
1663                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1664                 }
1665                 else {
1666                         data->hit.index = node->index;
1667                         data->hit.dist  = dist;
1668                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1669                 }
1670         }
1671         else {
1672                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1673                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1674                         for (i = 0; i != node->totnode; i++) {
1675                                 dfs_raycast(data, node->children[i]);
1676                         }
1677                 }
1678                 else {
1679                         for (i = node->totnode - 1; i >= 0; i--) {
1680                                 dfs_raycast(data, node->children[i]);
1681                         }
1682                 }
1683         }
1684 }
1685
1686 /**
1687  * A version of #dfs_raycast with minor changes to reset the index & dist each ray cast.
1688  */
1689 static void dfs_raycast_all(BVHRayCastData *data, BVHNode *node)
1690 {
1691         int i;
1692
1693         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1694          * before calling the ray-primitive functions */
1695         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1696         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1697         if (dist >= data->hit.dist) {
1698                 return;
1699         }
1700
1701         if (node->totnode == 0) {
1702                 /* no need to check for 'data->callback' (using 'all' only makes sense with a callback). */
1703                 dist = data->hit.dist;
1704                 data->callback(data->userdata, node->index, &data->ray, &data->hit);
1705                 data->hit.index = -1;
1706                 data->hit.dist = dist;
1707         }
1708         else {
1709                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1710                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1711                         for (i = 0; i != node->totnode; i++) {
1712                                 dfs_raycast_all(data, node->children[i]);
1713                         }
1714                 }
1715                 else {
1716                         for (i = node->totnode - 1; i >= 0; i--) {
1717                                 dfs_raycast_all(data, node->children[i]);
1718                         }
1719                 }
1720         }
1721 }
1722
#if 0
/* Disabled loop-based alternative to #dfs_raycast.
 * It walks the tree through `children[0]` and `skip[1]` links instead of recursing
 * (presumably `skip[1]` is the next node once a sub-tree is done - confirm before enabling). */
static void iterative_raycast(BVHRayCastData *data, BVHNode *node)
{
        while (node != NULL) {
                const float dist = fast_ray_nearest_hit(data, node);
                const bool culled = (dist >= data->hit.dist);

                if (!culled && node->totnode != 0) {
                        /* inner node within reach: descend */
                        node = node->children[0];
                        continue;
                }

                if (!culled) {
                        /* leaf within reach */
                        if (data->callback == NULL) {
                                data->hit.index = node->index;
                                data->hit.dist  = dist;
                                madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
                        }
                        else {
                                data->callback(data->userdata, node->index, &data->ray, &data->hit);
                        }
                }

                node = node->skip[1];
        }
}
#endif
1751
1752 static void bvhtree_ray_cast_data_precalc(BVHRayCastData *data, int flag)
1753 {
1754         int i;
1755
1756         for (i = 0; i < 3; i++) {
1757                 data->ray_dot_axis[i] = dot_v3v3(data->ray.direction, bvhtree_kdop_axes[i]);
1758                 data->idot_axis[i] = 1.0f / data->ray_dot_axis[i];
1759
1760                 if (fabsf(data->ray_dot_axis[i]) < FLT_EPSILON) {
1761                         data->ray_dot_axis[i] = 0.0;
1762                 }
1763                 data->index[2 * i] = data->idot_axis[i] < 0.0f ? 1 : 0;
1764                 data->index[2 * i + 1] = 1 - data->index[2 * i];
1765                 data->index[2 * i]   += 2 * i;
1766                 data->index[2 * i + 1] += 2 * i;
1767         }
1768
1769 #ifdef USE_KDOPBVH_WATERTIGHT
1770         if (flag & BVH_RAYCAST_WATERTIGHT) {
1771                 isect_ray_tri_watertight_v3_precalc(&data->isect_precalc, data->ray.direction);
1772                 data->ray.isect_precalc = &data->isect_precalc;
1773         }
1774         else {
1775                 data->ray.isect_precalc = NULL;
1776         }
1777 #else
1778         UNUSED_VARS(flag);
1779 #endif
1780 }
1781
1782 int BLI_bvhtree_ray_cast_ex(
1783         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1784         BVHTree_RayCastCallback callback, void *userdata,
1785         int flag)
1786 {
1787         BVHRayCastData data;
1788         BVHNode *root = tree->nodes[tree->totleaf];
1789
1790         BLI_ASSERT_UNIT_V3(dir);
1791
1792         data.tree = tree;
1793
1794         data.callback = callback;
1795         data.userdata = userdata;
1796
1797         copy_v3_v3(data.ray.origin,    co);
1798         copy_v3_v3(data.ray.direction, dir);
1799         data.ray.radius = radius;
1800
1801         bvhtree_ray_cast_data_precalc(&data, flag);
1802
1803         if (hit) {
1804                 memcpy(&data.hit, hit, sizeof(*hit));
1805         }
1806         else {
1807                 data.hit.index = -1;
1808                 data.hit.dist = BVH_RAYCAST_DIST_MAX;
1809         }
1810
1811         if (root) {
1812                 dfs_raycast(&data, root);
1813 //              iterative_raycast(&data, root);
1814         }
1815
1816
1817         if (hit)
1818                 memcpy(hit, &data.hit, sizeof(*hit));
1819
1820         return data.hit.index;
1821 }
1822
1823 int BLI_bvhtree_ray_cast(
1824         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1825         BVHTree_RayCastCallback callback, void *userdata)
1826 {
1827         return BLI_bvhtree_ray_cast_ex(tree, co, dir, radius, hit, callback, userdata, BVH_RAYCAST_DEFAULT);
1828 }
1829
1830 float BLI_bvhtree_bb_raycast(const float bv[6], const float light_start[3], const float light_end[3], float pos[3])
1831 {
1832         BVHRayCastData data;
1833         float dist;
1834
1835         data.hit.dist = BVH_RAYCAST_DIST_MAX;
1836         
1837         /* get light direction */
1838         sub_v3_v3v3(data.ray.direction, light_end, light_start);
1839         
1840         data.ray.radius = 0.0;
1841         
1842         copy_v3_v3(data.ray.origin, light_start);
1843
1844         normalize_v3(data.ray.direction);
1845         copy_v3_v3(data.ray_dot_axis, data.ray.direction);
1846         
1847         dist = ray_nearest_hit(&data, bv);
1848
1849         madd_v3_v3v3fl(pos, light_start, data.ray.direction, dist);
1850
1851         return dist;
1852         
1853 }
1854
1855 /**
1856  * Calls the callback for every ray intersection
1857  *
1858  * \note Using a \a callback which resets or never sets the #BVHTreeRayHit index & dist works too,
1859  * however using this function means existing generic callbacks can be used from custom callbacks without
1860  * having to handle resetting the hit beforehand.
1861  * It also avoid redundant argument and return value which aren't meaningful when collecting multiple hits.
1862  */
1863 void BLI_bvhtree_ray_cast_all_ex(
1864         BVHTree *tree, const float co[3], const float dir[3], float radius, float hit_dist,
1865         BVHTree_RayCastCallback callback, void *userdata,
1866         int flag)
1867 {
1868         BVHRayCastData data;
1869         BVHNode *root = tree->nodes[tree->totleaf];
1870
1871         BLI_ASSERT_UNIT_V3(dir);
1872         BLI_assert(callback != NULL);
1873
1874         data.tree = tree;
1875
1876         data.callback = callback;
1877         data.userdata = userdata;
1878
1879         copy_v3_v3(data.ray.origin,    co);
1880         copy_v3_v3(data.ray.direction, dir);
1881         data.ray.radius = radius;
1882
1883         bvhtree_ray_cast_data_precalc(&data, flag);
1884
1885         data.hit.index = -1;
1886         data.hit.dist = hit_dist;
1887
1888         if (root) {
1889                 dfs_raycast_all(&data, root);
1890         }
1891 }
1892
1893 void BLI_bvhtree_ray_cast_all(
1894         BVHTree *tree, const float co[3], const float dir[3], float radius, float hit_dist,
1895         BVHTree_RayCastCallback callback, void *userdata)
1896 {
1897         BLI_bvhtree_ray_cast_all_ex(tree, co, dir, radius, hit_dist, callback, userdata, BVH_RAYCAST_DEFAULT);
1898 }
1899
1900
1901 /* -------------------------------------------------------------------- */
1902
1903 /** \name BLI_bvhtree_find_nearest_to_ray functions
1904  *
1905  * \{ */
1906
1907 static void dist_squared_ray_to_aabb_scaled_v3_precalc(
1908         BVHNearestRayData *data,
1909         const float ray_origin[3], const float ray_direction[3],
1910         const bool ray_is_normalized, const float scale[3])
1911 {
1912         if (scale) {
1913                 copy_v3_v3(data->scale, scale);
1914         }
1915         else {
1916                 copy_v3_fl(data->scale, 1.0f);
1917         }
1918         /* un-normalize ray */
1919         if (ray_is_normalized && scale &&
1920            (data->scale[0] != 1.0f || data->scale[1] != 1.0f || data->scale[2] != 1.0f))
1921         {
1922                 data->ray.direction[0] = ray_direction[0] * data->scale[0];
1923                 data->ray.direction[1] = ray_direction[1] * data->scale[1];
1924                 data->ray.direction[2] = ray_direction[2] * data->scale[2];
1925
1926                 mul_v3_v3fl(data->ray.direction, ray_direction, 1 / len_v3(data->ray.direction));
1927         }
1928         else {
1929                 copy_v3_v3(data->ray.direction, ray_direction);
1930         }
1931
1932         float dir_sq[3];
1933
1934         for (int i = 0; i < 3; i++) {
1935                 data->ray.origin[i] = ray_origin[i];
1936                 data->ray.inv_dir[i] = (data->ray.direction[i] != 0.0f) ?
1937                                        (1.0f / data->ray.direction[i]) : FLT_MAX;
1938                 /* It has to be in function of `ray.inv_dir`,
1939                  * since the division of 1 by 0.0f, can be -inf or +inf */
1940                 data->ray.sign[i] = (data->ray.inv_dir[i] < 0.0f);
1941
1942                 data->ray.direction_scaled_square[i] = data->ray.direction[i] * data->scale[i];
1943
1944                 dir_sq[i] = SQUARE(data->ray.direction_scaled_square[i]);
1945
1946                 data->ray.direction_scaled_square[i] *= data->scale[i];
1947         }
1948
1949         /* `diag_sq` Length square of each face diagonal */
1950         float diag_sq[3] = {
1951                 dir_sq[1] + dir_sq[2],
1952                 dir_sq[0] + dir_sq[2],
1953                 dir_sq[0] + dir_sq[1],
1954         };
1955
1956         data->ray.cdot_axis[0] = (diag_sq[0] != 0.0f) ? data->ray.direction[0] / diag_sq[0] : FLT_MAX;
1957         data->ray.cdot_axis[1] = (diag_sq[1] != 0.0f) ? data->ray.direction[1] / diag_sq[1] : FLT_MAX;
1958         data->ray.cdot_axis[2] = (diag_sq[2] != 0.0f) ? data->ray.direction[2] / diag_sq[2] : FLT_MAX;
1959 }
1960
1961 /**
 * Returns the squared distance from a ray to a bound-box `AABB`.
 * It is based on the `fast_ray_nearest_hit` solution to obtain
 * the coordinates of the nearest edge of the bound-box to the ray.
 *
 * \param data: precalculated values, see #dist_squared_ray_to_aabb_scaled_v3_precalc
 * (`ray.origin`, `ray.direction`, `ray.inv_dir`, `ray.sign`,
 * `ray.direction_scaled_square`, `ray.cdot_axis` and `scale` are read).
 * \param bv: min/max pairs of the box for the X, Y and Z axes.
 * \param r_depth_sq: optional. Only written when the ray misses the box;
 * it is left untouched when 0.0f is returned.
 * \param r_axis_closest: per-axis flags, always reset and refilled. The callers in this
 * file store them in `data->pick_smallest` and use them to pick the child visiting order.
 * \return 0.0f when `rtmin <= rtmax` (the ray intersects the box), otherwise
 * `len_squared_v3(r_point) - depth_sq` for the nearest point found on the box edge.
 */
1966 MINLINE float dist_squared_ray_to_aabb_scaled_v3__impl(
1967         const BVHNearestRayData *data,
1968         const float bv[6], float *r_depth_sq, bool r_axis_closest[3])
1969 {
1970
1971         /* Order the bounds of every axis by the ray sign, so `local_bvmin` holds the
          * plane the ray reaches first and `local_bvmax` the one it reaches last.
          * Both are made relative to the ray origin further down. */
1974         float local_bvmin[3], local_bvmax[3];
1975
1976         if (data->ray.sign[0]) {
1977                 local_bvmin[0] = bv[1];
1978                 local_bvmax[0] = bv[0];
1979         }
1980         else {
1981                 local_bvmin[0] = bv[0];
1982                 local_bvmax[0] = bv[1];
1983         }
1984
1985         if (data->ray.sign[1]) {
1986                 local_bvmin[1] = bv[3];
1987                 local_bvmax[1] = bv[2];
1988         }
1989         else {
1990                 local_bvmin[1] = bv[2];
1991                 local_bvmax[1] = bv[3];
1992         }
1993
1994         if (data->ray.sign[2]) {
1995                 local_bvmin[2] = bv[5];
1996                 local_bvmax[2] = bv[4];
1997         }
1998         else {
1999                 local_bvmin[2] = bv[4];
2000                 local_bvmax[2] = bv[5];
2001         }
2002
2003         sub_v3_v3(local_bvmin, data->ray.origin);
2004         sub_v3_v3(local_bvmax, data->ray.origin);
2005
        /* `tmin` is a vector that has the smaller distances to each of the
         * infinite planes of the `AABB` faces (hit in nearest face X plane,
         * nearest face Y plane and nearest face Z plane) */
2006         const float tmin[3] = {
2007                 local_bvmin[0] * data->ray.inv_dir[0],
2008                 local_bvmin[1] * data->ray.inv_dir[1],
2009                 local_bvmin[2] * data->ray.inv_dir[2],
2010         };
2011
2012         /* `tmax` is a vector that has the longer distances to each of the
2013          * infinite planes of the `AABB` faces (hit in farthest face X plane,
2014          * farthest face Y plane and farthest face Z plane) */
2015         const float tmax[3] = {
2016                 local_bvmax[0] * data->ray.inv_dir[0],
2017                 local_bvmax[1] * data->ray.inv_dir[1],
2018                 local_bvmax[2] * data->ray.inv_dir[2],
2019         };
2020         /* `v1` and `v2` are the end points of the nearest `AABB` edge to the ray.
          * Only the components of the two axes picked below are set up-front,
          * the `main_axis` component is filled in once that axis is known. */
2021         float v1[3], v2[3];
2022         /* `rtmin` is the highest value of the smaller distances. == max_axis_v3(tmin)
2023          * `rtmax` is the lowest value of longer distances. == min_axis_v3(tmax)
          * `mul` accumulates `local_bv * direction_scaled_square` of the two picked axes. */
2024         float rtmin, rtmax, mul;
2025         /* `main_axis` is the axis equivalent to edge close to the ray.
          * It is encoded arithmetically: the `tmax` test stores 3/2/1 for X/Y/Z and the
          * `tmin` test subtracts 3/1/2 for X/Y/Z; after wrapping negative values by +3
          * the result is the axis picked by neither test
          * (or the shared axis when both tests pick the same one). */
2026         int main_axis;
2027
2028         r_axis_closest[0] = false;
2029         r_axis_closest[1] = false;
2030         r_axis_closest[2] = false;
2031
2032         /* *** min_axis_v3(tmax) *** */
2033         if ((tmax[0] <= tmax[1]) && (tmax[0] <= tmax[2])) {
2034                 // printf("# Hit in X %s\n", data->sign[0] ? "min", "max");
2035                 rtmax = tmax[0];
2036                 v1[0] = v2[0] = local_bvmax[0];
2037                 mul = local_bvmax[0] * data->ray.direction_scaled_square[0];
2038                 main_axis = 3;
2039                 r_axis_closest[0] = data->ray.sign[0];
2040         }
2041         else if ((tmax[1] <= tmax[0]) && (tmax[1] <= tmax[2])) {
2042                 // printf("# Hit in Y %s\n", data->sign[1] ? "min", "max");
2043                 rtmax = tmax[1];
2044                 v1[1] = v2[1] = local_bvmax[1];
2045                 mul = local_bvmax[1] * data->ray.direction_scaled_square[1];
2046                 main_axis = 2;
2047                 r_axis_closest[1] = data->ray.sign[1];
2048         }
2049         else {
2050                 // printf("# Hit in Z %s\n", data->sign[2] ? "min", "max");
2051                 rtmax = tmax[2];
2052                 v1[2] = v2[2] = local_bvmax[2];
2053                 mul = local_bvmax[2] * data->ray.direction_scaled_square[2];
2054                 main_axis = 1;
2055                 r_axis_closest[2] = data->ray.sign[2];
2056         }
2057
2058         /* *** max_axis_v3(tmin) *** */
2059         if ((tmin[0] >= tmin[1]) && (tmin[0] >= tmin[2])) {
2060                 // printf("# To X %s\n", data->sign[0] ? "max", "min");
2061                 rtmin = tmin[0];
2062                 v1[0] = v2[0] = local_bvmin[0];
2063                 mul += local_bvmin[0] * data->ray.direction_scaled_square[0];
2064                 main_axis -= 3;
2065                 r_axis_closest[0] = !data->ray.sign[0];
2066         }
2067         else if ((tmin[1] >= tmin[0]) && (tmin[1] >= tmin[2])) {
2068                 // printf("# To Y %s\n", data->sign[1] ? "max", "min");
2069                 rtmin = tmin[1];
2070                 v1[1] = v2[1] = local_bvmin[1];
2071                 mul += local_bvmin[1] * data->ray.direction_scaled_square[1];
2072                 main_axis -= 1;
2073                 r_axis_closest[1] = !data->ray.sign[1];
2074         }
2075         else {
2076                 // printf("# To Z %s\n", data->sign[2] ? "max", "min");
2077                 rtmin = tmin[2];
2078                 v1[2] = v2[2] = local_bvmin[2];
2079                 mul += local_bvmin[2] * data->ray.direction_scaled_square[2];
2080                 main_axis -= 2;
2081                 r_axis_closest[2] = !data->ray.sign[2];
2082         }
2083         /* *** end min/max axis *** */
2084
        /* wrap the encoded value back into the 0..2 axis range */
2085         if (main_axis < 0)
2086                 main_axis += 3;
2087
2088         /* if rtmin <= rtmax, the ray intersects the `AABB` */
2089         if (rtmin <= rtmax) {
/* NOTE(review): `min_depth` and `fallback` are not declared anywhere in this function,
 * so the IGNORE_BEHIND_RAY branches presumably do not compile when the define is enabled - confirm. */
2090 #ifdef IGNORE_BEHIND_RAY
2091                 /* `if rtmax < depth_min`, the whole `AABB` is behind us */
2092                 if (rtmax < min_depth) {
2093                         return fallback;
2094                 }
2095 #endif
                /* `proj` is the coordinate along `main_axis` (relative to the ray origin) where
                 * the ray enters the box; flag which end of the box on that axis is nearer to it. */
2096                 const float proj = rtmin * data->ray.direction[main_axis];
2097
2098                 if (data->ray.sign[main_axis])
2099                         r_axis_closest[main_axis] = (proj - local_bvmax[main_axis]) < (local_bvmin[main_axis] - proj);
2100                 else
2101                         r_axis_closest[main_axis] = (proj - local_bvmin[main_axis]) < (local_bvmax[main_axis] - proj);
2102
                /* `r_depth_sq` is intentionally not written on this path (see the disabled lines). */
2103                 //if (r_depth_sq)
2104                 //      *r_depth_sq = SQUARE(rtmin);
2105
2106                 return 0.0f;
2107         }
2108 #ifdef IGNORE_BEHIND_RAY
2109         /* `if rtmin < depth_min`, the whole `AABB` is behind us */
2110         else if (rtmin < min_depth) {
2111                 return fallback;
2112         }
2113 #endif
2114
        /* The ray misses the box: complete the edge end points along `main_axis`,
         * `v1` takes the smaller bound and `v2` the larger one. */
2115         if (data->ray.sign[main_axis]) {
2116                 v1[main_axis] = local_bvmax[main_axis];
2117                 v2[main_axis] = local_bvmin[main_axis];
2118         }
2119         else {
2120                 v1[main_axis] = local_bvmin[main_axis];
2121                 v2[main_axis] = local_bvmax[main_axis];
2122         }
2123         {
2124                 /* `proj` equals to nearest point on the ray closest to the edge `v1 v2` of the `AABB`. */
2125                 const float proj = mul * data->ray.cdot_axis[main_axis];
2126                 float depth_sq, r_point[3];
2127                 if (v1[main_axis] > proj) { /* the nearest point to the ray is the point v1 */
2128                         r_axis_closest[main_axis] = true;
2129                         /* `depth` is equivalent to the distance of the projection of v1 on the ray */
2130                         depth_sq = mul + data->ray.direction_scaled_square[main_axis] * v1[main_axis];
2131
2132                         copy_v3_v3(r_point, v1);
2133                 }
2134                 else if (v2[main_axis] < proj) { /* the nearest point of the ray is the point v2 */
2135                         r_axis_closest[main_axis] = false;
2136
2137                         depth_sq = mul + data->ray.direction_scaled_square[main_axis] * v2[main_axis];
2138
2139                         copy_v3_v3(r_point, v2);
2140                 }
2141                 else {  /* the nearest point of the ray is on the edge of the `AABB`. */
2142                         r_axis_closest[main_axis] = (proj - v1[main_axis]) < (v2[main_axis] - proj);
2143
2144                         depth_sq = mul + data->ray.direction_scaled_square[main_axis] * proj;
2145 #if 0
2146                         r_point[0] = main_axis == 0 ? proj : v2[0];
2147                         r_point[1] = main_axis == 1 ? proj : v2[1];
2148                         r_point[2] = main_axis == 2 ? proj : v2[2];
2149 #else
2150                         v2[main_axis] = proj;
2151                         copy_v3_v3(r_point, v2);
2152 #endif
2153                 }
                /* up to here `depth_sq` held the (unsquared) depth, square it now */
2154                 depth_sq *= depth_sq;
2155
2156                 if (r_depth_sq)
2157                         *r_depth_sq = depth_sq;
2158
2159                 /* TODO: scale can be optional */
2160                 r_point[0] *= data->scale[0];
2161                 r_point[1] *= data->scale[1];
2162                 r_point[2] *= data->scale[2];
2163
                /* squared length of the scaled point (relative to the ray origin) minus the squared depth */
2164                 return len_squared_v3(r_point) - depth_sq;
2165         }
2166 }
2167
2168 /**
2169  * <pre>
2170  *  + r_point
2171  *  |
2172  *  | dist
2173  *  |
2174  *  +----depth----+orig <-- dir
2175  *
2176  * tangent = dist/depth
2177  * </pre>
2178  */
2179 static float calc_tangent_sq(BVHNearestRayData *data, BVHNode *node)
2180 {
2181         float depth_sq;
2182         const float dist_sq = dist_squared_ray_to_aabb_scaled_v3__impl(
2183                 data, node->bv, &depth_sq, data->pick_smallest);
2184
2185         return (dist_sq != 0.0f) ? (dist_sq / depth_sq) : 0.0f;
2186 }
2187
2188 static float calc_dist_sq_to_ray(BVHNearestRayData *data, BVHNode *node)
2189 {
2190         return dist_squared_ray_to_aabb_scaled_v3__impl(
2191                 data, node->bv, NULL,
2192                 data->pick_smallest);
2193 }
2194
2195 static void dfs_find_lowest_tangent_dfs(BVHNearestRayData *data, BVHNode *node)
2196 {
2197         if (node->totnode == 0) {
2198                 if (data->callback) {
2199                         data->callback(data->userdata, data->ray.origin, data->ray.direction,
2200                                        data->scale, node->index, &data->nearest);
2201                 }
2202                 else {
2203                         data->nearest.index = node->index;
2204                         data->nearest.dist_sq = calc_tangent_sq(data, node);
2205                         /* TODO: return a value to the data->nearest.co
2206                          * not urgent however since users currently define own callbacks */
2207                 }
2208         }
2209         else {
2210                 int i;
2211                 /* First pick the closest node to dive on */
2212                 if (data->pick_smallest[node->main_axis]) {
2213                         for (i = 0; i != node->totnode; i++) {
2214                                 if (calc_tangent_sq(data, node->children[i]) < data->nearest.dist_sq) {
2215                                         dfs_find_lowest_tangent_dfs(data, node->children[i]);
2216                                 }
2217                         }
2218                 }
2219                 else {
2220                         for (i = node->totnode - 1; i >= 0; i--) {
2221                                 if (calc_tangent_sq(data, node->children[i]) < data->nearest.dist_sq) {
2222                                         dfs_find_lowest_tangent_dfs(data, node->children[i]);
2223                                 }
2224                         }
2225                 }
2226         }
2227 }
2228
2229 static void dfs_find_nearest_to_ray_dfs(BVHNearestRayData *data, BVHNode *node)
2230 {
2231         if (node->totnode == 0) {
2232                 if (data->callback) {
2233                         data->callback(data->userdata, data->ray.origin, data->ray.direction,
2234                                        data->scale, node->index, &data->nearest);
2235                 }
2236                 else {
2237                         data->nearest.index = node->index;
2238                         data->nearest.dist_sq = calc_dist_sq_to_ray(data, node);
2239                         /* TODO: return a value to the data->nearest.co
2240                          * not urgent however since users currently define own callbacks */
2241                 }
2242         }
2243         else {
2244                 int i;
2245                 /* First pick the closest node to dive on */
2246                 if (data->pick_smallest[node->main_axis]) {
2247                         for (i = 0; i != node->totnode; i++) {
2248                                 if (calc_dist_sq_to_ray(data, node->children[i]) < data->nearest.dist_sq) {
2249                                         dfs_find_nearest_to_ray_dfs(data, node->children[i]);
2250                                 }
2251                         }
2252                 }
2253                 else {
2254                         for (i = node->totnode - 1; i >= 0; i--) {
2255                                 if (calc_dist_sq_to_ray(data, node->children[i]) < data->nearest.dist_sq) {
2256                                         dfs_find_nearest_to_ray_dfs(data, node->children[i]);
2257                                 }
2258                         }
2259                 }
2260         }
2261 }
2262
2263 /**
2264  * Returns the point whose tangent defined by the angle between the point and ray is the lowest
2265  * nearest.dist_sq returns the angle's tangent
2266  */
2267 int BLI_bvhtree_find_nearest_to_ray_angle(
2268         BVHTree *tree, const float co[3], const float dir[3],
2269         const bool ray_is_normalized, const float scale[3],
2270         BVHTreeNearest *nearest,
2271         BVHTree_NearestToRayCallback callback, void *userdata)
2272 {
2273         BVHNearestRayData data;
2274         BVHNode *root = tree->nodes[tree->totleaf];
2275
2276         data.tree = tree;
2277
2278         data.callback = callback;
2279         data.userdata = userdata;
2280
2281         dist_squared_ray_to_aabb_scaled_v3_precalc(&data, co, dir, ray_is_normalized, scale);
2282
2283         if (nearest) {
2284                 memcpy(&data.nearest, nearest, sizeof(*nearest));
2285         }
2286         else {
2287                 data.nearest.index = -1;
2288                 data.nearest.dist_sq = FLT_MAX;
2289         }
2290
2291         /* dfs search */
2292         if (root) {
2293                 if (calc_tangent_sq(&data, root) < data.nearest.dist_sq)
2294                         dfs_find_lowest_tangent_dfs(&data, root);
2295         }
2296
2297         /* copy back results */
2298         if (nearest) {
2299                 memcpy(nearest, &data.nearest, sizeof(*nearest));
2300         }
2301
2302         return data.nearest.index;
2303 }
2304
2305 /* return the nearest point to ray */
2306 int BLI_bvhtree_find_nearest_to_ray(
2307         BVHTree *tree, const float co[3], const float dir[3],
2308         const bool ray_is_normalized, const float scale[3],
2309         BVHTreeNearest *nearest,
2310         BVHTree_NearestToRayCallback callback, void *userdata)
2311 {
2312         BVHNearestRayData data;
2313         BVHNode *root = tree->nodes[tree->totleaf];
2314
2315         data.tree = tree;
2316
2317         data.callback = callback;
2318         data.userdata = userdata;
2319
2320         dist_squared_ray_to_aabb_scaled_v3_precalc(&data, co, dir, ray_is_normalized, scale);
2321
2322         if (nearest) {
2323                 memcpy(&data.nearest, nearest, sizeof(*nearest));
2324         }
2325         else {
2326                 data.nearest.index = -1;
2327                 data.nearest.dist_sq = FLT_MAX;
2328         }
2329
2330         /* dfs search */
2331         if (root) {
2332                 if (calc_dist_sq_to_ray(&data, root) < data.nearest.dist_sq) {
2333                         dfs_find_nearest_to_ray_dfs(&data, root);
2334                 }
2335         }
2336
2337         /* copy back results */
2338         if (nearest) {
2339                 memcpy(nearest, &data.nearest, sizeof(*nearest));
2340         }
2341
2342         return data.nearest.index;
2343 }
2344
2345 /** \} */
2346
2347
2348 /* -------------------------------------------------------------------- */
2349
2350 /** \name BLI_bvhtree_range_query
2351  *
2352  * Calls the callback for every leaf node found within the given spherical range (center, radius)
2353  * and counts the leaves that were reported.
2354  *
2355  * \{ */
2356
/** State shared by the recursive steps of a range query. */
2357 typedef struct RangeQueryData {
2358         BVHTree *tree;  /* tree being queried */
2359         const float *center;  /* center of the query sphere */
2360         float radius_sq;  /* squared radius */
2361
2362         int hits;  /* number of leaves reported to the callback so far */
2363
2364         BVHTree_RangeQuery callback;  /* called for every leaf inside the range */
2365         void *userdata;  /* passed through to the callback */
2366 } RangeQueryData;
2367
2368
2369 static void dfs_range_query(RangeQueryData *data, BVHNode *node)
2370 {
2371         if (node->totnode == 0) {
2372 #if 0   /*UNUSED*/
2373                 /* Calculate the node min-coords (if the node was a point then this is the point coordinates) */
2374                 float co[3];
2375                 co[0] = node->bv[0];
2376                 co[1] = node->bv[2];
2377                 co[2] = node->bv[4];
2378 #endif
2379         }
2380         else {
2381                 int i;
2382                 for (i = 0; i != node->totnode; i++) {
2383                         float nearest[3];
2384                         float dist_sq = calc_nearest_point_squared(data->center, node->children[i], nearest);
2385                         if (dist_sq < data->radius_sq) {
2386                                 /* Its a leaf.. call the callback */
2387                                 if (node->children[i]->totnode == 0) {
2388                                         data->hits++;
2389                                         data->callback(data->userdata, node->children[i]->index, data->center, dist_sq);
2390                                 }
2391                                 else
2392                                         dfs_range_query(data, node->children[i]);
2393                         }
2394                 }
2395         }
2396 }
2397
2398 int BLI_bvhtree_range_query(
2399         BVHTree *tree, const float co[3], float radius,
2400         BVHTree_RangeQuery callback, void *userdata)
2401 {
2402         BVHNode *root = tree->nodes[tree->totleaf];
2403
2404         RangeQueryData data;
2405         data.tree = tree;
2406         data.center = co;
2407         data.radius_sq = radius * radius;
2408         data.hits = 0;
2409
2410         data.callback = callback;
2411         data.userdata = userdata;
2412
2413         if (root != NULL) {
2414                 float nearest[3];
2415                 float dist_sq = calc_nearest_point_squared(data.center, root, nearest);
2416                 if (dist_sq < data.radius_sq) {
2417                         /* Its a leaf.. call the callback */
2418                         if (root->totnode == 0) {
2419                                 data.hits++;
2420                                 data.callback(data.userdata, root->index, co, dist_sq);
2421                         }
2422                         else
2423                                 dfs_range_query(&data, root);
2424                 }
2425         }
2426
2427         return data.hits;
2428 }
2429
2430 /** \} */
2431
2432
2433 /* -------------------------------------------------------------------- */
2434
2435 /** \name BLI_bvhtree_walk_dfs
2436  * \{ */
2437
2438 /**
2439  * Runs first among nodes children of the first node before going to the next node in the same layer.
2440  *
2441  * \return false to break out of the search early.
2442  */
2443 static bool bvhtree_walk_dfs_recursive(
2444         BVHTree_WalkParentCallback walk_parent_cb,
2445         BVHTree_WalkLeafCallback walk_leaf_cb,
2446         BVHTree_WalkOrderCallback walk_order_cb,
2447         const BVHNode *node, void *userdata)
2448 {
2449         if (node->totnode == 0) {
2450                 return walk_leaf_cb((const BVHTreeAxisRange *)node->bv, node->index, userdata);
2451         }
2452         else {
2453                 /* First pick the closest node to recurse into */
2454                 if (walk_order_cb((const BVHTreeAxisRange *)node->bv, node->main_axis, userdata)) {
2455                         for (int i = 0; i != node->totnode; i++) {
2456                                 if (walk_parent_cb((const BVHTreeAxisRange *)node->children[i]->bv, userdata)) {
2457                                         if (!bvhtree_walk_dfs_recursive(
2458                                                 walk_parent_cb, walk_leaf_cb, walk_order_cb,
2459                                                 node->children[i], userdata))
2460                                         {
2461                                                 return false;
2462                                         }
2463                                 }
2464                         }
2465                 }
2466                 else {
2467                         for (int i = node->totnode - 1; i >= 0; i--) {
2468                                 if (walk_parent_cb((const BVHTreeAxisRange *)node->children[i]->bv, userdata)) {
2469                                         if (!bvhtree_walk_dfs_recursive(
2470                                                 walk_parent_cb, walk_leaf_cb, walk_order_cb,
2471                                                 node->children[i], userdata))
2472                                         {
2473                                                 return false;
2474                                         }
2475                                 }
2476                         }
2477                 }
2478         }
2479         return true;
2480 }
2481
2482 /**
2483  * This is a generic function to perform a depth first search on the BVHTree
2484  * where the search order and nodes traversed depend on callbacks passed in.
2485  *
2486  * \param tree: Tree to walk.
2487  * \param walk_parent_cb: Callback on a parents bound-box to test if it should be traversed.
2488  * \param walk_leaf_cb: Callback to test leaf nodes, callback must store its own result,
2489  * returning false exits early.
2490  * \param walk_order_cb: Callback that indicates which direction to search,
2491  * either from the node with the lower or higher k-dop axis value.
2492  * \param userdata: Argument passed to all callbacks.
2493  */
2494 void BLI_bvhtree_walk_dfs(
2495         BVHTree *tree,
2496         BVHTree_WalkParentCallback walk_parent_cb,
2497         BVHTree_WalkLeafCallback walk_leaf_cb,
2498         BVHTree_WalkOrderCallback walk_order_cb, void *userdata)
2499 {
2500         const BVHNode *root = tree->nodes[tree->totleaf];
2501         if (root != NULL) {
2502                 /* first make sure the bv of root passes in the test too */
2503                 if (walk_parent_cb((const BVHTreeAxisRange *)root->bv, userdata)) {
2504                         bvhtree_walk_dfs_recursive(walk_parent_cb, walk_leaf_cb, walk_order_cb, root, userdata);
2505                 }
2506         }
2507 }
2508
2509 /** \} */