c4bf2ae6910a65b16eda097cdd3b2872183eedf4
[blender.git] / source / blender / blenlib / intern / BLI_kdopbvh.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Daniel Genrich, Andre Pinto
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/blenlib/intern/BLI_kdopbvh.c
29  *  \ingroup bli
30  *  \brief BVH-tree implementation.
31  *
32  * KD-Overlap-BVH, implements a bvh-tree structure with support for:
33  *
34  * - Ray-cast:
35  *   #BLI_bvhtree_ray_cast, #BVHRayCastData
36  * - Nearest point on surface:
37  *   #BLI_bvhtree_find_nearest, #BVHNearestData
38  * - Overlapping 2 trees:
39  *   #BLI_bvhtree_overlap, #BVHOverlapData_Shared, #BVHOverlapData_Thread
40  */
41
42 #include <assert.h>
43
44 #include "MEM_guardedalloc.h"
45
46 #include "BLI_utildefines.h"
47 #include "BLI_alloca.h"
48 #include "BLI_stack.h"
49 #include "BLI_kdopbvh.h"
50 #include "BLI_math.h"
51 #include "BLI_strict_flags.h"
52 #include "BLI_task.h"
53
54 /* used for iterative_raycast */
55 // #define USE_SKIP_LINKS
56
57 #define MAX_TREETYPE 32
58
59 /* Setting zero so we can catch bugs in BLI_task/KDOPBVH.
60  * TODO(sergey): Deduplicate the limits with PBVH from BKE.
61  */
62 #ifdef DEBUG
63 #  define KDOPBVH_THREAD_LEAF_THRESHOLD 0
64 #else
65 #  define KDOPBVH_THREAD_LEAF_THRESHOLD 1024
66 #endif
67
68 typedef unsigned char axis_t;
69
70 typedef struct BVHNode {
71         struct BVHNode **children;
72         struct BVHNode *parent; /* some user defined traversed need that */
73 #ifdef USE_SKIP_LINKS
74         struct BVHNode *skip[2];
75 #endif
76         float *bv;      /* Bounding volume of all nodes, max 13 axis */
77         int index;      /* face, edge, vertex index */
78         char totnode;   /* how many nodes are used, used for speedup */
79         char main_axis; /* Axis used to split this node */
80 } BVHNode;
81
82 /* keep under 26 bytes for speed purposes */
83 struct BVHTree {
84         BVHNode **nodes;
85         BVHNode *nodearray;     /* pre-alloc branch nodes */
86         BVHNode **nodechild;    /* pre-alloc childs for nodes */
87         float   *nodebv;        /* pre-alloc bounding-volumes for nodes */
88         float epsilon;          /* epslion is used for inflation of the k-dop      */
89         int totleaf;            /* leafs */
90         int totbranch;
91         axis_t start_axis, stop_axis;  /* KDOP_AXES array indices according to axis */
92         axis_t axis;                   /* kdop type (6 => OBB, 7 => AABB, ...) */
93         char tree_type;                /* type of tree (4 => quadtree) */
94 };
95
96 /* optimization, ensure we stay small */
97 BLI_STATIC_ASSERT((sizeof(void *) == 8 && sizeof(BVHTree) <= 48) ||
98                   (sizeof(void *) == 4 && sizeof(BVHTree) <= 32),
99                   "over sized")
100
101 /* avoid duplicating vars in BVHOverlapData_Thread */
102 typedef struct BVHOverlapData_Shared {
103         const BVHTree *tree1, *tree2;
104         axis_t start_axis, stop_axis;
105
106         /* use for callbacks */
107         BVHTree_OverlapCallback callback;
108         void *userdata;
109 } BVHOverlapData_Shared;
110
111 typedef struct BVHOverlapData_Thread {
112         BVHOverlapData_Shared *shared;
113         struct BLI_Stack *overlap;  /* store BVHTreeOverlap */
114         /* use for callbacks */
115         int thread;
116 } BVHOverlapData_Thread;
117
118 typedef struct BVHNearestData {
119         BVHTree *tree;
120         const float *co;
121         BVHTree_NearestPointCallback callback;
122         void    *userdata;
123         float proj[13];         /* coordinates projection over axis */
124         BVHTreeNearest nearest;
125
126 } BVHNearestData;
127
128 typedef struct BVHRayCastData {
129         BVHTree *tree;
130
131         BVHTree_RayCastCallback callback;
132         void    *userdata;
133
134
135         BVHTreeRay ray;
136
137 #ifdef USE_KDOPBVH_WATERTIGHT
138         struct IsectRayPrecalc isect_precalc;
139 #endif
140
141         /* initialized by bvhtree_ray_cast_data_precalc */
142         float ray_dot_axis[13];
143         float idot_axis[13];
144         int index[6];
145
146         BVHTreeRayHit hit;
147 } BVHRayCastData;
148
149
150 /**
151  * Bounding Volume Hierarchy Definition
152  *
153  * Notes: From OBB until 26-DOP --> all bounding volumes possible, just choose type below
154  * Notes: You have to choose the type at compile time ITM
155  * Notes: You can choose the tree type --> binary, quad, octree, choose below
156  */
157
158 static const float KDOP_AXES[13][3] = {
159         {1.0, 0, 0}, {0, 1.0, 0}, {0, 0, 1.0}, {1.0, 1.0, 1.0}, {1.0, -1.0, 1.0}, {1.0, 1.0, -1.0},
160         {1.0, -1.0, -1.0}, {1.0, 1.0, 0}, {1.0, 0, 1.0}, {0, 1.0, 1.0}, {1.0, -1.0, 0}, {1.0, 0, -1.0},
161         {0, 1.0, -1.0}
162 };
163
164 MINLINE axis_t min_axis(axis_t a, axis_t b)
165 {
166         return (a < b) ? a : b;
167 }
168 #if 0
169 MINLINE axis_t max_axis(axis_t a, axis_t b)
170 {
171         return (b < a) ? a : b;
172 }
173 #endif
174
175 #if 0
176
177 /*
178  * Generic push and pop heap
179  */
180 #define PUSH_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                  \
181         {                                                                         \
182                 HEAP_TYPE element = heap[heap_size - 1];                              \
183                 int child = heap_size - 1;                                            \
184                 while (child != 0) {                                                  \
185                         int parent = (child - 1) / 2;                                     \
186                         if (PRIORITY(element, heap[parent])) {                            \
187                                 heap[child] = heap[parent];                                   \
188                                 child = parent;                                               \
189                         }                                                                 \
190                         else {                                                            \
191                                 break;                                                        \
192                         }                                                                 \
193                 }                                                                     \
194                 heap[child] = element;                                                \
195         } (void)0
196
197 #define POP_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                   \
198         {                                                                         \
199                 HEAP_TYPE element = heap[heap_size - 1];                              \
200                 int parent = 0;                                                       \
201                 while (parent < (heap_size - 1) / 2) {                                \
202                         int child2 = (parent + 1) * 2;                                    \
203                         if (PRIORITY(heap[child2 - 1], heap[child2])) {                   \
204                                 child2--;                                                     \
205                         }                                                                 \
206                         if (PRIORITY(element, heap[child2])) {                            \
207                                 break;                                                        \
208                         }                                                                 \
209                         heap[parent] = heap[child2];                                      \
210                         parent = child2;                                                  \
211                 }                                                                     \
212                 heap[parent] = element;                                               \
213         } (void)0
214
215 static bool ADJUST_MEMORY(void *local_memblock, void **memblock, int new_size, int *max_size, int size_per_item)
216 {
217         int new_max_size = *max_size * 2;
218         void *new_memblock = NULL;
219
220         if (new_size <= *max_size) {
221                 return true;
222         }
223
224         if (*memblock == local_memblock) {
225                 new_memblock = malloc(size_per_item * new_max_size);
226                 memcpy(new_memblock, *memblock, size_per_item * *max_size);
227         }
228         else {
229                 new_memblock = realloc(*memblock, size_per_item * new_max_size);
230         }
231
232         if (new_memblock) {
233                 *memblock = new_memblock;
234                 *max_size = new_max_size;
235                 return true;
236         }
237         else {
238                 return false;
239         }
240 }
241 #endif
242
243 /**
244  * Introsort
245  * with permission deriven from the following Java code:
246  * http://ralphunden.net/content/tutorials/a-guide-to-introsort/
247  * and he derived it from the SUN STL
248  */
249
250 //static int size_threshold = 16;
251
252 #if 0
253 /**
254  * Common methods for all algorithms
255  */
256 static int floor_lg(int a)
257 {
258         return (int)(floor(log(a) / log(2)));
259 }
260 #endif
261
262 static void node_minmax_init(const BVHTree *tree, BVHNode *node)
263 {
264         axis_t axis_iter;
265         float (*bv)[2] = (float (*)[2])node->bv;
266
267         for (axis_iter = tree->start_axis; axis_iter != tree->stop_axis; axis_iter++) {
268                 bv[axis_iter][0] =  FLT_MAX;
269                 bv[axis_iter][1] = -FLT_MAX;
270         }
271 }
272
273 /**
274  * Insertion sort algorithm
275  */
276 static void bvh_insertionsort(BVHNode **a, int lo, int hi, int axis)
277 {
278         int i, j;
279         BVHNode *t;
280         for (i = lo; i < hi; i++) {
281                 j = i;
282                 t = a[i];
283                 while ((j != lo) && (t->bv[axis] < (a[j - 1])->bv[axis])) {
284                         a[j] = a[j - 1];
285                         j--;
286                 }
287                 a[j] = t;
288         }
289 }
290
291 static int bvh_partition(BVHNode **a, int lo, int hi, BVHNode *x, int axis)
292 {
293         int i = lo, j = hi;
294         while (1) {
295                 while ((a[i])->bv[axis] < x->bv[axis]) i++;
296                 j--;
297                 while (x->bv[axis] < (a[j])->bv[axis]) j--;
298                 if (!(i < j))
299                         return i;
300                 SWAP(BVHNode *, a[i], a[j]);
301                 i++;
302         }
303 }
304
305 #if 0
306 /**
307  * Heapsort algorithm
308  */
309 static void bvh_downheap(BVHNode **a, int i, int n, int lo, int axis)
310 {
311         BVHNode *d = a[lo + i - 1];
312         int child;
313         while (i <= n / 2) {
314                 child = 2 * i;
315                 if ((child < n) && ((a[lo + child - 1])->bv[axis] < (a[lo + child])->bv[axis])) {
316                         child++;
317                 }
318                 if (!(d->bv[axis] < (a[lo + child - 1])->bv[axis])) break;
319                 a[lo + i - 1] = a[lo + child - 1];
320                 i = child;
321         }
322         a[lo + i - 1] = d;
323 }
324
325 static void bvh_heapsort(BVHNode **a, int lo, int hi, int axis)
326 {
327         int n = hi - lo, i;
328         for (i = n / 2; i >= 1; i = i - 1) {
329                 bvh_downheap(a, i, n, lo, axis);
330         }
331         for (i = n; i > 1; i = i - 1) {
332                 SWAP(BVHNode *, a[lo], a[lo + i - 1]);
333                 bvh_downheap(a, 1, i - 1, lo, axis);
334         }
335 }
336 #endif
337
338 static BVHNode *bvh_medianof3(BVHNode **a, int lo, int mid, int hi, int axis)  /* returns Sortable */
339 {
340         if ((a[mid])->bv[axis] < (a[lo])->bv[axis]) {
341                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
342                         return a[mid];
343                 else {
344                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
345                                 return a[hi];
346                         else
347                                 return a[lo];
348                 }
349         }
350         else {
351                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis]) {
352                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
353                                 return a[lo];
354                         else
355                                 return a[hi];
356                 }
357                 else
358                         return a[mid];
359         }
360 }
361
362 #if 0
363 /*
364  * Quicksort algorithm modified for Introsort
365  */
366 static void bvh_introsort_loop(BVHNode **a, int lo, int hi, int depth_limit, int axis)
367 {
368         int p;
369
370         while (hi - lo > size_threshold) {
371                 if (depth_limit == 0) {
372                         bvh_heapsort(a, lo, hi, axis);
373                         return;
374                 }
375                 depth_limit = depth_limit - 1;
376                 p = bvh_partition(a, lo, hi, bvh_medianof3(a, lo, lo + ((hi - lo) / 2) + 1, hi - 1, axis), axis);
377                 bvh_introsort_loop(a, p, hi, depth_limit, axis);
378                 hi = p;
379         }
380 }
381
382 static void sort(BVHNode **a0, int begin, int end, int axis)
383 {
384         if (begin < end) {
385                 BVHNode **a = a0;
386                 bvh_introsort_loop(a, begin, end, 2 * floor_lg(end - begin), axis);
387                 bvh_insertionsort(a, begin, end, axis);
388         }
389 }
390
391 static void sort_along_axis(BVHTree *tree, int start, int end, int axis)
392 {
393         sort(tree->nodes, start, end, axis);
394 }
395 #endif
396
397 /**
398  * \note after a call to this function you can expect one of:
399  * - every node to left of a[n] are smaller or equal to it
400  * - every node to the right of a[n] are greater or equal to it */
401 static int partition_nth_element(BVHNode **a, int _begin, int _end, int n, int axis)
402 {
403         int begin = _begin, end = _end, cut;
404         while (end - begin > 3) {
405                 cut = bvh_partition(a, begin, end, bvh_medianof3(a, begin, (begin + end) / 2, end - 1, axis), axis);
406                 if (cut <= n)
407                         begin = cut;
408                 else
409                         end = cut;
410         }
411         bvh_insertionsort(a, begin, end, axis);
412
413         return n;
414 }
415
416 #ifdef USE_SKIP_LINKS
417 static void build_skip_links(BVHTree *tree, BVHNode *node, BVHNode *left, BVHNode *right)
418 {
419         int i;
420         
421         node->skip[0] = left;
422         node->skip[1] = right;
423         
424         for (i = 0; i < node->totnode; i++) {
425                 if (i + 1 < node->totnode)
426                         build_skip_links(tree, node->children[i], left, node->children[i + 1]);
427                 else
428                         build_skip_links(tree, node->children[i], left, right);
429
430                 left = node->children[i];
431         }
432 }
433 #endif
434
435 /*
436  * BVHTree bounding volumes functions
437  */
438 static void create_kdop_hull(BVHTree *tree, BVHNode *node, const float *co, int numpoints, int moving)
439 {
440         float newminmax;
441         float *bv = node->bv;
442         int k;
443         axis_t axis_iter;
444         
445         /* don't init boudings for the moving case */
446         if (!moving) {
447                 node_minmax_init(tree, node);
448         }
449
450         for (k = 0; k < numpoints; k++) {
451                 /* for all Axes. */
452                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
453                         newminmax = dot_v3v3(&co[k * 3], KDOP_AXES[axis_iter]);
454                         if (newminmax < bv[2 * axis_iter])
455                                 bv[2 * axis_iter] = newminmax;
456                         if (newminmax > bv[(2 * axis_iter) + 1])
457                                 bv[(2 * axis_iter) + 1] = newminmax;
458                 }
459         }
460 }
461
462 /**
463  * \note depends on the fact that the BVH's for each face is already build
464  */
465 static void refit_kdop_hull(BVHTree *tree, BVHNode *node, int start, int end)
466 {
467         float newmin, newmax;
468         float *bv = node->bv;
469         int j;
470         axis_t axis_iter;
471
472         node_minmax_init(tree, node);
473
474         for (j = start; j < end; j++) {
475                 /* for all Axes. */
476                 for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
477                         newmin = tree->nodes[j]->bv[(2 * axis_iter)];
478                         if ((newmin < bv[(2 * axis_iter)]))
479                                 bv[(2 * axis_iter)] = newmin;
480
481                         newmax = tree->nodes[j]->bv[(2 * axis_iter) + 1];
482                         if ((newmax > bv[(2 * axis_iter) + 1]))
483                                 bv[(2 * axis_iter) + 1] = newmax;
484                 }
485         }
486
487 }
488
489 /**
490  * only supports x,y,z axis in the moment
491  * but we should use a plain and simple function here for speed sake */
492 static char get_largest_axis(const float *bv)
493 {
494         float middle_point[3];
495
496         middle_point[0] = (bv[1]) - (bv[0]); /* x axis */
497         middle_point[1] = (bv[3]) - (bv[2]); /* y axis */
498         middle_point[2] = (bv[5]) - (bv[4]); /* z axis */
499         if (middle_point[0] > middle_point[1]) {
500                 if (middle_point[0] > middle_point[2])
501                         return 1;  /* max x axis */
502                 else
503                         return 5;  /* max z axis */
504         }
505         else {
506                 if (middle_point[1] > middle_point[2])
507                         return 3;  /* max y axis */
508                 else
509                         return 5;  /* max z axis */
510         }
511 }
512
513 /**
514  * bottom-up update of bvh node BV
515  * join the children on the parent BV */
516 static void node_join(BVHTree *tree, BVHNode *node)
517 {
518         int i;
519         axis_t axis_iter;
520
521         node_minmax_init(tree, node);
522         
523         for (i = 0; i < tree->tree_type; i++) {
524                 if (node->children[i]) {
525                         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
526                                 /* update minimum */
527                                 if (node->children[i]->bv[(2 * axis_iter)] < node->bv[(2 * axis_iter)])
528                                         node->bv[(2 * axis_iter)] = node->children[i]->bv[(2 * axis_iter)];
529
530                                 /* update maximum */
531                                 if (node->children[i]->bv[(2 * axis_iter) + 1] > node->bv[(2 * axis_iter) + 1])
532                                         node->bv[(2 * axis_iter) + 1] = node->children[i]->bv[(2 * axis_iter) + 1];
533                         }
534                 }
535                 else
536                         break;
537         }
538 }
539
540 /*
541  * Debug and information functions
542  */
543 #if 0
544 static void bvhtree_print_tree(BVHTree *tree, BVHNode *node, int depth)
545 {
546         int i;
547         axis_t axis_iter;
548
549         for (i = 0; i < depth; i++) printf(" ");
550         printf(" - %d (%ld): ", node->index, (long int)(node - tree->nodearray));
551         for (axis_iter = (axis_t)(2 * tree->start_axis);
552              axis_iter < (axis_t)(2 * tree->stop_axis);
553              axis_iter++)
554         {
555                 printf("%.3f ", node->bv[axis_iter]);
556         }
557         printf("\n");
558
559         for (i = 0; i < tree->tree_type; i++)
560                 if (node->children[i])
561                         bvhtree_print_tree(tree, node->children[i], depth + 1);
562 }
563
564 static void bvhtree_info(BVHTree *tree)
565 {
566         printf("BVHTree info\n");
567         printf("tree_type = %d, axis = %d, epsilon = %f\n", tree->tree_type, tree->axis, tree->epsilon);
568         printf("nodes = %d, branches = %d, leafs = %d\n", tree->totbranch + tree->totleaf,  tree->totbranch, tree->totleaf);
569         printf("Memory per node = %ldbytes\n", sizeof(BVHNode) + sizeof(BVHNode *) * tree->tree_type + sizeof(float) * tree->axis);
570         printf("BV memory = %dbytes\n", (int)MEM_allocN_len(tree->nodebv));
571
572         printf("Total memory = %ldbytes\n", sizeof(BVHTree) +
573                MEM_allocN_len(tree->nodes) +
574                MEM_allocN_len(tree->nodearray) +
575                MEM_allocN_len(tree->nodechild) +
576                MEM_allocN_len(tree->nodebv));
577
578 //      bvhtree_print_tree(tree, tree->nodes[tree->totleaf], 0);
579 }
580 #endif
581
582 #if 0
583
584
585 static void verify_tree(BVHTree *tree)
586 {
587         int i, j, check = 0;
588         
589         /* check the pointer list */
590         for (i = 0; i < tree->totleaf; i++) {
591                 if (tree->nodes[i]->parent == NULL) {
592                         printf("Leaf has no parent: %d\n", i);
593                 }
594                 else {
595                         for (j = 0; j < tree->tree_type; j++) {
596                                 if (tree->nodes[i]->parent->children[j] == tree->nodes[i])
597                                         check = 1;
598                         }
599                         if (!check) {
600                                 printf("Parent child relationship doesn't match: %d\n", i);
601                         }
602                         check = 0;
603                 }
604         }
605         
606         /* check the leaf list */
607         for (i = 0; i < tree->totleaf; i++) {
608                 if (tree->nodearray[i].parent == NULL) {
609                         printf("Leaf has no parent: %d\n", i);
610                 }
611                 else {
612                         for (j = 0; j < tree->tree_type; j++) {
613                                 if (tree->nodearray[i].parent->children[j] == &tree->nodearray[i])
614                                         check = 1;
615                         }
616                         if (!check) {
617                                 printf("Parent child relationship doesn't match: %d\n", i);
618                         }
619                         check = 0;
620                 }
621         }
622         
623         printf("branches: %d, leafs: %d, total: %d\n", tree->totbranch, tree->totleaf, tree->totbranch + tree->totleaf);
624 }
625 #endif
626
627 /* Helper data and structures to build a min-leaf generalized implicit tree
628  * This code can be easily reduced (basicly this is only method to calculate pow(k, n) in O(1).. and stuff like that) */
629 typedef struct BVHBuildHelper {
630         int tree_type;              /* */
631         int totleafs;               /* */
632
633         int leafs_per_child[32];    /* Min number of leafs that are archievable from a node at depth N */
634         int branches_on_level[32];  /* Number of nodes at depth N (tree_type^N) */
635
636         int remain_leafs;           /* Number of leafs that are placed on the level that is not 100% filled */
637
638 } BVHBuildHelper;
639
640 static void build_implicit_tree_helper(BVHTree *tree, BVHBuildHelper *data)
641 {
642         int depth = 0;
643         int remain;
644         int nnodes;
645
646         data->totleafs = tree->totleaf;
647         data->tree_type = tree->tree_type;
648
649         /* Calculate the smallest tree_type^n such that tree_type^n >= num_leafs */
650         for (data->leafs_per_child[0] = 1;
651              data->leafs_per_child[0] <  data->totleafs;
652              data->leafs_per_child[0] *= data->tree_type)
653         {
654                 /* pass */
655         }
656
657         data->branches_on_level[0] = 1;
658
659         for (depth = 1; (depth < 32) && data->leafs_per_child[depth - 1]; depth++) {
660                 data->branches_on_level[depth] = data->branches_on_level[depth - 1] * data->tree_type;
661                 data->leafs_per_child[depth] = data->leafs_per_child[depth - 1] / data->tree_type;
662         }
663
664         remain = data->totleafs - data->leafs_per_child[1];
665         nnodes = (remain + data->tree_type - 2) / (data->tree_type - 1);
666         data->remain_leafs = remain + nnodes;
667 }
668
669 // return the min index of all the leafs archivable with the given branch
670 static int implicit_leafs_index(BVHBuildHelper *data, int depth, int child_index)
671 {
672         int min_leaf_index = child_index * data->leafs_per_child[depth - 1];
673         if (min_leaf_index <= data->remain_leafs)
674                 return min_leaf_index;
675         else if (data->leafs_per_child[depth])
676                 return data->totleafs - (data->branches_on_level[depth - 1] - child_index) * data->leafs_per_child[depth];
677         else
678                 return data->remain_leafs;
679 }
680
681 /**
682  * Generalized implicit tree build
683  *
684  * An implicit tree is a tree where its structure is implied, thus there is no need to store child pointers or indexs.
685  * Its possible to find the position of the child or the parent with simple maths (multiplication and adittion). This type
686  * of tree is for example used on heaps.. where node N has its childs at indexs N*2 and N*2+1.
687  *
688  * Although in this case the tree type is general.. and not know until runtime.
689  * tree_type stands for the maximum number of childs that a tree node can have.
690  * All tree types >= 2 are supported.
691  *
692  * Advantages of the used trees include:
693  *  - No need to store child/parent relations (they are implicit);
694  *  - Any node child always has an index greater than the parent;
695  *  - Brother nodes are sequential in memory;
696  *
697  *
698  * Some math relations derived for general implicit trees:
699  *
700  *   K = tree_type, ( 2 <= K )
701  *   ROOT = 1
702  *   N child of node A = A * K + (2 - K) + N, (0 <= N < K)
703  *
704  * Util methods:
705  *   TODO...
706  *    (looping elements, knowing if its a leaf or not.. etc...)
707  */
708
709 /* This functions returns the number of branches needed to have the requested number of leafs. */
710 static int implicit_needed_branches(int tree_type, int leafs)
711 {
712         return max_ii(1, (leafs + tree_type - 3) / (tree_type - 1));
713 }
714
715 /**
716  * This function handles the problem of "sorting" the leafs (along the split_axis).
717  *
718  * It arranges the elements in the given partitions such that:
719  *  - any element in partition N is less or equal to any element in partition N+1.
720  *  - if all elements are different all partition will get the same subset of elements
721  *    as if the array was sorted.
722  *
723  * partition P is described as the elements in the range ( nth[P], nth[P+1] ]
724  *
725  * TODO: This can be optimized a bit by doing a specialized nth_element instead of K nth_elements
726  */
727 static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int split_axis)
728 {
729         int i;
730         for (i = 0; i < partitions - 1; i++) {
731                 if (nth[i] >= nth[partitions])
732                         break;
733
734                 partition_nth_element(leafs_array, nth[i], nth[partitions], nth[i + 1], split_axis);
735         }
736 }
737
738 typedef struct BVHDivNodesData {
739         BVHTree *tree;
740         BVHNode *branches_array;
741         BVHNode **leafs_array;
742
743         int tree_type;
744         int tree_offset;
745
746         BVHBuildHelper *data;
747
748         int depth;
749         int i;
750         int first_of_next_level;
751 } BVHDivNodesData;
752
753 static void non_recursive_bvh_div_nodes_task_cb(void *userdata, const int j)
754 {
755         BVHDivNodesData *data = userdata;
756
757         int k;
758         const int parent_level_index = j - data->i;
759         BVHNode *parent = data->branches_array + j;
760         int nth_positions[MAX_TREETYPE + 1];
761         char split_axis;
762
763         int parent_leafs_begin = implicit_leafs_index(data->data, data->depth, parent_level_index);
764         int parent_leafs_end   = implicit_leafs_index(data->data, data->depth, parent_level_index + 1);
765
766         /* This calculates the bounding box of this branch
767          * and chooses the largest axis as the axis to divide leafs */
768         refit_kdop_hull(data->tree, parent, parent_leafs_begin, parent_leafs_end);
769         split_axis = get_largest_axis(parent->bv);
770
771         /* Save split axis (this can be used on raytracing to speedup the query time) */
772         parent->main_axis = split_axis / 2;
773
774         /* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
775          * Only to assure that the elements are partitioned on a way that each child takes the elements
776          * it would take in case the whole array was sorted.
777          * Split_leafs takes care of that "sort" problem. */
778         nth_positions[0] = parent_leafs_begin;
779         nth_positions[data->tree_type] = parent_leafs_end;
780         for (k = 1; k < data->tree_type; k++) {
781                 const int child_index = j * data->tree_type + data->tree_offset + k;
782                 const int child_level_index = child_index - data->first_of_next_level; /* child level index */
783                 nth_positions[k] = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
784         }
785
786         split_leafs(data->leafs_array, nth_positions, data->tree_type, split_axis);
787
788         /* Setup children and totnode counters
789          * Not really needed but currently most of BVH code relies on having an explicit children structure */
790         for (k = 0; k < data->tree_type; k++) {
791                 const int child_index = j * data->tree_type + data->tree_offset + k;
792                 const int child_level_index = child_index - data->first_of_next_level; /* child level index */
793
794                 const int child_leafs_begin = implicit_leafs_index(data->data, data->depth + 1, child_level_index);
795                 const int child_leafs_end   = implicit_leafs_index(data->data, data->depth + 1, child_level_index + 1);
796
797                 if (child_leafs_end - child_leafs_begin > 1) {
798                         parent->children[k] = data->branches_array + child_index;
799                         parent->children[k]->parent = parent;
800                 }
801                 else if (child_leafs_end - child_leafs_begin == 1) {
802                         parent->children[k] = data->leafs_array[child_leafs_begin];
803                         parent->children[k]->parent = parent;
804                 }
805                 else {
806                         break;
807                 }
808
809                 parent->totnode = (char)(k + 1);
810         }
811 }
812
813 /**
814  * This functions builds an optimal implicit tree from the given leafs.
815  * Where optimal stands for:
816  *  - The resulting tree will have the smallest number of branches;
817  *  - At most only one branch will have NULL childs;
818  *  - All leafs will be stored at level N or N+1.
819  *
820  * This function creates an implicit tree on branches_array, the leafs are given on the leafs_array.
821  *
822  * The tree is built per depth levels. First branches at depth 1.. then branches at depth 2.. etc..
823  * The reason is that we can build level N+1 from level N without any data dependencies.. thus it allows
824  * to use multithread building.
825  *
826  * To archive this is necessary to find how much leafs are accessible from a certain branch, BVHBuildHelper
827  * implicit_needed_branches and implicit_leafs_index are auxiliary functions to solve that "optimal-split".
828  */
829 static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array, BVHNode **leafs_array, int num_leafs)
830 {
831         int i;
832
833         const int tree_type   = tree->tree_type;
834         const int tree_offset = 2 - tree->tree_type; /* this value is 0 (on binary trees) and negative on the others */
835         const int num_branches = implicit_needed_branches(tree_type, num_leafs);
836
837         BVHBuildHelper data;
838         int depth;
839         
840         /* set parent from root node to NULL */
841         BVHNode *tmp = branches_array + 0;
842         tmp->parent = NULL;
843
844         /* Most of bvhtree code relies on 1-leaf trees having at least one branch
845          * We handle that special case here */
846         if (num_leafs == 1) {
847                 BVHNode *root = branches_array + 0;
848                 refit_kdop_hull(tree, root, 0, num_leafs);
849                 root->main_axis = get_largest_axis(root->bv) / 2;
850                 root->totnode = 1;
851                 root->children[0] = leafs_array[0];
852                 root->children[0]->parent = root;
853                 return;
854         }
855
856         branches_array--;  /* Implicit trees use 1-based indexs */
857
858         build_implicit_tree_helper(tree, &data);
859
860         BVHDivNodesData cb_data = {
861                 .tree = tree, .branches_array = branches_array, .leafs_array = leafs_array,
862                 .tree_type = tree_type, .tree_offset = tree_offset, .data = &data,
863                 .first_of_next_level = 0, .depth = 0, .i = 0,
864         };
865
866         /* Loop tree levels (log N) loops */
867         for (i = 1, depth = 1; i <= num_branches; i = i * tree_type + tree_offset, depth++) {
868                 const int first_of_next_level = i * tree_type + tree_offset;
869                 const int end_j = min_ii(first_of_next_level, num_branches + 1);  /* index of last branch on this level */
870
871                 /* Loop all branches on this level */
872                 cb_data.first_of_next_level = first_of_next_level;
873                 cb_data.i = i;
874                 cb_data.depth = depth;
875
876                 BLI_task_parallel_range(
877                             i, end_j, &cb_data, non_recursive_bvh_div_nodes_task_cb,
878                             num_leafs > KDOPBVH_THREAD_LEAF_THRESHOLD);
879         }
880 }
881
882 /* -------------------------------------------------------------------- */
883 /* BLI_bvhtree api */
884
885 /**
886  * \note many callers don't check for ``NULL`` return.
887  */
888 BVHTree *BLI_bvhtree_new(int maxsize, float epsilon, char tree_type, char axis)
889 {
890         BVHTree *tree;
891         int numnodes, i;
892
893         BLI_assert(tree_type >= 2 && tree_type <= MAX_TREETYPE);
894
895         tree = MEM_callocN(sizeof(BVHTree), "BVHTree");
896
897         /* tree epsilon must be >= FLT_EPSILON
898          * so that tangent rays can still hit a bounding volume..
899          * this bug would show up when casting a ray aligned with a kdop-axis and with an edge of 2 faces */
900         epsilon = max_ff(FLT_EPSILON, epsilon);
901
902         if (tree) {
903                 tree->epsilon = epsilon;
904                 tree->tree_type = tree_type;
905                 tree->axis = axis;
906
907                 if (axis == 26) {
908                         tree->start_axis = 0;
909                         tree->stop_axis = 13;
910                 }
911                 else if (axis == 18) {
912                         tree->start_axis = 7;
913                         tree->stop_axis = 13;
914                 }
915                 else if (axis == 14) {
916                         tree->start_axis = 0;
917                         tree->stop_axis = 7;
918                 }
919                 else if (axis == 8) { /* AABB */
920                         tree->start_axis = 0;
921                         tree->stop_axis = 4;
922                 }
923                 else if (axis == 6) { /* OBB */
924                         tree->start_axis = 0;
925                         tree->stop_axis = 3;
926                 }
927                 else {
928                         /* should never happen! */
929                         BLI_assert(0);
930
931                         goto fail;
932                 }
933
934
935                 /* Allocate arrays */
936                 numnodes = maxsize + implicit_needed_branches(tree_type, maxsize) + tree_type;
937
938                 tree->nodes = MEM_callocN(sizeof(BVHNode *) * (size_t)numnodes, "BVHNodes");
939                 tree->nodebv = MEM_callocN(sizeof(float) * (size_t)(axis * numnodes), "BVHNodeBV");
940                 tree->nodechild = MEM_callocN(sizeof(BVHNode *) * (size_t)(tree_type * numnodes), "BVHNodeBV");
941                 tree->nodearray = MEM_callocN(sizeof(BVHNode) * (size_t)numnodes, "BVHNodeArray");
942                 
943                 if (UNLIKELY((!tree->nodes) ||
944                              (!tree->nodebv) ||
945                              (!tree->nodechild) ||
946                              (!tree->nodearray)))
947                 {
948                         goto fail;
949                 }
950
951                 /* link the dynamic bv and child links */
952                 for (i = 0; i < numnodes; i++) {
953                         tree->nodearray[i].bv = &tree->nodebv[i * axis];
954                         tree->nodearray[i].children = &tree->nodechild[i * tree_type];
955                 }
956                 
957         }
958         return tree;
959
960
961 fail:
962         MEM_SAFE_FREE(tree->nodes);
963         MEM_SAFE_FREE(tree->nodebv);
964         MEM_SAFE_FREE(tree->nodechild);
965         MEM_SAFE_FREE(tree->nodearray);
966
967         MEM_freeN(tree);
968
969         return NULL;
970 }
971
972 void BLI_bvhtree_free(BVHTree *tree)
973 {
974         if (tree) {
975                 MEM_freeN(tree->nodes);
976                 MEM_freeN(tree->nodearray);
977                 MEM_freeN(tree->nodebv);
978                 MEM_freeN(tree->nodechild);
979                 MEM_freeN(tree);
980         }
981 }
982
983 void BLI_bvhtree_balance(BVHTree *tree)
984 {
985         int i;
986
987         BVHNode *branches_array = tree->nodearray + tree->totleaf;
988         BVHNode **leafs_array    = tree->nodes;
989
990         /* This function should only be called once (some big bug goes here if its being called more than once per tree) */
991         BLI_assert(tree->totbranch == 0);
992
993         /* Build the implicit tree */
994         non_recursive_bvh_div_nodes(tree, branches_array, leafs_array, tree->totleaf);
995
996         /* current code expects the branches to be linked to the nodes array
997          * we perform that linkage here */
998         tree->totbranch = implicit_needed_branches(tree->tree_type, tree->totleaf);
999         for (i = 0; i < tree->totbranch; i++)
1000                 tree->nodes[tree->totleaf + i] = branches_array + i;
1001
1002 #ifdef USE_SKIP_LINKS
1003         build_skip_links(tree, tree->nodes[tree->totleaf], NULL, NULL);
1004 #endif
1005
1006         /* bvhtree_info(tree); */
1007 }
1008
1009 void BLI_bvhtree_insert(BVHTree *tree, int index, const float co[3], int numpoints)
1010 {
1011         axis_t axis_iter;
1012         BVHNode *node = NULL;
1013
1014         /* insert should only possible as long as tree->totbranch is 0 */
1015         BLI_assert(tree->totbranch <= 0);
1016         BLI_assert((size_t)tree->totleaf < MEM_allocN_len(tree->nodes) / sizeof(*(tree->nodes)));
1017
1018         node = tree->nodes[tree->totleaf] = &(tree->nodearray[tree->totleaf]);
1019         tree->totleaf++;
1020
1021         create_kdop_hull(tree, node, co, numpoints, 0);
1022         node->index = index;
1023
1024         /* inflate the bv with some epsilon */
1025         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1026                 node->bv[(2 * axis_iter)] -= tree->epsilon; /* minimum */
1027                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1028         }
1029 }
1030
1031
1032 /* call before BLI_bvhtree_update_tree() */
1033 bool BLI_bvhtree_update_node(BVHTree *tree, int index, const float co[3], const float co_moving[3], int numpoints)
1034 {
1035         BVHNode *node = NULL;
1036         axis_t axis_iter;
1037         
1038         /* check if index exists */
1039         if (index > tree->totleaf)
1040                 return false;
1041         
1042         node = tree->nodearray + index;
1043         
1044         create_kdop_hull(tree, node, co, numpoints, 0);
1045         
1046         if (co_moving)
1047                 create_kdop_hull(tree, node, co_moving, numpoints, 1);
1048         
1049         /* inflate the bv with some epsilon */
1050         for (axis_iter = tree->start_axis; axis_iter < tree->stop_axis; axis_iter++) {
1051                 node->bv[(2 * axis_iter)]     -= tree->epsilon; /* minimum */
1052                 node->bv[(2 * axis_iter) + 1] += tree->epsilon; /* maximum */
1053         }
1054
1055         return true;
1056 }
1057
1058 /* call BLI_bvhtree_update_node() first for every node/point/triangle */
1059 void BLI_bvhtree_update_tree(BVHTree *tree)
1060 {
1061         /* Update bottom=>top
1062          * TRICKY: the way we build the tree all the childs have an index greater than the parent
1063          * This allows us todo a bottom up update by starting on the bigger numbered branch */
1064
1065         BVHNode **root  = tree->nodes + tree->totleaf;
1066         BVHNode **index = tree->nodes + tree->totleaf + tree->totbranch - 1;
1067
1068         for (; index >= root; index--)
1069                 node_join(tree, *index);
1070 }
1071
1072 float BLI_bvhtree_getepsilon(const BVHTree *tree)
1073 {
1074         return tree->epsilon;
1075 }
1076
1077
1078 /* -------------------------------------------------------------------- */
1079 /* BLI_bvhtree_overlap */
1080
1081 /**
1082  * overlap - is it possible for 2 bv's to collide ?
1083  */
1084 static bool tree_overlap_test(const BVHNode *node1, const BVHNode *node2, axis_t start_axis, axis_t stop_axis)
1085 {
1086         const float *bv1     = node1->bv + (start_axis << 1);
1087         const float *bv2     = node2->bv + (start_axis << 1);
1088         const float *bv1_end = node1->bv + (stop_axis  << 1);
1089         
1090         /* test all axis if min + max overlap */
1091         for (; bv1 != bv1_end; bv1 += 2, bv2 += 2) {
1092                 if ((bv1[0] > bv2[1]) || (bv2[0] > bv1[1])) {
1093                         return 0;
1094                 }
1095         }
1096
1097         return 1;
1098 }
1099
1100 static void tree_overlap_traverse(
1101         BVHOverlapData_Thread *data_thread,
1102         const BVHNode *node1, const BVHNode *node2)
1103 {
1104         BVHOverlapData_Shared *data = data_thread->shared;
1105         int j;
1106
1107         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1108                 /* check if node1 is a leaf */
1109                 if (!node1->totnode) {
1110                         /* check if node2 is a leaf */
1111                         if (!node2->totnode) {
1112                                 BVHTreeOverlap *overlap;
1113
1114                                 if (UNLIKELY(node1 == node2)) {
1115                                         return;
1116                                 }
1117
1118                                 /* both leafs, insert overlap! */
1119                                 overlap = BLI_stack_push_r(data_thread->overlap);
1120                                 overlap->indexA = node1->index;
1121                                 overlap->indexB = node2->index;
1122                         }
1123                         else {
1124                                 for (j = 0; j < data->tree2->tree_type; j++) {
1125                                         if (node2->children[j]) {
1126                                                 tree_overlap_traverse(data_thread, node1, node2->children[j]);
1127                                         }
1128                                 }
1129                         }
1130                 }
1131                 else {
1132                         for (j = 0; j < data->tree2->tree_type; j++) {
1133                                 if (node1->children[j]) {
1134                                         tree_overlap_traverse(data_thread, node1->children[j], node2);
1135                                 }
1136                         }
1137                 }
1138         }
1139 }
1140
1141 /**
1142  * a version of #tree_overlap_traverse that runs a callback to check if the nodes really intersect.
1143  */
1144 static void tree_overlap_traverse_cb(
1145         BVHOverlapData_Thread *data_thread,
1146         const BVHNode *node1, const BVHNode *node2)
1147 {
1148         BVHOverlapData_Shared *data = data_thread->shared;
1149         int j;
1150
1151         if (tree_overlap_test(node1, node2, data->start_axis, data->stop_axis)) {
1152                 /* check if node1 is a leaf */
1153                 if (!node1->totnode) {
1154                         /* check if node2 is a leaf */
1155                         if (!node2->totnode) {
1156                                 BVHTreeOverlap *overlap;
1157
1158                                 if (UNLIKELY(node1 == node2)) {
1159                                         return;
1160                                 }
1161
1162                                 /* only difference to tree_overlap_traverse! */
1163                                 if (data->callback(data->userdata, node1->index, node2->index, data_thread->thread)) {
1164                                         /* both leafs, insert overlap! */
1165                                         overlap = BLI_stack_push_r(data_thread->overlap);
1166                                         overlap->indexA = node1->index;
1167                                         overlap->indexB = node2->index;
1168                                 }
1169                         }
1170                         else {
1171                                 for (j = 0; j < data->tree2->tree_type; j++) {
1172                                         if (node2->children[j]) {
1173                                                 tree_overlap_traverse_cb(data_thread, node1, node2->children[j]);
1174                                         }
1175                                 }
1176                         }
1177                 }
1178                 else {
1179                         for (j = 0; j < data->tree2->tree_type; j++) {
1180                                 if (node1->children[j]) {
1181                                         tree_overlap_traverse_cb(data_thread, node1->children[j], node2);
1182                                 }
1183                         }
1184                 }
1185         }
1186 }
1187
1188 /**
1189  * Use to check the total number of threads #BLI_bvhtree_overlap will use.
1190  *
1191  * \warning Must be the first tree passed to #BLI_bvhtree_overlap!
1192  */
1193 int BLI_bvhtree_overlap_thread_num(const BVHTree *tree)
1194 {
1195         return (int)MIN2(tree->tree_type, tree->nodes[tree->totleaf]->totnode);
1196 }
1197
1198 static void bvhtree_overlap_task_cb(void *userdata, const int j)
1199 {
1200         BVHOverlapData_Thread *data = &((BVHOverlapData_Thread *)userdata)[j];
1201         BVHOverlapData_Shared *data_shared = data->shared;
1202
1203         if (data_shared->callback) {
1204                 tree_overlap_traverse_cb(
1205                             data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
1206                             data_shared->tree2->nodes[data_shared->tree2->totleaf]);
1207         }
1208         else {
1209                 tree_overlap_traverse(
1210                             data, data_shared->tree1->nodes[data_shared->tree1->totleaf]->children[j],
1211                             data_shared->tree2->nodes[data_shared->tree2->totleaf]);
1212         }
1213 }
1214
1215 BVHTreeOverlap *BLI_bvhtree_overlap(
1216         const BVHTree *tree1, const BVHTree *tree2, unsigned int *r_overlap_tot,
1217         /* optional callback to test the overlap before adding (must be thread-safe!) */
1218         BVHTree_OverlapCallback callback, void *userdata)
1219 {
1220         const int thread_num = BLI_bvhtree_overlap_thread_num(tree1);
1221         int j;
1222         size_t total = 0;
1223         BVHTreeOverlap *overlap = NULL, *to = NULL;
1224         BVHOverlapData_Shared data_shared;
1225         BVHOverlapData_Thread *data = BLI_array_alloca(data, (size_t)thread_num);
1226         axis_t start_axis, stop_axis;
1227         
1228         /* check for compatibility of both trees (can't compare 14-DOP with 18-DOP) */
1229         if (UNLIKELY((tree1->axis != tree2->axis) &&
1230                      (tree1->axis == 14 || tree2->axis == 14) &&
1231                      (tree1->axis == 18 || tree2->axis == 18)))
1232         {
1233                 BLI_assert(0);
1234                 return NULL;
1235         }
1236
1237         start_axis = min_axis(tree1->start_axis, tree2->start_axis);
1238         stop_axis  = min_axis(tree1->stop_axis,  tree2->stop_axis);
1239         
1240         /* fast check root nodes for collision before doing big splitting + traversal */
1241         if (!tree_overlap_test(tree1->nodes[tree1->totleaf], tree2->nodes[tree2->totleaf], start_axis, stop_axis)) {
1242                 return NULL;
1243         }
1244
1245         data_shared.tree1 = tree1;
1246         data_shared.tree2 = tree2;
1247         data_shared.start_axis = start_axis;
1248         data_shared.stop_axis = stop_axis;
1249
1250         /* can be NULL */
1251         data_shared.callback = callback;
1252         data_shared.userdata = userdata;
1253
1254         for (j = 0; j < thread_num; j++) {
1255                 /* init BVHOverlapData_Thread */
1256                 data[j].shared = &data_shared;
1257                 data[j].overlap = BLI_stack_new(sizeof(BVHTreeOverlap), __func__);
1258
1259                 /* for callback */
1260                 data[j].thread = j;
1261         }
1262
1263         BLI_task_parallel_range(
1264                     0, thread_num, data, bvhtree_overlap_task_cb,
1265                     tree1->totleaf > KDOPBVH_THREAD_LEAF_THRESHOLD);
1266         
1267         for (j = 0; j < thread_num; j++)
1268                 total += BLI_stack_count(data[j].overlap);
1269         
1270         to = overlap = MEM_mallocN(sizeof(BVHTreeOverlap) * total, "BVHTreeOverlap");
1271         
1272         for (j = 0; j < thread_num; j++) {
1273                 unsigned int count = (unsigned int)BLI_stack_count(data[j].overlap);
1274                 BLI_stack_pop_n(data[j].overlap, to, count);
1275                 BLI_stack_free(data[j].overlap);
1276                 to += count;
1277         }
1278
1279         *r_overlap_tot = (unsigned int)total;
1280         return overlap;
1281 }
1282
1283 /* Determines the nearest point of the given node BV. Returns the squared distance to that point. */
1284 static float calc_nearest_point_squared(const float proj[3], BVHNode *node, float nearest[3])
1285 {
1286         int i;
1287         const float *bv = node->bv;
1288
1289         /* nearest on AABB hull */
1290         for (i = 0; i != 3; i++, bv += 2) {
1291                 if (bv[0] > proj[i])
1292                         nearest[i] = bv[0];
1293                 else if (bv[1] < proj[i])
1294                         nearest[i] = bv[1];
1295                 else
1296                         nearest[i] = proj[i]; 
1297         }
1298
1299 #if 0
1300         /* nearest on a general hull */
1301         copy_v3_v3(nearest, data->co);
1302         for (i = data->tree->start_axis; i != data->tree->stop_axis; i++, bv += 2) {
1303                 float proj = dot_v3v3(nearest, KDOP_AXES[i]);
1304                 float dl = bv[0] - proj;
1305                 float du = bv[1] - proj;
1306
1307                 if (dl > 0) {
1308                         madd_v3_v3fl(nearest, KDOP_AXES[i], dl);
1309                 }
1310                 else if (du < 0) {
1311                         madd_v3_v3fl(nearest, KDOP_AXES[i], du);
1312                 }
1313         }
1314 #endif
1315
1316         return len_squared_v3v3(proj, nearest);
1317 }
1318
1319 /* TODO: use a priority queue to reduce the number of nodes looked on */
1320 static void dfs_find_nearest_dfs(BVHNearestData *data, BVHNode *node)
1321 {
1322         if (node->totnode == 0) {
1323                 if (data->callback)
1324                         data->callback(data->userdata, node->index, data->co, &data->nearest);
1325                 else {
1326                         data->nearest.index = node->index;
1327                         data->nearest.dist_sq = calc_nearest_point_squared(data->proj, node, data->nearest.co);
1328                 }
1329         }
1330         else {
1331                 /* Better heuristic to pick the closest node to dive on */
1332                 int i;
1333                 float nearest[3];
1334
1335                 if (data->proj[node->main_axis] <= node->children[0]->bv[node->main_axis * 2 + 1]) {
1336
1337                         for (i = 0; i != node->totnode; i++) {
1338                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1339                                         continue;
1340                                 dfs_find_nearest_dfs(data, node->children[i]);
1341                         }
1342                 }
1343                 else {
1344                         for (i = node->totnode - 1; i >= 0; i--) {
1345                                 if (calc_nearest_point_squared(data->proj, node->children[i], nearest) >= data->nearest.dist_sq)
1346                                         continue;
1347                                 dfs_find_nearest_dfs(data, node->children[i]);
1348                         }
1349                 }
1350         }
1351 }
1352
1353 static void dfs_find_nearest_begin(BVHNearestData *data, BVHNode *node)
1354 {
1355         float nearest[3], dist_sq;
1356         dist_sq = calc_nearest_point_squared(data->proj, node, nearest);
1357         if (dist_sq >= data->nearest.dist_sq) {
1358                 return;
1359         }
1360         dfs_find_nearest_dfs(data, node);
1361 }
1362
1363
1364 #if 0
1365
1366 typedef struct NodeDistance {
1367         BVHNode *node;
1368         float dist;
1369
1370 } NodeDistance;
1371
1372 #define DEFAULT_FIND_NEAREST_HEAP_SIZE 1024
1373
1374 #define NodeDistance_priority(a, b) ((a).dist < (b).dist)
1375
1376 static void NodeDistance_push_heap(NodeDistance *heap, int heap_size)
1377 PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)
1378
1379 static void NodeDistance_pop_heap(NodeDistance *heap, int heap_size)
1380 POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)
1381
1382 /* NN function that uses an heap.. this functions leads to an optimal number of min-distance
1383  * but for normal tri-faces and BV 6-dop.. a simple dfs with local heuristics (as implemented
1384  * in source/blender/blenkernel/intern/shrinkwrap.c) works faster.
1385  *
1386  * It may make sense to use this function if the callback queries are very slow.. or if its impossible
1387  * to get a nice heuristic
1388  *
1389  * this function uses "malloc/free" instead of the MEM_* because it intends to be thread safe */
1390 static void bfs_find_nearest(BVHNearestData *data, BVHNode *node)
1391 {
1392         int i;
1393         NodeDistance default_heap[DEFAULT_FIND_NEAREST_HEAP_SIZE];
1394         NodeDistance *heap = default_heap, current;
1395         int heap_size = 0, max_heap_size = sizeof(default_heap) / sizeof(default_heap[0]);
1396         float nearest[3];
1397
1398         int callbacks = 0, push_heaps = 0;
1399
1400         if (node->totnode == 0) {
1401                 dfs_find_nearest_dfs(data, node);
1402                 return;
1403         }
1404
1405         current.node = node;
1406         current.dist = calc_nearest_point(data->proj, node, nearest);
1407
1408         while (current.dist < data->nearest.dist) {
1409 //              printf("%f : %f\n", current.dist, data->nearest.dist);
1410                 for (i = 0; i < current.node->totnode; i++) {
1411                         BVHNode *child = current.node->children[i];
1412                         if (child->totnode == 0) {
1413                                 callbacks++;
1414                                 dfs_find_nearest_dfs(data, child);
1415                         }
1416                         else {
1417                                 /* adjust heap size */
1418                                 if ((heap_size >= max_heap_size) &&
1419                                     ADJUST_MEMORY(default_heap, (void **)&heap, heap_size + 1, &max_heap_size, sizeof(heap[0])) == false)
1420                                 {
1421                                         printf("WARNING: bvh_find_nearest got out of memory\n");
1422
1423                                         if (heap != default_heap)
1424                                                 free(heap);
1425
1426                                         return;
1427                                 }
1428
1429                                 heap[heap_size].node = current.node->children[i];
1430                                 heap[heap_size].dist = calc_nearest_point(data->proj, current.node->children[i], nearest);
1431
1432                                 if (heap[heap_size].dist >= data->nearest.dist) continue;
1433                                 heap_size++;
1434
1435                                 NodeDistance_push_heap(heap, heap_size);
1436                                 //                      PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
1437                                 push_heaps++;
1438                         }
1439                 }
1440                 
1441                 if (heap_size == 0) break;
1442
1443                 current = heap[0];
1444                 NodeDistance_pop_heap(heap, heap_size);
1445 //              POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
1446                 heap_size--;
1447         }
1448
1449 //      printf("hsize=%d, callbacks=%d, pushs=%d\n", heap_size, callbacks, push_heaps);
1450
1451         if (heap != default_heap)
1452                 free(heap);
1453 }
1454 #endif
1455
1456
1457 int BLI_bvhtree_find_nearest(BVHTree *tree, const float co[3], BVHTreeNearest *nearest,
1458                              BVHTree_NearestPointCallback callback, void *userdata)
1459 {
1460         axis_t axis_iter;
1461
1462         BVHNearestData data;
1463         BVHNode *root = tree->nodes[tree->totleaf];
1464
1465         /* init data to search */
1466         data.tree = tree;
1467         data.co = co;
1468
1469         data.callback = callback;
1470         data.userdata = userdata;
1471
1472         for (axis_iter = data.tree->start_axis; axis_iter != data.tree->stop_axis; axis_iter++) {
1473                 data.proj[axis_iter] = dot_v3v3(data.co, KDOP_AXES[axis_iter]);
1474         }
1475
1476         if (nearest) {
1477                 memcpy(&data.nearest, nearest, sizeof(*nearest));
1478         }
1479         else {
1480                 data.nearest.index = -1;
1481                 data.nearest.dist_sq = FLT_MAX;
1482         }
1483
1484         /* dfs search */
1485         if (root)
1486                 dfs_find_nearest_begin(&data, root);
1487
1488         /* copy back results */
1489         if (nearest) {
1490                 memcpy(nearest, &data.nearest, sizeof(*nearest));
1491         }
1492
1493         return data.nearest.index;
1494 }
1495
1496
1497 /**
1498  * Raycast - BLI_bvhtree_ray_cast
1499  *
1500  * raycast is done by performing a DFS on the BVHTree and saving the closest hit
1501  */
1502
1503
1504 /* Determines the distance that the ray must travel to hit the bounding volume of the given node */
1505 static float ray_nearest_hit(BVHRayCastData *data, const float bv[6])
1506 {
1507         int i;
1508
1509         float low = 0, upper = data->hit.dist;
1510
1511         for (i = 0; i != 3; i++, bv += 2) {
1512                 if (data->ray_dot_axis[i] == 0.0f) {
1513                         /* axis aligned ray */
1514                         if (data->ray.origin[i] < bv[0] - data->ray.radius ||
1515                             data->ray.origin[i] > bv[1] + data->ray.radius)
1516                         {
1517                                 return FLT_MAX;
1518                         }
1519                 }
1520                 else {
1521                         float ll = (bv[0] - data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1522                         float lu = (bv[1] + data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1523
1524                         if (data->ray_dot_axis[i] > 0.0f) {
1525                                 if (ll > low) low = ll;
1526                                 if (lu < upper) upper = lu;
1527                         }
1528                         else {
1529                                 if (lu > low) low = lu;
1530                                 if (ll < upper) upper = ll;
1531                         }
1532         
1533                         if (low > upper) return FLT_MAX;
1534                 }
1535         }
1536         return low;
1537 }
1538
1539 /**
1540  * Determines the distance that the ray must travel to hit the bounding volume of the given node
1541  * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
1542  * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
1543  *
1544  * TODO this doesn't take data->ray.radius into consideration */
1545 static float fast_ray_nearest_hit(const BVHRayCastData *data, const BVHNode *node)
1546 {
1547         const float *bv = node->bv;
1548         
1549         float t1x = (bv[data->index[0]] - data->ray.origin[0]) * data->idot_axis[0];
1550         float t2x = (bv[data->index[1]] - data->ray.origin[0]) * data->idot_axis[0];
1551         float t1y = (bv[data->index[2]] - data->ray.origin[1]) * data->idot_axis[1];
1552         float t2y = (bv[data->index[3]] - data->ray.origin[1]) * data->idot_axis[1];
1553         float t1z = (bv[data->index[4]] - data->ray.origin[2]) * data->idot_axis[2];
1554         float t2z = (bv[data->index[5]] - data->ray.origin[2]) * data->idot_axis[2];
1555
1556         if ((t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) ||
1557             (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) ||
1558             (t1x > data->hit.dist || t1y > data->hit.dist || t1z > data->hit.dist))
1559         {
1560                 return FLT_MAX;
1561         }
1562         else {
1563                 return max_fff(t1x, t1y, t1z);
1564         }
1565 }
1566
1567 static void dfs_raycast(BVHRayCastData *data, BVHNode *node)
1568 {
1569         int i;
1570
1571         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1572          * before calling the ray-primitive functions */
1573         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1574         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1575         if (dist >= data->hit.dist) return;
1576
1577         if (node->totnode == 0) {
1578                 if (data->callback) {
1579                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1580                 }
1581                 else {
1582                         data->hit.index = node->index;
1583                         data->hit.dist  = dist;
1584                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1585                 }
1586         }
1587         else {
1588                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1589                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1590                         for (i = 0; i != node->totnode; i++) {
1591                                 dfs_raycast(data, node->children[i]);
1592                         }
1593                 }
1594                 else {
1595                         for (i = node->totnode - 1; i >= 0; i--) {
1596                                 dfs_raycast(data, node->children[i]);
1597                         }
1598                 }
1599         }
1600 }
1601
1602 static void dfs_raycast_all(BVHRayCastData *data, BVHNode *node)
1603 {
1604         int i;
1605
1606         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1607          * before calling the ray-primitive functions */
1608         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1609         float dist = (data->ray.radius == 0.0f) ? fast_ray_nearest_hit(data, node) : ray_nearest_hit(data, node->bv);
1610
1611         if (node->totnode == 0) {
1612                 if (data->callback) {
1613                         data->hit.index = -1;
1614                         data->hit.dist = BVH_RAYCAST_DIST_MAX;
1615                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1616                 }
1617                 else {
1618                         data->hit.index = node->index;
1619                         data->hit.dist  = dist;
1620                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1621                 }
1622         }
1623         else {
1624                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1625                 if (data->ray_dot_axis[node->main_axis] > 0.0f) {
1626                         for (i = 0; i != node->totnode; i++) {
1627                                 dfs_raycast_all(data, node->children[i]);
1628                         }
1629                 }
1630                 else {
1631                         for (i = node->totnode - 1; i >= 0; i--) {
1632                                 dfs_raycast_all(data, node->children[i]);
1633                         }
1634                 }
1635         }
1636 }
1637
1638 #if 0
1639 static void iterative_raycast(BVHRayCastData *data, BVHNode *node)
1640 {
1641         while (node) {
1642                 float dist = fast_ray_nearest_hit(data, node);
1643                 if (dist >= data->hit.dist) {
1644                         node = node->skip[1];
1645                         continue;
1646                 }
1647
1648                 if (node->totnode == 0) {
1649                         if (data->callback) {
1650                                 data->callback(data->userdata, node->index, &data->ray, &data->hit);
1651                         }
1652                         else {
1653                                 data->hit.index = node->index;
1654                                 data->hit.dist  = dist;
1655                                 madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1656                         }
1657                         
1658                         node = node->skip[1];
1659                 }
1660                 else {
1661                         node = node->children[0];
1662                 }
1663         }
1664 }
1665 #endif
1666
1667 static void bvhtree_ray_cast_data_precalc(BVHRayCastData *data, int flag)
1668 {
1669         int i;
1670
1671         for (i = 0; i < 3; i++) {
1672                 data->ray_dot_axis[i] = dot_v3v3(data->ray.direction, KDOP_AXES[i]);
1673                 data->idot_axis[i] = 1.0f / data->ray_dot_axis[i];
1674
1675                 if (fabsf(data->ray_dot_axis[i]) < FLT_EPSILON) {
1676                         data->ray_dot_axis[i] = 0.0;
1677                 }
1678                 data->index[2 * i] = data->idot_axis[i] < 0.0f ? 1 : 0;
1679                 data->index[2 * i + 1] = 1 - data->index[2 * i];
1680                 data->index[2 * i]   += 2 * i;
1681                 data->index[2 * i + 1] += 2 * i;
1682         }
1683
1684 #ifdef USE_KDOPBVH_WATERTIGHT
1685         if (flag & BVH_RAYCAST_WATERTIGHT) {
1686                 isect_ray_tri_watertight_v3_precalc(&data->isect_precalc, data->ray.direction);
1687                 data->ray.isect_precalc = &data->isect_precalc;
1688         }
1689         else {
1690                 data->ray.isect_precalc = NULL;
1691         }
1692 #else
1693         UNUSED_VARS(flag);
1694 #endif
1695 }
1696
1697 int BLI_bvhtree_ray_cast_ex(
1698         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1699         BVHTree_RayCastCallback callback, void *userdata,
1700         int flag)
1701 {
1702         BVHRayCastData data;
1703         BVHNode *root = tree->nodes[tree->totleaf];
1704
1705         BLI_ASSERT_UNIT_V3(dir);
1706
1707         data.tree = tree;
1708
1709         data.callback = callback;
1710         data.userdata = userdata;
1711
1712         copy_v3_v3(data.ray.origin,    co);
1713         copy_v3_v3(data.ray.direction, dir);
1714         data.ray.radius = radius;
1715
1716         bvhtree_ray_cast_data_precalc(&data, flag);
1717
1718         if (hit) {
1719                 memcpy(&data.hit, hit, sizeof(*hit));
1720         }
1721         else {
1722                 data.hit.index = -1;
1723                 data.hit.dist = BVH_RAYCAST_DIST_MAX;
1724         }
1725
1726         if (root) {
1727                 dfs_raycast(&data, root);
1728 //              iterative_raycast(&data, root);
1729         }
1730
1731
1732         if (hit)
1733                 memcpy(hit, &data.hit, sizeof(*hit));
1734
1735         return data.hit.index;
1736 }
1737
1738 int BLI_bvhtree_ray_cast(
1739         BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit,
1740         BVHTree_RayCastCallback callback, void *userdata)
1741 {
1742         return BLI_bvhtree_ray_cast_ex(tree, co, dir, radius, hit, callback, userdata, BVH_RAYCAST_DEFAULT);
1743 }
1744
1745 float BLI_bvhtree_bb_raycast(const float bv[6], const float light_start[3], const float light_end[3], float pos[3])
1746 {
1747         BVHRayCastData data;
1748         float dist;
1749
1750         data.hit.dist = BVH_RAYCAST_DIST_MAX;
1751         
1752         /* get light direction */
1753         sub_v3_v3v3(data.ray.direction, light_end, light_start);
1754         
1755         data.ray.radius = 0.0;
1756         
1757         copy_v3_v3(data.ray.origin, light_start);
1758
1759         normalize_v3(data.ray.direction);
1760         copy_v3_v3(data.ray_dot_axis, data.ray.direction);
1761         
1762         dist = ray_nearest_hit(&data, bv);
1763
1764         madd_v3_v3v3fl(pos, light_start, data.ray.direction, dist);
1765
1766         return dist;
1767         
1768 }
1769
1770 /**
1771  * Calls the callback for every ray intersection
1772  */
1773 int BLI_bvhtree_ray_cast_all_ex(
1774         BVHTree *tree, const float co[3], const float dir[3], float radius,
1775         BVHTree_RayCastCallback callback, void *userdata,
1776         int flag)
1777 {
1778         BVHRayCastData data;
1779         BVHNode *root = tree->nodes[tree->totleaf];
1780
1781         BLI_ASSERT_UNIT_V3(dir);
1782
1783         data.tree = tree;
1784
1785         data.callback = callback;
1786         data.userdata = userdata;
1787
1788         copy_v3_v3(data.ray.origin,    co);
1789         copy_v3_v3(data.ray.direction, dir);
1790         data.ray.radius = radius;
1791
1792         bvhtree_ray_cast_data_precalc(&data, flag);
1793
1794         data.hit.index = -1;
1795         data.hit.dist = BVH_RAYCAST_DIST_MAX;
1796
1797         if (root) {
1798                 dfs_raycast_all(&data, root);
1799         }
1800
1801         return data.hit.index;
1802 }
1803
1804 int BLI_bvhtree_ray_cast_all(
1805         BVHTree *tree, const float co[3], const float dir[3], float radius,
1806         BVHTree_RayCastCallback callback, void *userdata)
1807 {
1808         return BLI_bvhtree_ray_cast_all_ex(tree, co, dir, radius, callback, userdata, BVH_RAYCAST_DEFAULT);
1809 }
1810
1811 /**
1812  * Range Query - as request by broken :P
1813  *
1814  * Allocs and fills an array with the indexs of node that are on the given spherical range (center, radius) 
1815  * Returns the size of the array.
1816  */
1817 typedef struct RangeQueryData {
1818         BVHTree *tree;
1819         const float *center;
1820         float radius_sq;  /* squared radius */
1821
1822         int hits;
1823
1824         BVHTree_RangeQuery callback;
1825         void *userdata;
1826 } RangeQueryData;
1827
1828
1829 static void dfs_range_query(RangeQueryData *data, BVHNode *node)
1830 {
1831         if (node->totnode == 0) {
1832 #if 0   /*UNUSED*/
1833                 /* Calculate the node min-coords (if the node was a point then this is the point coordinates) */
1834                 float co[3];
1835                 co[0] = node->bv[0];
1836                 co[1] = node->bv[2];
1837                 co[2] = node->bv[4];
1838 #endif
1839         }
1840         else {
1841                 int i;
1842                 for (i = 0; i != node->totnode; i++) {
1843                         float nearest[3];
1844                         float dist_sq = calc_nearest_point_squared(data->center, node->children[i], nearest);
1845                         if (dist_sq < data->radius_sq) {
1846                                 /* Its a leaf.. call the callback */
1847                                 if (node->children[i]->totnode == 0) {
1848                                         data->hits++;
1849                                         data->callback(data->userdata, node->children[i]->index, dist_sq);
1850                                 }
1851                                 else
1852                                         dfs_range_query(data, node->children[i]);
1853                         }
1854                 }
1855         }
1856 }
1857
1858 int BLI_bvhtree_range_query(BVHTree *tree, const float co[3], float radius, BVHTree_RangeQuery callback, void *userdata)
1859 {
1860         BVHNode *root = tree->nodes[tree->totleaf];
1861
1862         RangeQueryData data;
1863         data.tree = tree;
1864         data.center = co;
1865         data.radius_sq = radius * radius;
1866         data.hits = 0;
1867
1868         data.callback = callback;
1869         data.userdata = userdata;
1870
1871         if (root != NULL) {
1872                 float nearest[3];
1873                 float dist_sq = calc_nearest_point_squared(data.center, root, nearest);
1874                 if (dist_sq < data.radius_sq) {
1875                         /* Its a leaf.. call the callback */
1876                         if (root->totnode == 0) {
1877                                 data.hits++;
1878                                 data.callback(data.userdata, root->index, dist_sq);
1879                         }
1880                         else
1881                                 dfs_range_query(&data, root);
1882                 }
1883         }
1884
1885         return data.hits;
1886 }