code cleanup: quiet uninitialized memory use warning for X11 - harmless in this case...
[blender.git] / source / blender / blenlib / intern / BLI_kdopbvh.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2006 by NaN Holding BV.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): Daniel Genrich, Andre Pinto
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/blenlib/intern/BLI_kdopbvh.c
29  *  \ingroup bli
30  */
31
32 #include <assert.h>
33
34 #include "MEM_guardedalloc.h"
35
36 #include "BLI_utildefines.h"
37 #include "BLI_kdopbvh.h"
38 #include "BLI_math.h"
39
40 #ifdef _OPENMP
41 #include <omp.h>
42 #endif
43
44 #define MAX_TREETYPE 32
45 #define DEFAULT_FIND_NEAREST_HEAP_SIZE 1024
46
47 typedef struct BVHNode {
48         struct BVHNode **children;
49         struct BVHNode *parent; /* some user defined traversed need that */
50         struct BVHNode *skip[2];
51         float *bv;      /* Bounding volume of all nodes, max 13 axis */
52         int index;      /* face, edge, vertex index */
53         char totnode;   /* how many nodes are used, used for speedup */
54         char main_axis; /* Axis used to split this node */
55 } BVHNode;
56
57 struct BVHTree {
58         BVHNode **nodes;
59         BVHNode *nodearray;     /* pre-alloc branch nodes */
60         BVHNode **nodechild;    /* pre-alloc childs for nodes */
61         float   *nodebv;        /* pre-alloc bounding-volumes for nodes */
62         float epsilon;          /* epslion is used for inflation of the k-dop      */
63         int totleaf;            /* leafs */
64         int totbranch;
65         char tree_type;         /* type of tree (4 => quadtree) */
66         char axis;              /* kdop type (6 => OBB, 7 => AABB, ...) */
67         char start_axis, stop_axis;  /* KDOP_AXES array indices according to axis */
68 };
69
70 typedef struct BVHOverlapData {
71         BVHTree *tree1, *tree2; 
72         BVHTreeOverlap *overlap; 
73         int i, max_overlap; /* i is number of overlaps */
74         int start_axis, stop_axis;
75 } BVHOverlapData;
76
77 typedef struct BVHNearestData {
78         BVHTree *tree;
79         const float *co;
80         BVHTree_NearestPointCallback callback;
81         void    *userdata;
82         float proj[13];         /* coordinates projection over axis */
83         BVHTreeNearest nearest;
84
85 } BVHNearestData;
86
87 typedef struct BVHRayCastData {
88         BVHTree *tree;
89
90         BVHTree_RayCastCallback callback;
91         void    *userdata;
92
93
94         BVHTreeRay ray;
95         float ray_dot_axis[13];
96         float idot_axis[13];
97         int index[6];
98
99         BVHTreeRayHit hit;
100 } BVHRayCastData;
101
102
103 /*
104  * Bounding Volume Hierarchy Definition
105  *
106  * Notes: From OBB until 26-DOP --> all bounding volumes possible, just choose type below
107  * Notes: You have to choose the type at compile time ITM
108  * Notes: You can choose the tree type --> binary, quad, octree, choose below
109  */
110
111 static float KDOP_AXES[13][3] = {
112         {1.0, 0, 0}, {0, 1.0, 0}, {0, 0, 1.0}, {1.0, 1.0, 1.0}, {1.0, -1.0, 1.0}, {1.0, 1.0, -1.0},
113         {1.0, -1.0, -1.0}, {1.0, 1.0, 0}, {1.0, 0, 1.0}, {0, 1.0, 1.0}, {1.0, -1.0, 0}, {1.0, 0, -1.0},
114         {0, 1.0, -1.0}
115 };
116
117 /*
118  * Generic push and pop heap
119  */
120 #define PUSH_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                  \
121         {                                                                         \
122                 HEAP_TYPE element = heap[heap_size - 1];                              \
123                 int child = heap_size - 1;                                            \
124                 while (child != 0)                                                    \
125                 {                                                                     \
126                         int parent = (child - 1) / 2;                                     \
127                         if (PRIORITY(element, heap[parent]))                              \
128                         {                                                                 \
129                                 heap[child] = heap[parent];                                   \
130                                 child = parent;                                               \
131                         }                                                                 \
132                         else break;                                                       \
133                 }                                                                     \
134                 heap[child] = element;                                                \
135         } (void)0
136
137 #define POP_HEAP_BODY(HEAP_TYPE, PRIORITY, heap, heap_size)                   \
138         {                                                                         \
139                 HEAP_TYPE element = heap[heap_size - 1];                              \
140                 int parent = 0;                                                       \
141                 while (parent < (heap_size - 1) / 2)                                  \
142                 {                                                                     \
143                         int child2 = (parent + 1) * 2;                                    \
144                         if (PRIORITY(heap[child2 - 1], heap[child2])) {                   \
145                                 child2--;                                                     \
146                         }                                                                 \
147                         if (PRIORITY(element, heap[child2])) {                            \
148                                 break;                                                        \
149                         }                                                                 \
150                         heap[parent] = heap[child2];                                      \
151                         parent = child2;                                                  \
152                 }                                                                     \
153                 heap[parent] = element;                                               \
154         } (void)0
155
156 #if 0
157 static int ADJUST_MEMORY(void *local_memblock, void **memblock, int new_size, int *max_size, int size_per_item)
158 {
159         int new_max_size = *max_size * 2;
160         void *new_memblock = NULL;
161
162         if (new_size <= *max_size)
163                 return TRUE;
164
165         if (*memblock == local_memblock)
166         {
167                 new_memblock = malloc(size_per_item * new_max_size);
168                 memcpy(new_memblock, *memblock, size_per_item * *max_size);
169         }
170         else
171                 new_memblock = realloc(*memblock, size_per_item * new_max_size);
172
173         if (new_memblock)
174         {
175                 *memblock = new_memblock;
176                 *max_size = new_max_size;
177                 return TRUE;
178         }
179         else
180                 return FALSE;
181 }
182 #endif
183
184 /*
185  * Introsort
186  * with permission deriven from the following Java code:
187  * http://ralphunden.net/content/tutorials/a-guide-to-introsort/
188  * and he derived it from the SUN STL
189  */
190
191 //static int size_threshold = 16;
192
193 /*
194  * Common methods for all algorithms
195  */
196 #if 0
197 static int floor_lg(int a)
198 {
199         return (int)(floor(log(a) / log(2)));
200 }
201 #endif
202
203 /*
204  * Insertion sort algorithm
205  */
206 static void bvh_insertionsort(BVHNode **a, int lo, int hi, int axis)
207 {
208         int i, j;
209         BVHNode *t;
210         for (i = lo; i < hi; i++) {
211                 j = i;
212                 t = a[i];
213                 while ((j != lo) && (t->bv[axis] < (a[j - 1])->bv[axis])) {
214                         a[j] = a[j - 1];
215                         j--;
216                 }
217                 a[j] = t;
218         }
219 }
220
221 static int bvh_partition(BVHNode **a, int lo, int hi, BVHNode *x, int axis)
222 {
223         int i = lo, j = hi;
224         while (1) {
225                 while ((a[i])->bv[axis] < x->bv[axis]) i++;
226                 j--;
227                 while (x->bv[axis] < (a[j])->bv[axis]) j--;
228                 if (!(i < j))
229                         return i;
230                 SWAP(BVHNode *, a[i], a[j]);
231                 i++;
232         }
233 }
234
235 /*
236  * Heapsort algorithm
237  */
238 #if 0
239 static void bvh_downheap(BVHNode **a, int i, int n, int lo, int axis)
240 {
241         BVHNode *d = a[lo + i - 1];
242         int child;
243         while (i <= n / 2)
244         {
245                 child = 2 * i;
246                 if ((child < n) && ((a[lo + child - 1])->bv[axis] < (a[lo + child])->bv[axis]))
247                 {
248                         child++;
249                 }
250                 if (!(d->bv[axis] < (a[lo + child - 1])->bv[axis])) break;
251                 a[lo + i - 1] = a[lo + child - 1];
252                 i = child;
253         }
254         a[lo + i - 1] = d;
255 }
256
257 static void bvh_heapsort(BVHNode **a, int lo, int hi, int axis)
258 {
259         int n = hi - lo, i;
260         for (i = n / 2; i >= 1; i = i - 1)
261         {
262                 bvh_downheap(a, i, n, lo, axis);
263         }
264         for (i = n; i > 1; i = i - 1)
265         {
266                 SWAP(BVHNode *, a[lo], a[lo + i - 1]);
267                 bvh_downheap(a, 1, i - 1, lo, axis);
268         }
269 }
270 #endif
271
272 static BVHNode *bvh_medianof3(BVHNode **a, int lo, int mid, int hi, int axis) // returns Sortable
273 {
274         if ((a[mid])->bv[axis] < (a[lo])->bv[axis]) {
275                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
276                         return a[mid];
277                 else {
278                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
279                                 return a[hi];
280                         else
281                                 return a[lo];
282                 }
283         }
284         else {
285                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis]) {
286                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
287                                 return a[lo];
288                         else
289                                 return a[hi];
290                 }
291                 else
292                         return a[mid];
293         }
294 }
295
296 #if 0
297 /*
298  * Quicksort algorithm modified for Introsort
299  */
300 static void bvh_introsort_loop(BVHNode **a, int lo, int hi, int depth_limit, int axis)
301 {
302         int p;
303
304         while (hi - lo > size_threshold)
305         {
306                 if (depth_limit == 0)
307                 {
308                         bvh_heapsort(a, lo, hi, axis);
309                         return;
310                 }
311                 depth_limit = depth_limit - 1;
312                 p = bvh_partition(a, lo, hi, bvh_medianof3(a, lo, lo + ((hi - lo) / 2) + 1, hi - 1, axis), axis);
313                 bvh_introsort_loop(a, p, hi, depth_limit, axis);
314                 hi = p;
315         }
316 }
317
318 static void sort(BVHNode **a0, int begin, int end, int axis)
319 {
320         if (begin < end)
321         {
322                 BVHNode **a = a0;
323                 bvh_introsort_loop(a, begin, end, 2 * floor_lg(end - begin), axis);
324                 bvh_insertionsort(a, begin, end, axis);
325         }
326 }
327
328 static void sort_along_axis(BVHTree *tree, int start, int end, int axis)
329 {
330         sort(tree->nodes, start, end, axis);
331 }
332 #endif
333
334 /* after a call to this function you can expect one of:
335  * - every node to left of a[n] are smaller or equal to it
336  * - every node to the right of a[n] are greater or equal to it */
337 static int partition_nth_element(BVHNode **a, int _begin, int _end, int n, int axis)
338 {
339         int begin = _begin, end = _end, cut;
340         while (end - begin > 3) {
341                 cut = bvh_partition(a, begin, end, bvh_medianof3(a, begin, (begin + end) / 2, end - 1, axis), axis);
342                 if (cut <= n)
343                         begin = cut;
344                 else
345                         end = cut;
346         }
347         bvh_insertionsort(a, begin, end, axis);
348
349         return n;
350 }
351
352 /* --- */
353 static void build_skip_links(BVHTree *tree, BVHNode *node, BVHNode *left, BVHNode *right)
354 {
355         int i;
356         
357         node->skip[0] = left;
358         node->skip[1] = right;
359         
360         for (i = 0; i < node->totnode; i++) {
361                 if (i + 1 < node->totnode)
362                         build_skip_links(tree, node->children[i], left, node->children[i + 1]);
363                 else
364                         build_skip_links(tree, node->children[i], left, right);
365
366                 left = node->children[i];
367         }
368 }
369
370 /*
371  * BVHTree bounding volumes functions
372  */
373 static void create_kdop_hull(BVHTree *tree, BVHNode *node, const float *co, int numpoints, int moving)
374 {
375         float newminmax;
376         float *bv = node->bv;
377         int i, k;
378         
379         /* don't init boudings for the moving case */
380                 if (!moving) {
381                 for (i = tree->start_axis; i < tree->stop_axis; i++) {
382                         bv[2 * i] = FLT_MAX;
383                         bv[2 * i + 1] = -FLT_MAX;
384                 }
385         }
386         
387         for (k = 0; k < numpoints; k++) {
388                 /* for all Axes. */
389                 for (i = tree->start_axis; i < tree->stop_axis; i++) {
390                         newminmax = dot_v3v3(&co[k * 3], KDOP_AXES[i]);
391                         if (newminmax < bv[2 * i])
392                                 bv[2 * i] = newminmax;
393                         if (newminmax > bv[(2 * i) + 1])
394                                 bv[(2 * i) + 1] = newminmax;
395                 }
396         }
397 }
398
399 /* depends on the fact that the BVH's for each face is already build */
400 static void refit_kdop_hull(BVHTree *tree, BVHNode *node, int start, int end)
401 {
402         float newmin, newmax;
403         int i, j;
404         float *bv = node->bv;
405
406         
407         for (i = tree->start_axis; i < tree->stop_axis; i++) {
408                 bv[2 * i] = FLT_MAX;
409                 bv[2 * i + 1] = -FLT_MAX;
410         }
411
412         for (j = start; j < end; j++) {
413                 /* for all Axes. */
414                 for (i = tree->start_axis; i < tree->stop_axis; i++) {
415                         newmin = tree->nodes[j]->bv[(2 * i)];   
416                         if ((newmin < bv[(2 * i)]))
417                                 bv[(2 * i)] = newmin;
418  
419                         newmax = tree->nodes[j]->bv[(2 * i) + 1];
420                         if ((newmax > bv[(2 * i) + 1]))
421                                 bv[(2 * i) + 1] = newmax;
422                 }
423         }
424
425 }
426
427 /* only supports x,y,z axis in the moment
428  * but we should use a plain and simple function here for speed sake */
429 static char get_largest_axis(float *bv)
430 {
431         float middle_point[3];
432
433         middle_point[0] = (bv[1]) - (bv[0]); /* x axis */
434         middle_point[1] = (bv[3]) - (bv[2]); /* y axis */
435         middle_point[2] = (bv[5]) - (bv[4]); /* z axis */
436         if (middle_point[0] > middle_point[1]) {
437                 if (middle_point[0] > middle_point[2])
438                         return 1;  /* max x axis */
439                 else
440                         return 5;  /* max z axis */
441         }
442         else {
443                 if (middle_point[1] > middle_point[2])
444                         return 3;  /* max y axis */
445                 else
446                         return 5;  /* max z axis */
447         }
448 }
449
450 /* bottom-up update of bvh node BV
451  * join the children on the parent BV */
452 static void node_join(BVHTree *tree, BVHNode *node)
453 {
454         int i, j;
455         
456         for (i = tree->start_axis; i < tree->stop_axis; i++) {
457                 node->bv[2 * i] = FLT_MAX;
458                 node->bv[2 * i + 1] = -FLT_MAX;
459         }
460         
461         for (i = 0; i < tree->tree_type; i++) {
462                 if (node->children[i]) {
463                         for (j = tree->start_axis; j < tree->stop_axis; j++) {
464                                 /* update minimum */
465                                 if (node->children[i]->bv[(2 * j)] < node->bv[(2 * j)])
466                                         node->bv[(2 * j)] = node->children[i]->bv[(2 * j)];
467
468                                 /* update maximum */
469                                 if (node->children[i]->bv[(2 * j) + 1] > node->bv[(2 * j) + 1])
470                                         node->bv[(2 * j) + 1] = node->children[i]->bv[(2 * j) + 1];
471                         }
472                 }
473                 else
474                         break;
475         }
476 }
477
478 /*
479  * Debug and information functions
480  */
481 #if 0
482 static void bvhtree_print_tree(BVHTree *tree, BVHNode *node, int depth)
483 {
484         int i;
485         for (i = 0; i < depth; i++) printf(" ");
486         printf(" - %d (%ld): ", node->index, node - tree->nodearray);
487         for (i = 2 * tree->start_axis; i < 2 * tree->stop_axis; i++)
488                 printf("%.3f ", node->bv[i]);
489         printf("\n");
490
491         for (i = 0; i < tree->tree_type; i++)
492                 if (node->children[i])
493                         bvhtree_print_tree(tree, node->children[i], depth + 1);
494 }
495
496 static void bvhtree_info(BVHTree *tree)
497 {
498         printf("BVHTree info\n");
499         printf("tree_type = %d, axis = %d, epsilon = %f\n", tree->tree_type, tree->axis, tree->epsilon);
500         printf("nodes = %d, branches = %d, leafs = %d\n", tree->totbranch + tree->totleaf,  tree->totbranch, tree->totleaf);
501         printf("Memory per node = %ldbytes\n", sizeof(BVHNode) + sizeof(BVHNode *) * tree->tree_type + sizeof(float) * tree->axis);
502         printf("BV memory = %dbytes\n", MEM_allocN_len(tree->nodebv));
503
504         printf("Total memory = %ldbytes\n", sizeof(BVHTree) +
505                MEM_allocN_len(tree->nodes) +
506                MEM_allocN_len(tree->nodearray) +
507                MEM_allocN_len(tree->nodechild) +
508                MEM_allocN_len(tree->nodebv));
509
510 //      bvhtree_print_tree(tree, tree->nodes[tree->totleaf], 0);
511 }
512 #endif
513
514 #if 0
515
516
517 static void verify_tree(BVHTree *tree)
518 {
519         int i, j, check = 0;
520         
521         /* check the pointer list */
522         for (i = 0; i < tree->totleaf; i++)
523         {
524                 if (tree->nodes[i]->parent == NULL) {
525                         printf("Leaf has no parent: %d\n", i);
526                 }
527                 else {
528                         for (j = 0; j < tree->tree_type; j++)
529                         {
530                                 if (tree->nodes[i]->parent->children[j] == tree->nodes[i])
531                                         check = 1;
532                         }
533                         if (!check)
534                         {
535                                 printf("Parent child relationship doesn't match: %d\n", i);
536                         }
537                         check = 0;
538                 }
539         }
540         
541         /* check the leaf list */
542         for (i = 0; i < tree->totleaf; i++)
543         {
544                 if (tree->nodearray[i].parent == NULL) {
545                         printf("Leaf has no parent: %d\n", i);
546                 }
547                 else {
548                         for (j = 0; j < tree->tree_type; j++)
549                         {
550                                 if (tree->nodearray[i].parent->children[j] == &tree->nodearray[i])
551                                         check = 1;
552                         }
553                         if (!check)
554                         {
555                                 printf("Parent child relationship doesn't match: %d\n", i);
556                         }
557                         check = 0;
558                 }
559         }
560         
561         printf("branches: %d, leafs: %d, total: %d\n", tree->totbranch, tree->totleaf, tree->totbranch + tree->totleaf);
562 }
563 #endif
564
565 /* Helper data and structures to build a min-leaf generalized implicit tree
566  * This code can be easily reduced (basicly this is only method to calculate pow(k, n) in O(1).. and stuff like that) */
567 typedef struct BVHBuildHelper {
568         int tree_type;              /* */
569         int totleafs;               /* */
570
571         int leafs_per_child[32];    /* Min number of leafs that are archievable from a node at depth N */
572         int branches_on_level[32];  /* Number of nodes at depth N (tree_type^N) */
573
574         int remain_leafs;           /* Number of leafs that are placed on the level that is not 100% filled */
575
576 } BVHBuildHelper;
577
578 static void build_implicit_tree_helper(BVHTree *tree, BVHBuildHelper *data)
579 {
580         int depth = 0;
581         int remain;
582         int nnodes;
583
584         data->totleafs = tree->totleaf;
585         data->tree_type = tree->tree_type;
586
587         /* Calculate the smallest tree_type^n such that tree_type^n >= num_leafs */
588         for (data->leafs_per_child[0] = 1;
589              data->leafs_per_child[0] <  data->totleafs;
590              data->leafs_per_child[0] *= data->tree_type)
591         {
592                 /* pass */
593         }
594
595         data->branches_on_level[0] = 1;
596
597         /* We could stop the loop first (but I am lazy to find out when) */
598         for (depth = 1; depth < 32; depth++) {
599                 data->branches_on_level[depth] = data->branches_on_level[depth - 1] * data->tree_type;
600                 data->leafs_per_child[depth] = data->leafs_per_child[depth - 1] / data->tree_type;
601         }
602
603         remain = data->totleafs - data->leafs_per_child[1];
604         nnodes = (remain + data->tree_type - 2) / (data->tree_type - 1);
605         data->remain_leafs = remain + nnodes;
606 }
607
608 // return the min index of all the leafs archivable with the given branch
609 static int implicit_leafs_index(BVHBuildHelper *data, int depth, int child_index)
610 {
611         int min_leaf_index = child_index * data->leafs_per_child[depth - 1];
612         if (min_leaf_index <= data->remain_leafs)
613                 return min_leaf_index;
614         else if (data->leafs_per_child[depth])
615                 return data->totleafs - (data->branches_on_level[depth - 1] - child_index) * data->leafs_per_child[depth];
616         else
617                 return data->remain_leafs;
618 }
619
620 /**
621  * Generalized implicit tree build
622  *
623  * An implicit tree is a tree where its structure is implied, thus there is no need to store child pointers or indexs.
624  * Its possible to find the position of the child or the parent with simple maths (multiplication and adittion). This type
625  * of tree is for example used on heaps.. where node N has its childs at indexs N*2 and N*2+1.
626  *
627  * Although in this case the tree type is general.. and not know until runtime.
628  * tree_type stands for the maximum number of childs that a tree node can have.
629  * All tree types >= 2 are supported.
630  *
631  * Advantages of the used trees include:
632  *  - No need to store child/parent relations (they are implicit);
633  *  - Any node child always has an index greater than the parent;
634  *  - Brother nodes are sequential in memory;
635  *
636  *
637  * Some math relations derived for general implicit trees:
638  *
639  *   K = tree_type, ( 2 <= K )
640  *   ROOT = 1
641  *   N child of node A = A * K + (2 - K) + N, (0 <= N < K)
642  *
643  * Util methods:
644  *   TODO...
645  *    (looping elements, knowing if its a leaf or not.. etc...)
646  */
647
648 // This functions returns the number of branches needed to have the requested number of leafs.
649 static int implicit_needed_branches(int tree_type, int leafs)
650 {
651         return MAX2(1, (leafs + tree_type - 3) / (tree_type - 1) );
652 }
653
654 /*
655  * This function handles the problem of "sorting" the leafs (along the split_axis).
656  *
657  * It arranges the elements in the given partitions such that:
658  *  - any element in partition N is less or equal to any element in partition N+1.
659  *  - if all elements are different all partition will get the same subset of elements
660  *    as if the array was sorted.
661  *
662  * partition P is described as the elements in the range ( nth[P], nth[P+1] ]
663  *
664  * TODO: This can be optimized a bit by doing a specialized nth_element instead of K nth_elements
665  */
666 static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int split_axis)
667 {
668         int i;
669         for (i = 0; i < partitions - 1; i++) {
670                 if (nth[i] >= nth[partitions])
671                         break;
672
673                 partition_nth_element(leafs_array, nth[i], nth[partitions], nth[i + 1], split_axis);
674         }
675 }
676
677 /*
678  * This functions builds an optimal implicit tree from the given leafs.
679  * Where optimal stands for:
680  *  - The resulting tree will have the smallest number of branches;
681  *  - At most only one branch will have NULL childs;
682  *  - All leafs will be stored at level N or N+1.
683  *
684  * This function creates an implicit tree on branches_array, the leafs are given on the leafs_array.
685  *
686  * The tree is built per depth levels. First branches at depth 1.. then branches at depth 2.. etc..
687  * The reason is that we can build level N+1 from level N without any data dependencies.. thus it allows
688  * to use multithread building.
689  *
690  * To archieve this is necessary to find how much leafs are accessible from a certain branch, BVHBuildHelper
691  * implicit_needed_branches and implicit_leafs_index are auxiliary functions to solve that "optimal-split".
692  */
693 static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array, BVHNode **leafs_array, int num_leafs)
694 {
695         int i;
696
697         const int tree_type   = tree->tree_type;
698         const int tree_offset = 2 - tree->tree_type; /* this value is 0 (on binary trees) and negative on the others */
699         const int num_branches = implicit_needed_branches(tree_type, num_leafs);
700
701         BVHBuildHelper data;
702         int depth;
703         
704         /* set parent from root node to NULL */
705         BVHNode *tmp = branches_array + 0;
706         tmp->parent = NULL;
707
708         /*Most of bvhtree code relies on 1-leaf trees having at least one branch
709          *We handle that special case here */
710         if (num_leafs == 1) {
711                 BVHNode *root = branches_array + 0;
712                 refit_kdop_hull(tree, root, 0, num_leafs);
713                 root->main_axis = get_largest_axis(root->bv) / 2;
714                 root->totnode = 1;
715                 root->children[0] = leafs_array[0];
716                 root->children[0]->parent = root;
717                 return;
718         }
719
720         branches_array--;  /* Implicit trees use 1-based indexs */
721
722         build_implicit_tree_helper(tree, &data);
723
724         /* Loop tree levels (log N) loops */
725         for (i = 1, depth = 1; i <= num_branches; i = i * tree_type + tree_offset, depth++) {
726                 const int first_of_next_level = i * tree_type + tree_offset;
727                 const int end_j = MIN2(first_of_next_level, num_branches + 1);  /* index of last branch on this level */
728                 int j;
729
730                 /* Loop all branches on this level */
731 #pragma omp parallel for private(j) schedule(static)
732                 for (j = i; j < end_j; j++) {
733                         int k;
734                         const int parent_level_index = j - i;
735                         BVHNode *parent = branches_array + j;
736                         int nth_positions[MAX_TREETYPE + 1];
737                         char split_axis;
738
739                         int parent_leafs_begin = implicit_leafs_index(&data, depth, parent_level_index);
740                         int parent_leafs_end   = implicit_leafs_index(&data, depth, parent_level_index + 1);
741
742                         /* This calculates the bounding box of this branch
743                          * and chooses the largest axis as the axis to divide leafs */
744                         refit_kdop_hull(tree, parent, parent_leafs_begin, parent_leafs_end);
745                         split_axis = get_largest_axis(parent->bv);
746
747                         /* Save split axis (this can be used on raytracing to speedup the query time) */
748                         parent->main_axis = split_axis / 2;
749
750                         /* Split the childs along the split_axis, note: its not needed to sort the whole leafs array
751                          * Only to assure that the elements are partioned on a way that each child takes the elements
752                          * it would take in case the whole array was sorted.
753                          * Split_leafs takes care of that "sort" problem. */
754                         nth_positions[0] = parent_leafs_begin;
755                         nth_positions[tree_type] = parent_leafs_end;
756                         for (k = 1; k < tree_type; k++) {
757                                 int child_index = j * tree_type + tree_offset + k;
758                                 int child_level_index = child_index - first_of_next_level; /* child level index */
759                                 nth_positions[k] = implicit_leafs_index(&data, depth + 1, child_level_index);
760                         }
761
762                         split_leafs(leafs_array, nth_positions, tree_type, split_axis);
763
764
765                         /* Setup children and totnode counters
766                          * Not really needed but currently most of BVH code relies on having an explicit children structure */
767                         for (k = 0; k < tree_type; k++) {
768                                 int child_index = j * tree_type + tree_offset + k;
769                                 int child_level_index = child_index - first_of_next_level; /* child level index */
770
771                                 int child_leafs_begin = implicit_leafs_index(&data, depth + 1, child_level_index);
772                                 int child_leafs_end   = implicit_leafs_index(&data, depth + 1, child_level_index + 1);
773
774                                 if (child_leafs_end - child_leafs_begin > 1) {
775                                         parent->children[k] = branches_array + child_index;
776                                         parent->children[k]->parent = parent;
777                                 }
778                                 else if (child_leafs_end - child_leafs_begin == 1) {
779                                         parent->children[k] = leafs_array[child_leafs_begin];
780                                         parent->children[k]->parent = parent;
781                                 }
782                                 else {
783                                         break;
784                                 }
785
786                                 parent->totnode = k + 1;
787                         }
788                 }
789         }
790 }
791
792
793 /*
794  * BLI_bvhtree api
795  */
796 BVHTree *BLI_bvhtree_new(int maxsize, float epsilon, char tree_type, char axis)
797 {
798         BVHTree *tree;
799         int numnodes, i;
800         
801         /* theres not support for trees below binary-trees :P */
802         if (tree_type < 2)
803                 return NULL;
804
805         if (tree_type > MAX_TREETYPE)
806                 return NULL;
807
808         tree = (BVHTree *)MEM_callocN(sizeof(BVHTree), "BVHTree");
809
810         /* tree epsilon must be >= FLT_EPSILON
811          * so that tangent rays can still hit a bounding volume..
812          * this bug would show up when casting a ray aligned with a kdop-axis and with an edge of 2 faces */
813         epsilon = MAX2(FLT_EPSILON, epsilon);
814
815         if (tree) {
816                 tree->epsilon = epsilon;
817                 tree->tree_type = tree_type; 
818                 tree->axis = axis;
819                 
820                 if (axis == 26) {
821                         tree->start_axis = 0;
822                         tree->stop_axis = 13;
823                 }
824                 else if (axis == 18) {
825                         tree->start_axis = 7;
826                         tree->stop_axis = 13;
827                 }
828                 else if (axis == 14) {
829                         tree->start_axis = 0;
830                         tree->stop_axis = 7;
831                 }
832                 else if (axis == 8) { /* AABB */
833                         tree->start_axis = 0;
834                         tree->stop_axis = 4;
835                 }
836                 else if (axis == 6) { /* OBB */
837                         tree->start_axis = 0;
838                         tree->stop_axis = 3;
839                 }
840                 else {
841                         MEM_freeN(tree);
842                         return NULL;
843                 }
844
845
846                 //Allocate arrays
847                 numnodes = maxsize + implicit_needed_branches(tree_type, maxsize) + tree_type;
848
849                 tree->nodes = (BVHNode **)MEM_callocN(sizeof(BVHNode *) * numnodes, "BVHNodes");
850                 
851                 if (!tree->nodes) {
852                         MEM_freeN(tree);
853                         return NULL;
854                 }
855                 
856                 tree->nodebv = (float *)MEM_callocN(sizeof(float) * axis * numnodes, "BVHNodeBV");
857                 if (!tree->nodebv) {
858                         MEM_freeN(tree->nodes);
859                         MEM_freeN(tree);
860                 }
861
862                 tree->nodechild = (BVHNode **)MEM_callocN(sizeof(BVHNode *) * tree_type * numnodes, "BVHNodeBV");
863                 if (!tree->nodechild) {
864                         MEM_freeN(tree->nodebv);
865                         MEM_freeN(tree->nodes);
866                         MEM_freeN(tree);
867                 }
868
869                 tree->nodearray = (BVHNode *)MEM_callocN(sizeof(BVHNode) * numnodes, "BVHNodeArray");
870                 
871                 if (!tree->nodearray) {
872                         MEM_freeN(tree->nodechild);
873                         MEM_freeN(tree->nodebv);
874                         MEM_freeN(tree->nodes);
875                         MEM_freeN(tree);
876                         return NULL;
877                 }
878
879                 //link the dynamic bv and child links
880                 for (i = 0; i < numnodes; i++) {
881                         tree->nodearray[i].bv = tree->nodebv + i * axis;
882                         tree->nodearray[i].children = tree->nodechild + i * tree_type;
883                 }
884                 
885         }
886
887         return tree;
888 }
889
890 void BLI_bvhtree_free(BVHTree *tree)
891 {       
892         if (tree) {
893                 MEM_freeN(tree->nodes);
894                 MEM_freeN(tree->nodearray);
895                 MEM_freeN(tree->nodebv);
896                 MEM_freeN(tree->nodechild);
897                 MEM_freeN(tree);
898         }
899 }
900
901 void BLI_bvhtree_balance(BVHTree *tree)
902 {
903         int i;
904
905         BVHNode *branches_array = tree->nodearray + tree->totleaf;
906         BVHNode **leafs_array    = tree->nodes;
907
908         /* This function should only be called once (some big bug goes here if its being called more than once per tree) */
909         assert(tree->totbranch == 0);
910
911         /* Build the implicit tree */
912         non_recursive_bvh_div_nodes(tree, branches_array, leafs_array, tree->totleaf);
913
914         /*current code expects the branches to be linked to the nodes array
915          *we perform that linkage here */
916         tree->totbranch = implicit_needed_branches(tree->tree_type, tree->totleaf);
917         for (i = 0; i < tree->totbranch; i++)
918                 tree->nodes[tree->totleaf + i] = branches_array + i;
919
920         build_skip_links(tree, tree->nodes[tree->totleaf], NULL, NULL);
921         /* bvhtree_info(tree); */
922 }
923
924 int BLI_bvhtree_insert(BVHTree *tree, int index, const float *co, int numpoints)
925 {
926         int i;
927         BVHNode *node = NULL;
928
929         /* insert should only possible as long as tree->totbranch is 0 */
930         if (tree->totbranch > 0)
931                 return 0;
932
933         if (tree->totleaf + 1 >= MEM_allocN_len(tree->nodes) / sizeof(*(tree->nodes)))
934                 return 0;
935
936         /* TODO check if have enough nodes in array */
937
938         node = tree->nodes[tree->totleaf] = &(tree->nodearray[tree->totleaf]);
939         tree->totleaf++;
940
941         create_kdop_hull(tree, node, co, numpoints, 0);
942         node->index = index;
943
944         /* inflate the bv with some epsilon */
945         for (i = tree->start_axis; i < tree->stop_axis; i++) {
946                 node->bv[(2 * i)] -= tree->epsilon; /* minimum */
947                 node->bv[(2 * i) + 1] += tree->epsilon; /* maximum */
948         }
949
950         return 1;
951 }
952
953
954 /* call before BLI_bvhtree_update_tree() */
955 int BLI_bvhtree_update_node(BVHTree *tree, int index, const float *co, const float *co_moving, int numpoints)
956 {
957         int i;
958         BVHNode *node = NULL;
959         
960         /* check if index exists */
961         if (index > tree->totleaf)
962                 return 0;
963         
964         node = tree->nodearray + index;
965         
966         create_kdop_hull(tree, node, co, numpoints, 0);
967         
968         if (co_moving)
969                 create_kdop_hull(tree, node, co_moving, numpoints, 1);
970         
971         // inflate the bv with some epsilon
972         for (i = tree->start_axis; i < tree->stop_axis; i++) {
973                 node->bv[(2 * i)] -= tree->epsilon; // minimum 
974                 node->bv[(2 * i) + 1] += tree->epsilon; // maximum 
975         }
976
977         return 1;
978 }
979
980 /* call BLI_bvhtree_update_node() first for every node/point/triangle */
981 void BLI_bvhtree_update_tree(BVHTree *tree)
982 {
983         /* Update bottom=>top
984          * TRICKY: the way we build the tree all the childs have an index greater than the parent
985          * This allows us todo a bottom up update by starting on the biger numbered branch */
986
987         BVHNode **root  = tree->nodes + tree->totleaf;
988         BVHNode **index = tree->nodes + tree->totleaf + tree->totbranch - 1;
989
990         for (; index >= root; index--)
991                 node_join(tree, *index);
992 }
993
994 float BLI_bvhtree_getepsilon(BVHTree *tree)
995 {
996         return tree->epsilon;
997 }
998
999
1000 /*
1001  * BLI_bvhtree_overlap
1002  *
1003  * overlap - is it possbile for 2 bv's to collide ? */
1004 static int tree_overlap(BVHNode *node1, BVHNode *node2, int start_axis, int stop_axis)
1005 {
1006         float *bv1 = node1->bv;
1007         float *bv2 = node2->bv;
1008
1009         float *bv1_end = bv1 + (stop_axis << 1);
1010                 
1011         bv1 += start_axis << 1;
1012         bv2 += start_axis << 1;
1013         
1014         /* test all axis if min + max overlap */
1015         for (; bv1 != bv1_end; bv1 += 2, bv2 += 2) {
1016                 if ((*(bv1) > *(bv2 + 1)) || (*(bv2) > *(bv1 + 1)))
1017                         return 0;
1018         }
1019
1020         return 1;
1021 }
1022
1023 static void traverse(BVHOverlapData *data, BVHNode *node1, BVHNode *node2)
1024 {
1025         int j;
1026
1027         if (tree_overlap(node1, node2, data->start_axis, data->stop_axis)) {
1028                 /* check if node1 is a leaf */
1029                 if (!node1->totnode) {
1030                         /* check if node2 is a leaf */
1031                         if (!node2->totnode) {
1032
1033                                 if (node1 == node2) {
1034                                         return;
1035                                 }
1036
1037                                 if (data->i >= data->max_overlap) {
1038                                         /* try to make alloc'ed memory bigger */
1039                                         data->overlap = realloc(data->overlap, sizeof(BVHTreeOverlap) * data->max_overlap * 2);
1040                                         
1041                                         if (!data->overlap) {
1042                                                 printf("Out of Memory in traverse\n");
1043                                                 return;
1044                                         }
1045                                         data->max_overlap *= 2;
1046                                 }
1047                                 
1048                                 // both leafs, insert overlap!
1049                                 data->overlap[data->i].indexA = node1->index;
1050                                 data->overlap[data->i].indexB = node2->index;
1051
1052                                 data->i++;
1053                         }
1054                         else {
1055                                 for (j = 0; j < data->tree2->tree_type; j++) {
1056                                         if (node2->children[j])
1057                                                 traverse(data, node1, node2->children[j]);
1058                                 }
1059                         }
1060                 }
1061                 else {
1062                         for (j = 0; j < data->tree2->tree_type; j++) {
1063                                 if (node1->children[j])
1064                                         traverse(data, node1->children[j], node2);
1065                         }
1066                 }
1067         }
1068         return;
1069 }
1070
1071 BVHTreeOverlap *BLI_bvhtree_overlap(BVHTree *tree1, BVHTree *tree2, unsigned int *result)
1072 {
1073         int j;
1074         unsigned int total = 0;
1075         BVHTreeOverlap *overlap = NULL, *to = NULL;
1076         BVHOverlapData **data;
1077         
1078         // check for compatibility of both trees (can't compare 14-DOP with 18-DOP)
1079         if ((tree1->axis != tree2->axis) && (tree1->axis == 14 || tree2->axis == 14) && (tree1->axis == 18 || tree2->axis == 18))
1080                 return NULL;
1081         
1082         // fast check root nodes for collision before doing big splitting + traversal
1083         if (!tree_overlap(tree1->nodes[tree1->totleaf], tree2->nodes[tree2->totleaf], MIN2(tree1->start_axis, tree2->start_axis), MIN2(tree1->stop_axis, tree2->stop_axis)))
1084                 return NULL;
1085
1086         data = MEM_callocN(sizeof(BVHOverlapData *) * tree1->tree_type, "BVHOverlapData_star");
1087         
1088         for (j = 0; j < tree1->tree_type; j++) {
1089                 data[j] = (BVHOverlapData *)MEM_callocN(sizeof(BVHOverlapData), "BVHOverlapData");
1090                 
1091                 // init BVHOverlapData
1092                 data[j]->overlap = (BVHTreeOverlap *)malloc(sizeof(BVHTreeOverlap) * MAX2(tree1->totleaf, tree2->totleaf));
1093                 data[j]->tree1 = tree1;
1094                 data[j]->tree2 = tree2;
1095                 data[j]->max_overlap = MAX2(tree1->totleaf, tree2->totleaf);
1096                 data[j]->i = 0;
1097                 data[j]->start_axis = MIN2(tree1->start_axis, tree2->start_axis);
1098                 data[j]->stop_axis  = MIN2(tree1->stop_axis,  tree2->stop_axis);
1099         }
1100
1101 #pragma omp parallel for private(j) schedule(static)
1102         for (j = 0; j < MIN2(tree1->tree_type, tree1->nodes[tree1->totleaf]->totnode); j++) {
1103                 traverse(data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
1104         }
1105         
1106         for (j = 0; j < tree1->tree_type; j++)
1107                 total += data[j]->i;
1108         
1109         to = overlap = (BVHTreeOverlap *)MEM_callocN(sizeof(BVHTreeOverlap) * total, "BVHTreeOverlap");
1110         
1111         for (j = 0; j < tree1->tree_type; j++) {
1112                 memcpy(to, data[j]->overlap, data[j]->i * sizeof(BVHTreeOverlap));
1113                 to += data[j]->i;
1114         }
1115         
1116         for (j = 0; j < tree1->tree_type; j++) {
1117                 free(data[j]->overlap);
1118                 MEM_freeN(data[j]);
1119         }
1120         MEM_freeN(data);
1121         
1122         (*result) = total;
1123         return overlap;
1124 }
1125
1126 //Determines the nearest point of the given node BV. Returns the squared distance to that point.
1127 static float calc_nearest_point(const float proj[3], BVHNode *node, float *nearest)
1128 {
1129         int i;
1130         const float *bv = node->bv;
1131
1132         //nearest on AABB hull
1133         for (i = 0; i != 3; i++, bv += 2) {
1134                 if (bv[0] > proj[i])
1135                         nearest[i] = bv[0];
1136                 else if (bv[1] < proj[i])
1137                         nearest[i] = bv[1];
1138                 else
1139                         nearest[i] = proj[i]; 
1140         }
1141
1142 #if 0
1143         //nearest on a general hull
1144         copy_v3_v3(nearest, data->co);
1145         for (i = data->tree->start_axis; i != data->tree->stop_axis; i++, bv += 2)
1146         {
1147                 float proj = dot_v3v3(nearest, KDOP_AXES[i]);
1148                 float dl = bv[0] - proj;
1149                 float du = bv[1] - proj;
1150
1151                 if (dl > 0) {
1152                         madd_v3_v3fl(nearest, KDOP_AXES[i], dl);
1153                 }
1154                 else if (du < 0) {
1155                         madd_v3_v3fl(nearest, KDOP_AXES[i], du);
1156                 }
1157         }
1158 #endif
1159
1160         return len_squared_v3v3(proj, nearest);
1161 }
1162
1163
1164 typedef struct NodeDistance {
1165         BVHNode *node;
1166         float dist;
1167
1168 } NodeDistance;
1169
1170 #define NodeDistance_priority(a, b) ( (a).dist < (b).dist)
1171
1172 // TODO: use a priority queue to reduce the number of nodes looked on
1173 static void dfs_find_nearest_dfs(BVHNearestData *data, BVHNode *node)
1174 {
1175         if (node->totnode == 0) {
1176                 if (data->callback)
1177                         data->callback(data->userdata, node->index, data->co, &data->nearest);
1178                 else {
1179                         data->nearest.index = node->index;
1180                         data->nearest.dist  = calc_nearest_point(data->proj, node, data->nearest.co);
1181                 }
1182         }
1183         else {
1184                 //Better heuristic to pick the closest node to dive on
1185                 int i;
1186                 float nearest[3];
1187
1188                 if (data->proj[node->main_axis] <= node->children[0]->bv[node->main_axis * 2 + 1]) {
1189
1190                         for (i = 0; i != node->totnode; i++) {
1191                                 if (calc_nearest_point(data->proj, node->children[i], nearest) >= data->nearest.dist) continue;
1192                                 dfs_find_nearest_dfs(data, node->children[i]);
1193                         }
1194                 }
1195                 else {
1196                         for (i = node->totnode - 1; i >= 0; i--) {
1197                                 if (calc_nearest_point(data->proj, node->children[i], nearest) >= data->nearest.dist) continue;
1198                                 dfs_find_nearest_dfs(data, node->children[i]);
1199                         }
1200                 }
1201         }
1202 }
1203
1204 static void dfs_find_nearest_begin(BVHNearestData *data, BVHNode *node)
1205 {
1206         float nearest[3], sdist;
1207         sdist = calc_nearest_point(data->proj, node, nearest);
1208         if (sdist >= data->nearest.dist) return;
1209         dfs_find_nearest_dfs(data, node);
1210 }
1211
1212
1213 #if 0
1214 static void NodeDistance_push_heap(NodeDistance *heap, int heap_size)
1215 PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)
1216
1217 static void NodeDistance_pop_heap(NodeDistance *heap, int heap_size)
1218 POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size)
1219
1220 /* NN function that uses an heap.. this functions leads to an optimal number of min-distance
1221  * but for normal tri-faces and BV 6-dop.. a simple dfs with local heuristics (as implemented
1222  * in source/blender/blenkernel/intern/shrinkwrap.c) works faster.
1223  *
1224  * It may make sense to use this function if the callback queries are very slow.. or if its impossible
1225  * to get a nice heuristic
1226  *
1227  * this function uses "malloc/free" instead of the MEM_* because it intends to be openmp safe */
1228 static void bfs_find_nearest(BVHNearestData *data, BVHNode *node)
1229 {
1230         int i;
1231         NodeDistance default_heap[DEFAULT_FIND_NEAREST_HEAP_SIZE];
1232         NodeDistance *heap = default_heap, current;
1233         int heap_size = 0, max_heap_size = sizeof(default_heap) / sizeof(default_heap[0]);
1234         float nearest[3];
1235
1236         int callbacks = 0, push_heaps = 0;
1237
1238         if (node->totnode == 0)
1239         {
1240                 dfs_find_nearest_dfs(data, node);
1241                 return;
1242         }
1243
1244         current.node = node;
1245         current.dist = calc_nearest_point(data->proj, node, nearest);
1246
1247         while (current.dist < data->nearest.dist)
1248         {
1249 //              printf("%f : %f\n", current.dist, data->nearest.dist);
1250                 for (i = 0; i < current.node->totnode; i++)
1251                 {
1252                         BVHNode *child = current.node->children[i];
1253                         if (child->totnode == 0)
1254                         {
1255                                 callbacks++;
1256                                 dfs_find_nearest_dfs(data, child);
1257                         }
1258                         else {
1259                                 //adjust heap size
1260                                 if ((heap_size >= max_heap_size) &&
1261                                     ADJUST_MEMORY(default_heap, (void **)&heap, heap_size + 1, &max_heap_size, sizeof(heap[0])) == FALSE)
1262                                 {
1263                                         printf("WARNING: bvh_find_nearest got out of memory\n");
1264
1265                                         if (heap != default_heap)
1266                                                 free(heap);
1267
1268                                         return;
1269                                 }
1270
1271                                 heap[heap_size].node = current.node->children[i];
1272                                 heap[heap_size].dist = calc_nearest_point(data->proj, current.node->children[i], nearest);
1273
1274                                 if (heap[heap_size].dist >= data->nearest.dist) continue;
1275                                 heap_size++;
1276
1277                                 NodeDistance_push_heap(heap, heap_size);
1278                                 //                      PUSH_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
1279                                 push_heaps++;
1280                         }
1281                 }
1282                 
1283                 if (heap_size == 0) break;
1284
1285                 current = heap[0];
1286                 NodeDistance_pop_heap(heap, heap_size);
1287 //              POP_HEAP_BODY(NodeDistance, NodeDistance_priority, heap, heap_size);
1288                 heap_size--;
1289         }
1290
1291 //      printf("hsize=%d, callbacks=%d, pushs=%d\n", heap_size, callbacks, push_heaps);
1292
1293         if (heap != default_heap)
1294                 free(heap);
1295 }
1296 #endif
1297
1298
1299 int BLI_bvhtree_find_nearest(BVHTree *tree, const float co[3], BVHTreeNearest *nearest, BVHTree_NearestPointCallback callback, void *userdata)
1300 {
1301         int i;
1302
1303         BVHNearestData data;
1304         BVHNode *root = tree->nodes[tree->totleaf];
1305
1306         //init data to search
1307         data.tree = tree;
1308         data.co = co;
1309
1310         data.callback = callback;
1311         data.userdata = userdata;
1312
1313         for (i = data.tree->start_axis; i != data.tree->stop_axis; i++) {
1314                 data.proj[i] = dot_v3v3(data.co, KDOP_AXES[i]);
1315         }
1316
1317         if (nearest) {
1318                 memcpy(&data.nearest, nearest, sizeof(*nearest));
1319         }
1320         else {
1321                 data.nearest.index = -1;
1322                 data.nearest.dist = FLT_MAX;
1323         }
1324
1325         /* dfs search */
1326         if (root)
1327                 dfs_find_nearest_begin(&data, root);
1328
1329         /* copy back results */
1330         if (nearest) {
1331                 memcpy(nearest, &data.nearest, sizeof(*nearest));
1332         }
1333
1334         return data.nearest.index;
1335 }
1336
1337
1338 /*
1339  * Raycast - BLI_bvhtree_ray_cast
1340  *
1341  * raycast is done by performing a DFS on the BVHTree and saving the closest hit
1342  */
1343
1344
1345 /* Determines the distance that the ray must travel to hit the bounding volume of the given node */
1346 static float ray_nearest_hit(BVHRayCastData *data, float *bv)
1347 {
1348         int i;
1349
1350         float low = 0, upper = data->hit.dist;
1351
1352         for (i = 0; i != 3; i++, bv += 2) {
1353                 if (data->ray_dot_axis[i] == 0.0f) {
1354                         //axis aligned ray
1355                         if (data->ray.origin[i] < bv[0] - data->ray.radius ||
1356                             data->ray.origin[i] > bv[1] + data->ray.radius)
1357                         {
1358                                 return FLT_MAX;
1359                         }
1360                 }
1361                 else {
1362                         float ll = (bv[0] - data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1363                         float lu = (bv[1] + data->ray.radius - data->ray.origin[i]) / data->ray_dot_axis[i];
1364
1365                         if (data->ray_dot_axis[i] > 0.0f) {
1366                                 if (ll > low) low = ll;
1367                                 if (lu < upper) upper = lu;
1368                         }
1369                         else {
1370                                 if (lu > low) low = lu;
1371                                 if (ll < upper) upper = ll;
1372                         }
1373         
1374                         if (low > upper) return FLT_MAX;
1375                 }
1376         }
1377         return low;
1378 }
1379
1380 /* Determines the distance that the ray must travel to hit the bounding volume of the given node
1381  * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
1382  * [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
1383  *
1384  * TODO this doens't has data->ray.radius in consideration */
1385 static float fast_ray_nearest_hit(const BVHRayCastData *data, const BVHNode *node)
1386 {
1387         const float *bv = node->bv;
1388         float dist;
1389         
1390         float t1x = (bv[data->index[0]] - data->ray.origin[0]) * data->idot_axis[0];
1391         float t2x = (bv[data->index[1]] - data->ray.origin[0]) * data->idot_axis[0];
1392         float t1y = (bv[data->index[2]] - data->ray.origin[1]) * data->idot_axis[1];
1393         float t2y = (bv[data->index[3]] - data->ray.origin[1]) * data->idot_axis[1];
1394         float t1z = (bv[data->index[4]] - data->ray.origin[2]) * data->idot_axis[2];
1395         float t2z = (bv[data->index[5]] - data->ray.origin[2]) * data->idot_axis[2];
1396
1397         if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return FLT_MAX;
1398         if (t2x < 0.0f || t2y < 0.0f || t2z < 0.0f) return FLT_MAX;
1399         if (t1x > data->hit.dist || t1y > data->hit.dist || t1z > data->hit.dist) return FLT_MAX;
1400
1401         dist = t1x;
1402         if (t1y > dist) dist = t1y;
1403         if (t1z > dist) dist = t1z;
1404         return dist;
1405 }
1406
1407 static void dfs_raycast(BVHRayCastData *data, BVHNode *node)
1408 {
1409         int i;
1410
1411         /* ray-bv is really fast.. and simple tests revealed its worth to test it
1412          * before calling the ray-primitive functions */
1413         /* XXX: temporary solution for particles until fast_ray_nearest_hit supports ray.radius */
1414         float dist = (data->ray.radius > 0.0f) ? ray_nearest_hit(data, node->bv) : fast_ray_nearest_hit(data, node);
1415         if (dist >= data->hit.dist) return;
1416
1417         if (node->totnode == 0) {
1418                 if (data->callback) {
1419                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1420                 }
1421                 else {
1422                         data->hit.index = node->index;
1423                         data->hit.dist  = dist;
1424                         madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1425                 }
1426         }
1427         else {
1428                 /* pick loop direction to dive into the tree (based on ray direction and split axis) */
1429                 if (data->ray_dot_axis[(int)node->main_axis] > 0.0f) {
1430                         for (i = 0; i != node->totnode; i++) {
1431                                 dfs_raycast(data, node->children[i]);
1432                         }
1433                 }
1434                 else {
1435                         for (i = node->totnode - 1; i >= 0; i--) {
1436                                 dfs_raycast(data, node->children[i]);
1437                         }
1438                 }
1439         }
1440 }
1441
1442 #if 0
1443 static void iterative_raycast(BVHRayCastData *data, BVHNode *node)
1444 {
1445         while (node)
1446         {
1447                 float dist = fast_ray_nearest_hit(data, node);
1448                 if (dist >= data->hit.dist)
1449                 {
1450                         node = node->skip[1];
1451                         continue;
1452                 }
1453
1454                 if (node->totnode == 0)
1455                 {
1456                         if (data->callback) {
1457                                 data->callback(data->userdata, node->index, &data->ray, &data->hit);
1458                         }
1459                         else {
1460                                 data->hit.index = node->index;
1461                                 data->hit.dist  = dist;
1462                                 madd_v3_v3v3fl(data->hit.co, data->ray.origin, data->ray.direction, dist);
1463                         }
1464                         
1465                         node = node->skip[1];
1466                 }
1467                 else {
1468                         node = node->children[0];
1469                 }       
1470         }
1471 }
1472 #endif
1473
1474 int BLI_bvhtree_ray_cast(BVHTree *tree, const float co[3], const float dir[3], float radius, BVHTreeRayHit *hit, BVHTree_RayCastCallback callback, void *userdata)
1475 {
1476         int i;
1477         BVHRayCastData data;
1478         BVHNode *root = tree->nodes[tree->totleaf];
1479
1480         data.tree = tree;
1481
1482         data.callback = callback;
1483         data.userdata = userdata;
1484
1485         copy_v3_v3(data.ray.origin,    co);
1486         copy_v3_v3(data.ray.direction, dir);
1487         data.ray.radius = radius;
1488
1489         normalize_v3(data.ray.direction);
1490
1491         for (i = 0; i < 3; i++) {
1492                 data.ray_dot_axis[i] = dot_v3v3(data.ray.direction, KDOP_AXES[i]);
1493                 data.idot_axis[i] = 1.0f / data.ray_dot_axis[i];
1494
1495                 if (fabsf(data.ray_dot_axis[i]) < FLT_EPSILON) {
1496                         data.ray_dot_axis[i] = 0.0;
1497                 }
1498                 data.index[2 * i] = data.idot_axis[i] < 0.0f ? 1 : 0;
1499                 data.index[2 * i + 1] = 1 - data.index[2 * i];
1500                 data.index[2 * i]   += 2 * i;
1501                 data.index[2 * i + 1] += 2 * i;
1502         }
1503
1504
1505         if (hit)
1506                 memcpy(&data.hit, hit, sizeof(*hit));
1507         else {
1508                 data.hit.index = -1;
1509                 data.hit.dist = FLT_MAX;
1510         }
1511
1512         if (root) {
1513                 dfs_raycast(&data, root);
1514 //              iterative_raycast(&data, root);
1515         }
1516
1517
1518         if (hit)
1519                 memcpy(hit, &data.hit, sizeof(*hit));
1520
1521         return data.hit.index;
1522 }
1523
1524 float BLI_bvhtree_bb_raycast(float *bv, const float light_start[3], const float light_end[3], float pos[3])
1525 {
1526         BVHRayCastData data;
1527         float dist = 0.0;
1528
1529         data.hit.dist = FLT_MAX;
1530         
1531         // get light direction
1532         data.ray.direction[0] = light_end[0] - light_start[0];
1533         data.ray.direction[1] = light_end[1] - light_start[1];
1534         data.ray.direction[2] = light_end[2] - light_start[2];
1535         
1536         data.ray.radius = 0.0;
1537         
1538         data.ray.origin[0] = light_start[0];
1539         data.ray.origin[1] = light_start[1];
1540         data.ray.origin[2] = light_start[2];
1541         
1542         normalize_v3(data.ray.direction);
1543         copy_v3_v3(data.ray_dot_axis, data.ray.direction);
1544         
1545         dist = ray_nearest_hit(&data, bv);
1546         
1547         if (dist > 0.0f) {
1548                 madd_v3_v3v3fl(pos, light_start, data.ray.direction, dist);
1549         }
1550         return dist;
1551         
1552 }
1553
1554 /*
1555  * Range Query - as request by broken :P
1556  *
1557  * Allocs and fills an array with the indexs of node that are on the given spherical range (center, radius) 
1558  * Returns the size of the array.
1559  */
1560 typedef struct RangeQueryData {
1561         BVHTree *tree;
1562         const float *center;
1563         float radius;           //squared radius
1564
1565         int hits;
1566
1567         BVHTree_RangeQuery callback;
1568         void *userdata;
1569
1570
1571 } RangeQueryData;
1572
1573
1574 static void dfs_range_query(RangeQueryData *data, BVHNode *node)
1575 {
1576         if (node->totnode == 0) {
1577 #if 0   /*UNUSED*/
1578                 //Calculate the node min-coords (if the node was a point then this is the point coordinates)
1579                 float co[3];
1580                 co[0] = node->bv[0];
1581                 co[1] = node->bv[2];
1582                 co[2] = node->bv[4];
1583 #endif
1584         }
1585         else {
1586                 int i;
1587                 for (i = 0; i != node->totnode; i++) {
1588                         float nearest[3];
1589                         float dist = calc_nearest_point(data->center, node->children[i], nearest);
1590                         if (dist < data->radius) {
1591                                 //Its a leaf.. call the callback
1592                                 if (node->children[i]->totnode == 0) {
1593                                         data->hits++;
1594                                         data->callback(data->userdata, node->children[i]->index, dist);
1595                                 }
1596                                 else
1597                                         dfs_range_query(data, node->children[i]);
1598                         }
1599                 }
1600         }
1601 }
1602
1603 int BLI_bvhtree_range_query(BVHTree *tree, const float co[3], float radius, BVHTree_RangeQuery callback, void *userdata)
1604 {
1605         BVHNode *root = tree->nodes[tree->totleaf];
1606
1607         RangeQueryData data;
1608         data.tree = tree;
1609         data.center = co;
1610         data.radius = radius * radius;
1611         data.hits = 0;
1612
1613         data.callback = callback;
1614         data.userdata = userdata;
1615
1616         if (root != NULL) {
1617                 float nearest[3];
1618                 float dist = calc_nearest_point(data.center, root, nearest);
1619                 if (dist < data.radius) {
1620                         /* Its a leaf.. call the callback */
1621                         if (root->totnode == 0) {
1622                                 data.hits++;
1623                                 data.callback(data.userdata, root->index, dist);
1624                         }
1625                         else
1626                                 dfs_range_query(&data, root);
1627                 }
1628         }
1629
1630         return data.hits;
1631 }