Merge from trunk: 15912:16031
[blender.git] / source / blender / blenlib / intern / BLI_kdopbvh.c
1 /**
2  *
3  * ***** BEGIN GPL LICENSE BLOCK *****
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software Foundation,
17  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
18  *
19  * The Original Code is Copyright (C) 2006 by NaN Holding BV.
20  * All rights reserved.
21  *
22  * The Original Code is: all of this file.
23  *
24  * Contributor(s): Daniel Genrich, Andre Pinto
25  *
26  * ***** END GPL LICENSE BLOCK *****
27  */
28
29 #include "math.h"
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "MEM_guardedalloc.h"
36
37 #include "BKE_utildefines.h"
38
39 #include "BLI_kdopbvh.h"
40 #include "BLI_arithb.h"
41
42 #ifdef _OPENMP
43 #include <omp.h>
44 #endif
45
46
47
48 #define MAX_TREETYPE 32
49
50 typedef struct BVHNode
51 {
52         struct BVHNode **children;
53         float *bv;              // Bounding volume of all nodes, max 13 axis
54         int index;              // face, edge, vertex index
55         char totnode;   // how many nodes are used, used for speedup
56         char main_axis; // Axis used to split this node
57 } BVHNode;
58
59 struct BVHTree
60 {
61         BVHNode **nodes;
62         BVHNode *nodearray; /* pre-alloc branch nodes */
63         BVHNode **nodechild;    // pre-alloc childs for nodes
64         float   *nodebv;                // pre-alloc bounding-volumes for nodes
65         float   epsilon; /* epslion is used for inflation of the k-dop     */
66         int     totleaf; // leafs
67         int     totbranch;
68         char    tree_type; // type of tree (4 => quadtree)
69         char    axis; // kdop type (6 => OBB, 7 => AABB, ...)
70         char    start_axis, stop_axis; // KDOP_AXES array indices according to axis
71 };
72
73 typedef struct BVHOverlapData 
74 {  
75         BVHTree *tree1, *tree2; 
76         BVHTreeOverlap *overlap; 
77         int i, max_overlap; /* i is number of overlaps */
78         int start_axis, stop_axis;
79 } BVHOverlapData;
80
81 typedef struct BVHNearestData
82 {
83         BVHTree *tree;
84         float   *co;
85         BVHTree_NearestPointCallback callback;
86         void    *userdata;
87         float proj[13];                 //coordinates projection over axis
88         BVHTreeNearest nearest;
89
90 } BVHNearestData;
91
92 typedef struct BVHRayCastData
93 {
94         BVHTree *tree;
95
96         BVHTree_RayCastCallback callback;
97         void    *userdata;
98
99
100         BVHTreeRay    ray;
101         float ray_dot_axis[13];
102
103         BVHTreeRayHit hit;
104 } BVHRayCastData;
105 ////////////////////////////////////////m
106
107
108 ////////////////////////////////////////////////////////////////////////
109 // Bounding Volume Hierarchy Definition
110 // 
111 // Notes: From OBB until 26-DOP --> all bounding volumes possible, just choose type below
112 // Notes: You have to choose the type at compile time ITM
113 // Notes: You can choose the tree type --> binary, quad, octree, choose below
114 ////////////////////////////////////////////////////////////////////////
115
116 static float KDOP_AXES[13][3] =
117 { {1.0, 0, 0}, {0, 1.0, 0}, {0, 0, 1.0}, {1.0, 1.0, 1.0}, {1.0, -1.0, 1.0}, {1.0, 1.0, -1.0},
118 {1.0, -1.0, -1.0}, {1.0, 1.0, 0}, {1.0, 0, 1.0}, {0, 1.0, 1.0}, {1.0, -1.0, 0}, {1.0, 0, -1.0},
119 {0, 1.0, -1.0}
120 };
121
122 //////////////////////////////////////////////////////////////////////////////////////////////////////
123 // Introsort 
124 // with permission deriven from the following Java code:
125 // http://ralphunden.net/content/tutorials/a-guide-to-introsort/
126 // and he derived it from the SUN STL 
127 //////////////////////////////////////////////////////////////////////////////////////////////////////
128 static int size_threshold = 16;
129 /*
130 * Common methods for all algorithms
131 */
132 static int floor_lg(int a)
133 {
134         return (int)(floor(log(a)/log(2)));
135 }
136
137 /*
138 * Insertion sort algorithm
139 */
140 static void bvh_insertionsort(BVHNode **a, int lo, int hi, int axis)
141 {
142         int i,j;
143         BVHNode *t;
144         for (i=lo; i < hi; i++)
145         {
146                 j=i;
147                 t = a[i];
148                 while((j!=lo) && (t->bv[axis] < (a[j-1])->bv[axis]))
149                 {
150                         a[j] = a[j-1];
151                         j--;
152                 }
153                 a[j] = t;
154         }
155 }
156
157 static int bvh_partition(BVHNode **a, int lo, int hi, BVHNode * x, int axis)
158 {
159         int i=lo, j=hi;
160         while (1)
161         {
162                 while ((a[i])->bv[axis] < x->bv[axis]) i++;
163                 j--;
164                 while (x->bv[axis] < (a[j])->bv[axis]) j--;
165                 if(!(i < j))
166                         return i;
167                 SWAP( BVHNode* , a[i], a[j]);
168                 i++;
169         }
170 }
171
172 /*
173 * Heapsort algorithm
174 */
175 static void bvh_downheap(BVHNode **a, int i, int n, int lo, int axis)
176 {
177         BVHNode * d = a[lo+i-1];
178         int child;
179         while (i<=n/2)
180         {
181                 child = 2*i;
182                 if ((child < n) && ((a[lo+child-1])->bv[axis] < (a[lo+child])->bv[axis]))
183                 {
184                         child++;
185                 }
186                 if (!(d->bv[axis] < (a[lo+child-1])->bv[axis])) break;
187                 a[lo+i-1] = a[lo+child-1];
188                 i = child;
189         }
190         a[lo+i-1] = d;
191 }
192
193 static void bvh_heapsort(BVHNode **a, int lo, int hi, int axis)
194 {
195         int n = hi-lo, i;
196         for (i=n/2; i>=1; i=i-1)
197         {
198                 bvh_downheap(a, i,n,lo, axis);
199         }
200         for (i=n; i>1; i=i-1)
201         {
202                 SWAP(BVHNode*, a[lo],a[lo+i-1]);
203                 bvh_downheap(a, 1,i-1,lo, axis);
204         }
205 }
206
207 static BVHNode *bvh_medianof3(BVHNode **a, int lo, int mid, int hi, int axis) // returns Sortable
208 {
209         if ((a[mid])->bv[axis] < (a[lo])->bv[axis])
210         {
211                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
212                         return a[mid];
213                 else
214                 {
215                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
216                                 return a[hi];
217                         else
218                                 return a[lo];
219                 }
220         }
221         else
222         {
223                 if ((a[hi])->bv[axis] < (a[mid])->bv[axis])
224                 {
225                         if ((a[hi])->bv[axis] < (a[lo])->bv[axis])
226                                 return a[lo];
227                         else
228                                 return a[hi];
229                 }
230                 else
231                         return a[mid];
232         }
233 }
234 /*
235 * Quicksort algorithm modified for Introsort
236 */
237 static void bvh_introsort_loop (BVHNode **a, int lo, int hi, int depth_limit, int axis)
238 {
239         int p;
240
241         while (hi-lo > size_threshold)
242         {
243                 if (depth_limit == 0)
244                 {
245                         bvh_heapsort(a, lo, hi, axis);
246                         return;
247                 }
248                 depth_limit=depth_limit-1;
249                 p=bvh_partition(a, lo, hi, bvh_medianof3(a, lo, lo+((hi-lo)/2)+1, hi-1, axis), axis);
250                 bvh_introsort_loop(a, p, hi, depth_limit, axis);
251                 hi=p;
252         }
253 }
254
255 static void sort(BVHNode **a0, int begin, int end, int axis)
256 {
257         if (begin < end)
258         {
259                 BVHNode **a=a0;
260                 bvh_introsort_loop(a, begin, end, 2*floor_lg(end-begin), axis);
261                 bvh_insertionsort(a, begin, end, axis);
262         }
263 }
264 void sort_along_axis(BVHTree *tree, int start, int end, int axis)
265 {
266         sort(tree->nodes, start, end, axis);
267 }
268
269 //after a call to this function you can expect one of:
270 //      every node to left of a[n] are smaller or equal to it
271 //      every node to the right of a[n] are greater or equal to it
272 int partition_nth_element(BVHNode **a, int _begin, int _end, int n, int axis){
273         int begin = _begin, end = _end, cut;
274         while(end-begin > 3)
275         {
276                 cut = bvh_partition(a, begin, end, bvh_medianof3(a, begin, (begin+end)/2, end-1, axis), axis );
277                 if(cut <= n)
278                         begin = cut;
279                 else
280                         end = cut;
281         }
282         bvh_insertionsort(a, begin, end, axis);
283
284         return n;
285 }
286
287 //////////////////////////////////////////////////////////////////////////////////////////////////////
288
289 /*
290  * BVHTree bounding volumes functions
291  */
292 static void create_kdop_hull(BVHTree *tree, BVHNode *node, float *co, int numpoints, int moving)
293 {
294         float newminmax;
295         float *bv = node->bv;
296         int i, k;
297         
298         // don't init boudings for the moving case
299         if(!moving)
300         {
301                 for (i = tree->start_axis; i < tree->stop_axis; i++)
302                 {
303                         bv[2*i] = FLT_MAX;
304                         bv[2*i + 1] = -FLT_MAX;
305                 }
306         }
307         
308         for(k = 0; k < numpoints; k++)
309         {
310                 // for all Axes.
311                 for (i = tree->start_axis; i < tree->stop_axis; i++)
312                 {
313                         newminmax = INPR(&co[k * 3], KDOP_AXES[i]);
314                         if (newminmax < bv[2 * i])
315                                 bv[2 * i] = newminmax;
316                         if (newminmax > bv[(2 * i) + 1])
317                                 bv[(2 * i) + 1] = newminmax;
318                 }
319         }
320 }
321
322 // depends on the fact that the BVH's for each face is already build
323 static void refit_kdop_hull(BVHTree *tree, BVHNode *node, int start, int end)
324 {
325         float newmin,newmax;
326         int i, j;
327         float *bv = node->bv;
328
329         
330         for (i = tree->start_axis; i < tree->stop_axis; i++)
331         {
332                 bv[2*i] = FLT_MAX;
333                 bv[2*i + 1] = -FLT_MAX;
334         }
335
336         for (j = start; j < end; j++)
337         {
338 // for all Axes.
339                 for (i = tree->start_axis; i < tree->stop_axis; i++)
340                 {
341                         newmin = tree->nodes[j]->bv[(2 * i)];   
342                         if ((newmin < bv[(2 * i)]))
343                                 bv[(2 * i)] = newmin;
344  
345                         newmax = tree->nodes[j]->bv[(2 * i) + 1];
346                         if ((newmax > bv[(2 * i) + 1]))
347                                 bv[(2 * i) + 1] = newmax;
348                 }
349         }
350
351 }
352
353 // only supports x,y,z axis in the moment
354 // but we should use a plain and simple function here for speed sake
355 static char get_largest_axis(float *bv)
356 {
357         float middle_point[3];
358
359         middle_point[0] = (bv[1]) - (bv[0]); // x axis
360         middle_point[1] = (bv[3]) - (bv[2]); // y axis
361         middle_point[2] = (bv[5]) - (bv[4]); // z axis
362         if (middle_point[0] > middle_point[1]) 
363         {
364                 if (middle_point[0] > middle_point[2])
365                         return 1; // max x axis
366                 else
367                         return 5; // max z axis
368         }
369         else 
370         {
371                 if (middle_point[1] > middle_point[2])
372                         return 3; // max y axis
373                 else
374                         return 5; // max z axis
375         }
376 }
377
378 // bottom-up update of bvh node BV
379 // join the children on the parent BV
380 static void node_join(BVHTree *tree, BVHNode *node)
381 {
382         int i, j;
383         
384         for (i = tree->start_axis; i < tree->stop_axis; i++)
385         {
386                 node->bv[2*i] = FLT_MAX;
387                 node->bv[2*i + 1] = -FLT_MAX;
388         }
389         
390         for (i = 0; i < tree->tree_type; i++)
391         {
392                 if (node->children[i]) 
393                 {
394                         for (j = tree->start_axis; j < tree->stop_axis; j++)
395                         {
396                                 // update minimum 
397                                 if (node->children[i]->bv[(2 * j)] < node->bv[(2 * j)]) 
398                                         node->bv[(2 * j)] = node->children[i]->bv[(2 * j)];
399                                 
400                                 // update maximum 
401                                 if (node->children[i]->bv[(2 * j) + 1] > node->bv[(2 * j) + 1])
402                                         node->bv[(2 * j) + 1] = node->children[i]->bv[(2 * j) + 1];
403                         }
404                 }
405                 else
406                         break;
407         }
408 }
409
410 /*
411  * Debug and information functions
412  */
413 static void bvhtree_print_tree(BVHTree *tree, BVHNode *node, int depth)
414 {
415         int i;
416         for(i=0; i<depth; i++) printf(" ");
417         printf(" - %d (%d): ", node->index, node - tree->nodearray);
418         for(i=2*tree->start_axis; i<2*tree->stop_axis; i++)
419                 printf("%.3f ", node->bv[i]);
420         printf("\n");
421
422         for(i=0; i<tree->tree_type; i++)
423                 if(node->children[i])
424                         bvhtree_print_tree(tree, node->children[i], depth+1);
425 }
426
427 static void bvhtree_info(BVHTree *tree)
428 {
429         printf("BVHTree info\n");
430         printf("tree_type = %d, axis = %d, epsilon = %f\n", tree->tree_type, tree->axis, tree->epsilon);
431         printf("nodes = %d, branches = %d, leafs = %d\n", tree->totbranch + tree->totleaf,  tree->totbranch, tree->totleaf);
432         printf("Memory per node = %dbytes\n", sizeof(BVHNode) + sizeof(BVHNode*)*tree->tree_type + sizeof(float)*tree->axis);
433         printf("BV memory = %dbytes\n", MEM_allocN_len(tree->nodebv));
434
435         printf("Total memory = %dbytes\n", sizeof(BVHTree)
436                 + MEM_allocN_len(tree->nodes)
437                 + MEM_allocN_len(tree->nodearray)
438                 + MEM_allocN_len(tree->nodechild)
439                 + MEM_allocN_len(tree->nodebv)
440                 );
441
442 //      bvhtree_print_tree(tree, tree->nodes[tree->totleaf], 0);
443 }
444
445 #if 0
446
447
448 static void verify_tree(BVHTree *tree)
449 {
450         int i, j, check = 0;
451         
452         // check the pointer list
453         for(i = 0; i < tree->totleaf; i++)
454         {
455                 if(tree->nodes[i]->parent == NULL)
456                         printf("Leaf has no parent: %d\n", i);
457                 else
458                 {
459                         for(j = 0; j < tree->tree_type; j++)
460                         {
461                                 if(tree->nodes[i]->parent->children[j] == tree->nodes[i])
462                                         check = 1;
463                         }
464                         if(!check)
465                         {
466                                 printf("Parent child relationship doesn't match: %d\n", i);
467                         }
468                         check = 0;
469                 }
470         }
471         
472         // check the leaf list
473         for(i = 0; i < tree->totleaf; i++)
474         {
475                 if(tree->nodearray[i].parent == NULL)
476                         printf("Leaf has no parent: %d\n", i);
477                 else
478                 {
479                         for(j = 0; j < tree->tree_type; j++)
480                         {
481                                 if(tree->nodearray[i].parent->children[j] == &tree->nodearray[i])
482                                         check = 1;
483                         }
484                         if(!check)
485                         {
486                                 printf("Parent child relationship doesn't match: %d\n", i);
487                         }
488                         check = 0;
489                 }
490         }
491         
492         printf("branches: %d, leafs: %d, total: %d\n", tree->totbranch, tree->totleaf, tree->totbranch + tree->totleaf);
493 }
494 #endif
495
496 //Helper data and structures to build a min-leaf generalized implicit tree
497 //This code can be easily reduced (basicly this is only method to calculate pow(k, n) in O(1).. and stuff like that)
498 typedef struct BVHBuildHelper
499 {
500         int tree_type;                          //
501         int totleafs;                           //
502
503         int leafs_per_child  [32];      //Min number of leafs that are archievable from a node at depth N
504         int branches_on_level[32];      //Number of nodes at depth N (tree_type^N)
505
506         int remain_leafs;                       //Number of leafs that are placed on the level that is not 100% filled
507
508 } BVHBuildHelper;
509
510 static void build_implicit_tree_helper(BVHTree *tree, BVHBuildHelper *data)
511 {
512         int depth = 0;
513         int remain;
514         int nnodes;
515
516         data->totleafs = tree->totleaf;
517         data->tree_type= tree->tree_type;
518
519         //Calculate the smallest tree_type^n such that tree_type^n >= num_leafs
520         for(
521                 data->leafs_per_child[0] = 1;
522                 data->leafs_per_child[0] <  data->totleafs;
523                 data->leafs_per_child[0] *= data->tree_type
524         );
525
526         data->branches_on_level[0] = 1;
527
528         //We could stop the loop first (but I am lazy to find out when)
529         for(depth = 1; depth < 32; depth++)
530         {
531                 data->branches_on_level[depth] = data->branches_on_level[depth-1] * data->tree_type;
532                 data->leafs_per_child  [depth] = data->leafs_per_child  [depth-1] / data->tree_type;
533         }
534
535         remain = data->totleafs - data->leafs_per_child[1];
536         nnodes = (remain + data->tree_type - 2) / (data->tree_type - 1);
537         data->remain_leafs = remain + nnodes;
538 }
539
540 // return the min index of all the leafs archivable with the given branch
541 static int implicit_leafs_index(BVHBuildHelper *data, int depth, int child_index)
542 {
543         int min_leaf_index = child_index * data->leafs_per_child[depth-1];
544         if(min_leaf_index <= data->remain_leafs)
545                 return min_leaf_index;
546         else if(data->leafs_per_child[depth])
547                 return data->totleafs - (data->branches_on_level[depth-1] - child_index) * data->leafs_per_child[depth];
548         else
549                 return data->remain_leafs;
550 }
551
552 /**
553  * Generalized implicit tree build
554  *
555  * An implicit tree is a tree where its structure is implied, thus there is no need to store child pointers or indexs.
556  * Its possible to find the position of the child or the parent with simple maths (multiplication and adittion). This type
557  * of tree is for example used on heaps.. where node N has its childs at indexs N*2 and N*2+1.
558  *
559  * Altought in this case the tree type is general.. and not know until runtime.
560  * tree_type stands for the maximum number of childs that a tree node can have.
561  * All tree types >= 2 are supported.
562  *
563  * Advantages of the used trees include:
564  *  - No need to store child/parent relations (they are implicit);
565  *  - Any node child always has an index greater than the parent;
566  *  - Brother nodes are sequencial in memory;
567  *
568  *
569  * Some math relations derived for general implicit trees:
570  *
571  *   K = tree_type, ( 2 <= K )
572  *   ROOT = 1
573  *   N child of node A = A * K + (2 - K) + N, (0 <= N < K)
574  *
575  * Util methods:
576  *   TODO...
577  *    (looping elements, knowing if its a leaf or not.. etc...)
578  */
579
580 // This functions returns the number of branches needed to have the requested number of leafs.
581 static int implicit_needed_branches(int tree_type, int leafs)
582 {
583         return MAX2(1, (leafs + tree_type - 3) / (tree_type-1) );
584 }
585
586 /*
587  * This function handles the problem of "sorting" the leafs (along the split_axis).
588  *
589  * It arranges the elements in the given partitions such that:
590  *  - any element in partition N is less or equal to any element in partition N+1.
591  *  - if all elements are diferent all partition will get the same subset of elements
592  *    as if the array was sorted.
593  *
594  * partition P is described as the elements in the range ( nth[P] , nth[P+1] ]
595  *
596  * TODO: This can be optimized a bit by doing a specialized nth_element instead of K nth_elements
597  */
598 static void split_leafs(BVHNode **leafs_array, int *nth, int partitions, int split_axis)
599 {
600         int i;
601         for(i=0; i < partitions-1; i++)
602         {
603                 if(nth[i] >= nth[partitions])
604                         break;
605
606                 partition_nth_element(leafs_array, nth[i], nth[partitions], nth[i+1], split_axis);
607         }
608 }
609
610 /*
611  * This functions builds an optimal implicit tree from the given leafs.
612  * Where optimal stands for:
613  *  - The resulting tree will have the smallest number of branches;
614  *  - At most only one branch will have NULL childs;
615  *  - All leafs will be stored at level N or N+1.
616  *
617  * This function creates an implicit tree on branches_array, the leafs are given on the leafs_array.
618  *
619  * The tree is built per depth levels. First branchs at depth 1.. then branches at depth 2.. etc..
620  * The reason is that we can build level N+1 from level N witouth any data dependencies.. thus it allows
621  * to use multithread building.
622  *
623  * To archieve this is necessary to find how much leafs are accessible from a certain branch, BVHBuildHelper
624  * implicit_needed_branches and implicit_leafs_index are auxiliar functions to solve that "optimal-split".
625  */
626 static void non_recursive_bvh_div_nodes(BVHTree *tree, BVHNode *branches_array, BVHNode **leafs_array, int num_leafs)
627 {
628         int i;
629
630         const int tree_type   = tree->tree_type;
631         const int tree_offset = 2 - tree->tree_type; //this value is 0 (on binary trees) and negative on the others
632         const int num_branches= implicit_needed_branches(tree_type, num_leafs);
633
634         BVHBuildHelper data;
635         int depth;
636
637         branches_array--;       //Implicit trees use 1-based indexs
638         
639         build_implicit_tree_helper(tree, &data);
640
641         //Loop tree levels (log N) loops
642         for(i=1, depth = 1; i <= num_branches; i = i*tree_type + tree_offset, depth++)
643         {
644                 const int first_of_next_level = i*tree_type + tree_offset;
645                 const int  end_j = MIN2(first_of_next_level, num_branches + 1); //index of last branch on this level
646                 int j;
647
648                 //Loop all branches on this level
649 #pragma omp parallel for private(j) schedule(static)
650                 for(j = i; j < end_j; j++)
651                 {
652                         int k;
653                         const int parent_level_index= j-i;
654                         BVHNode* parent = branches_array + j;
655                         int nth_positions[ MAX_TREETYPE + 1];
656                         char split_axis;
657
658                         int parent_leafs_begin = implicit_leafs_index(&data, depth, parent_level_index);
659                         int parent_leafs_end   = implicit_leafs_index(&data, depth, parent_level_index+1);
660
661                         //This calculates the bounding box of this branch
662                         //and chooses the largest axis as the axis to divide leafs
663                         refit_kdop_hull(tree, parent, parent_leafs_begin, parent_leafs_end);
664                         split_axis = get_largest_axis(parent->bv);
665
666                         //Save split axis (this can be used on raytracing to speedup the query time)
667                         parent->main_axis = split_axis / 2;
668
669                         //Split the childs along the split_axis, note: its not needed to sort the whole leafs array
670                         //Only to assure that the elements are partioned on a way that each child takes the elements
671                         //it would take in case the whole array was sorted.
672                         //Split_leafs takes care of that "sort" problem.
673                         nth_positions[        0] = parent_leafs_begin;
674                         nth_positions[tree_type] = parent_leafs_end;
675                         for(k = 1; k < tree_type; k++)
676                         {
677                                 int child_index = j * tree_type + tree_offset + k;
678                                 int child_level_index = child_index - first_of_next_level; //child level index
679                                 nth_positions[k] = implicit_leafs_index(&data, depth+1, child_level_index);
680                         }
681
682                         split_leafs(leafs_array, nth_positions, tree_type, split_axis);
683
684
685                         //Setup children and totnode counters
686                         //Not really needed but currently most of BVH code relies on having an explicit children structure
687                         for(k = 0; k < tree_type; k++)
688                         {
689                                 int child_index = j * tree_type + tree_offset + k;
690                                 int child_level_index = child_index - first_of_next_level; //child level index
691
692                                 int child_leafs_begin = implicit_leafs_index(&data, depth+1, child_level_index);
693                                 int child_leafs_end   = implicit_leafs_index(&data, depth+1, child_level_index+1);
694
695                                 if(child_leafs_end - child_leafs_begin > 1)
696                                         parent->children[k] = branches_array + child_index;
697                                 else if(child_leafs_end - child_leafs_begin == 1)
698                                         parent->children[k] = leafs_array[ child_leafs_begin ];
699                                 else
700                                         break;
701
702                                 parent->totnode = k+1;
703                         }
704                 }
705         }
706 }
707
708
709 /*
710  * BLI_bvhtree api
711  */
712 BVHTree *BLI_bvhtree_new(int maxsize, float epsilon, char tree_type, char axis)
713 {
714         BVHTree *tree;
715         int numnodes, i;
716         
717         // theres not support for trees below binary-trees :P
718         if(tree_type < 2)
719                 return NULL;
720         
721         if(tree_type > MAX_TREETYPE)
722                 return NULL;
723
724         tree = (BVHTree *)MEM_callocN(sizeof(BVHTree), "BVHTree");
725         
726         if(tree)
727         {
728                 tree->epsilon = epsilon;
729                 tree->tree_type = tree_type; 
730                 tree->axis = axis;
731                 
732                 if(axis == 26)
733                 {
734                         tree->start_axis = 0;
735                         tree->stop_axis = 13;
736                 }
737                 else if(axis == 18)
738                 {
739                         tree->start_axis = 7;
740                         tree->stop_axis = 13;
741                 }
742                 else if(axis == 14)
743                 {
744                         tree->start_axis = 0;
745                         tree->stop_axis = 7;
746                 }
747                 else if(axis == 8) // AABB
748                 {
749                         tree->start_axis = 0;
750                         tree->stop_axis = 4;
751                 }
752                 else if(axis == 6) // OBB
753                 {
754                         tree->start_axis = 0;
755                         tree->stop_axis = 3;
756                 }
757                 else
758                 {
759                         MEM_freeN(tree);
760                         return NULL;
761                 }
762
763
764                 //Allocate arrays
765                 numnodes = maxsize + implicit_needed_branches(tree_type, maxsize) + tree_type;
766
767                 tree->nodes = (BVHNode **)MEM_callocN(sizeof(BVHNode *)*numnodes, "BVHNodes");
768                 
769                 if(!tree->nodes)
770                 {
771                         MEM_freeN(tree);
772                         return NULL;
773                 }
774                 
775                 tree->nodebv = (float*)MEM_callocN(sizeof(float)* axis * numnodes, "BVHNodeBV");
776                 if(!tree->nodebv)
777                 {
778                         MEM_freeN(tree->nodes);
779                         MEM_freeN(tree);
780                 }
781
782                 tree->nodechild = (BVHNode**)MEM_callocN(sizeof(BVHNode*) * tree_type * numnodes, "BVHNodeBV");
783                 if(!tree->nodechild)
784                 {
785                         MEM_freeN(tree->nodebv);
786                         MEM_freeN(tree->nodes);
787                         MEM_freeN(tree);
788                 }
789
790                 tree->nodearray = (BVHNode *)MEM_callocN(sizeof(BVHNode)* numnodes, "BVHNodeArray");
791                 
792                 if(!tree->nodearray)
793                 {
794                         MEM_freeN(tree->nodechild);
795                         MEM_freeN(tree->nodebv);
796                         MEM_freeN(tree->nodes);
797                         MEM_freeN(tree);
798                         return NULL;
799                 }
800
801                 //link the dynamic bv and child links
802                 for(i=0; i< numnodes; i++)
803                 {
804                         tree->nodearray[i].bv = tree->nodebv + i * axis;
805                         tree->nodearray[i].children = tree->nodechild + i * tree_type;
806                 }
807                 
808         }
809
810         return tree;
811 }
812
813 void BLI_bvhtree_free(BVHTree *tree)
814 {       
815         if(tree)
816         {
817                 MEM_freeN(tree->nodes);
818                 MEM_freeN(tree->nodearray);
819                 MEM_freeN(tree->nodebv);
820                 MEM_freeN(tree->nodechild);
821                 MEM_freeN(tree);
822         }
823 }
824
825 void BLI_bvhtree_balance(BVHTree *tree)
826 {
827         int i;
828
829         BVHNode*  branches_array = tree->nodearray + tree->totleaf;
830         BVHNode** leafs_array    = tree->nodes;
831
832         //This function should only be called once (some big bug goes here if its being called more than once per tree)
833         assert(tree->totbranch == 0);
834
835         //Build the implicit tree
836         non_recursive_bvh_div_nodes(tree, branches_array, leafs_array, tree->totleaf);
837
838         //current code expects the branches to be linked to the nodes array
839         //we perform that linkage here
840         tree->totbranch = implicit_needed_branches(tree->tree_type, tree->totleaf);
841         for(i = 0; i < tree->totbranch; i++)
842                 tree->nodes[tree->totleaf + i] = branches_array + i;
843
844         //bvhtree_info(tree);
845 }
846
847 int BLI_bvhtree_insert(BVHTree *tree, int index, float *co, int numpoints)
848 {
849         int i;
850         BVHNode *node = NULL;
851         
852         // insert should only possible as long as tree->totbranch is 0
853         if(tree->totbranch > 0)
854                 return 0;
855         
856         if(tree->totleaf+1 >= MEM_allocN_len(tree->nodes)/sizeof(*(tree->nodes)))
857                 return 0;
858         
859         // TODO check if have enough nodes in array
860         
861         node = tree->nodes[tree->totleaf] = &(tree->nodearray[tree->totleaf]);
862         tree->totleaf++;
863         
864         create_kdop_hull(tree, node, co, numpoints, 0);
865         node->index= index;
866         
867         // inflate the bv with some epsilon
868         for (i = tree->start_axis; i < tree->stop_axis; i++)
869         {
870                 node->bv[(2 * i)] -= tree->epsilon; // minimum 
871                 node->bv[(2 * i) + 1] += tree->epsilon; // maximum 
872         }
873
874         return 1;
875 }
876
877
878 // call before BLI_bvhtree_update_tree()
879 int BLI_bvhtree_update_node(BVHTree *tree, int index, float *co, float *co_moving, int numpoints)
880 {
881         int i;
882         BVHNode *node= NULL;
883         
884         // check if index exists
885         if(index > tree->totleaf)
886                 return 0;
887         
888         node = tree->nodearray + index;
889         
890         create_kdop_hull(tree, node, co, numpoints, 0);
891         
892         if(co_moving)
893                 create_kdop_hull(tree, node, co_moving, numpoints, 1);
894         
895         // inflate the bv with some epsilon
896         for (i = tree->start_axis; i < tree->stop_axis; i++)
897         {
898                 node->bv[(2 * i)] -= tree->epsilon; // minimum 
899                 node->bv[(2 * i) + 1] += tree->epsilon; // maximum 
900         }
901
902         return 1;
903 }
904
905 // call BLI_bvhtree_update_node() first for every node/point/triangle
906 void BLI_bvhtree_update_tree(BVHTree *tree)
907 {
908         //Update bottom=>top
909         //TRICKY: the way we build the tree all the childs have an index greater than the parent
910         //This allows us todo a bottom up update by starting on the biger numbered branch
911
912         BVHNode** root  = tree->nodes + tree->totleaf;
913         BVHNode** index = tree->nodes + tree->totleaf + tree->totbranch-1;
914
915         for (; index >= root; index--)
916                 node_join(tree, *index);
917 }
918
919 float BLI_bvhtree_getepsilon(BVHTree *tree)
920 {
921         return tree->epsilon;
922 }
923
924
925 /*
926  * BLI_bvhtree_overlap
927  */
928 // overlap - is it possbile for 2 bv's to collide ?
929 static int tree_overlap(BVHNode *node1, BVHNode *node2, int start_axis, int stop_axis)
930 {
931         float *bv1 = node1->bv;
932         float *bv2 = node2->bv;
933
934         float *bv1_end = bv1 + (stop_axis<<1);
935                 
936         bv1 += start_axis<<1;
937         bv2 += start_axis<<1;
938         
939         // test all axis if min + max overlap
940         for (; bv1 != bv1_end; bv1+=2, bv2+=2)
941         {
942                 if ((*(bv1) > *(bv2 + 1)) || (*(bv2) > *(bv1 + 1))) 
943                         return 0;
944         }
945         
946         return 1;
947 }
948
949 static void traverse(BVHOverlapData *data, BVHNode *node1, BVHNode *node2)
950 {
951         int j;
952         
953         if(tree_overlap(node1, node2, data->start_axis, data->stop_axis))
954         {
955                 // check if node1 is a leaf
956                 if(!node1->totnode)
957                 {
958                         // check if node2 is a leaf
959                         if(!node2->totnode)
960                         {
961                                 
962                                 if(node1 == node2)
963                                 {
964                                         return;
965                                 }
966                                         
967                                 if(data->i >= data->max_overlap)
968                                 {       
969                                         // try to make alloc'ed memory bigger
970                                         data->overlap = realloc(data->overlap, sizeof(BVHTreeOverlap)*data->max_overlap*2);
971                                         
972                                         if(!data->overlap)
973                                         {
974                                                 printf("Out of Memory in traverse\n");
975                                                 return;
976                                         }
977                                         data->max_overlap *= 2;
978                                 }
979                                 
980                                 // both leafs, insert overlap!
981                                 data->overlap[data->i].indexA = node1->index;
982                                 data->overlap[data->i].indexB = node2->index;
983
984                                 data->i++;
985                         }
986                         else
987                         {
988                                 for(j = 0; j < data->tree2->tree_type; j++)
989                                 {
990                                         if(node2->children[j])
991                                                 traverse(data, node1, node2->children[j]);
992                                 }
993                         }
994                 }
995                 else
996                 {
997                         
998                         for(j = 0; j < data->tree2->tree_type; j++)
999                         {
1000                                 if(node1->children[j])
1001                                         traverse(data, node1->children[j], node2);
1002                         }
1003                 }
1004         }
1005         return;
1006 }
1007
1008 BVHTreeOverlap *BLI_bvhtree_overlap(BVHTree *tree1, BVHTree *tree2, int *result)
1009 {
1010         int j, total = 0;
1011         BVHTreeOverlap *overlap = NULL, *to = NULL;
1012         BVHOverlapData **data;
1013         
1014         // check for compatibility of both trees (can't compare 14-DOP with 18-DOP)
1015         if((tree1->axis != tree2->axis) && ((tree1->axis == 14) || tree2->axis == 14))
1016                 return 0;
1017         
1018         // fast check root nodes for collision before doing big splitting + traversal
1019         if(!tree_overlap(tree1->nodes[tree1->totleaf], tree2->nodes[tree2->totleaf], MIN2(tree1->start_axis, tree2->start_axis), MIN2(tree1->stop_axis, tree2->stop_axis)))
1020                 return 0;
1021
1022         data = MEM_callocN(sizeof(BVHOverlapData *)* tree1->tree_type, "BVHOverlapData_star");
1023         
1024         for(j = 0; j < tree1->tree_type; j++)
1025         {
1026                 data[j] = (BVHOverlapData *)MEM_callocN(sizeof(BVHOverlapData), "BVHOverlapData");
1027                 
1028                 // init BVHOverlapData
1029                 data[j]->overlap = (BVHTreeOverlap *)malloc(sizeof(BVHTreeOverlap)*MAX2(tree1->totleaf, tree2->totleaf));
1030                 data[j]->tree1 = tree1;
1031                 data[j]->tree2 = tree2;
1032                 data[j]->max_overlap = MAX2(tree1->totleaf, tree2->totleaf);
1033                 data[j]->i = 0;
1034                 data[j]->start_axis = MIN2(tree1->start_axis, tree2->start_axis);
1035                 data[j]->stop_axis  = MIN2(tree1->stop_axis,  tree2->stop_axis );
1036         }
1037
1038 #pragma omp parallel for private(j) schedule(static)
1039         for(j = 0; j < MIN2(tree1->tree_type, tree1->nodes[tree1->totleaf]->totnode); j++)
1040         {
1041                 traverse(data[j], tree1->nodes[tree1->totleaf]->children[j], tree2->nodes[tree2->totleaf]);
1042         }
1043         
1044         for(j = 0; j < tree1->tree_type; j++)
1045                 total += data[j]->i;
1046         
1047         to = overlap = (BVHTreeOverlap *)MEM_callocN(sizeof(BVHTreeOverlap)*total, "BVHTreeOverlap");
1048         
1049         for(j = 0; j < tree1->tree_type; j++)
1050         {
1051                 memcpy(to, data[j]->overlap, data[j]->i*sizeof(BVHTreeOverlap));
1052                 to+=data[j]->i;
1053         }
1054         
1055         for(j = 0; j < tree1->tree_type; j++)
1056         {
1057                 free(data[j]->overlap);
1058                 MEM_freeN(data[j]);
1059         }
1060         MEM_freeN(data);
1061         
1062         (*result) = total;
1063         return overlap;
1064 }
1065
1066
1067 /*
1068  * Nearest neighbour - BLI_bvhtree_find_nearest
1069  */
1070 static float squared_dist(const float *a, const float *b)
1071 {
1072         float tmp[3];
1073         VECSUB(tmp, a, b);
1074         return INPR(tmp, tmp);
1075 }
1076
1077 //Determines the nearest point of the given node BV. Returns the squared distance to that point.
1078 static float calc_nearest_point(BVHNearestData *data, BVHNode *node, float *nearest)
1079 {
1080         int i;
1081         const float *bv = node->bv;
1082
1083         //nearest on AABB hull
1084         for(i=0; i != 3; i++, bv += 2)
1085         {
1086                 if(bv[0] > data->proj[i])
1087                         nearest[i] = bv[0];
1088                 else if(bv[1] < data->proj[i])
1089                         nearest[i] = bv[1];
1090                 else
1091                         nearest[i] = data->proj[i];
1092         }
1093
1094 /*
1095         //nearest on a general hull
1096         VECCOPY(nearest, data->co);
1097         for(i = data->tree->start_axis; i != data->tree->stop_axis; i++, bv+=2)
1098         {
1099                 float proj = INPR( nearest, KDOP_AXES[i]);
1100                 float dl = bv[0] - proj;
1101                 float du = bv[1] - proj;
1102
1103                 if(dl > 0)
1104                 {
1105                         VECADDFAC(nearest, nearest, KDOP_AXES[i], dl);
1106                 }
1107                 else if(du < 0)
1108                 {
1109                         VECADDFAC(nearest, nearest, KDOP_AXES[i], du);
1110                 }
1111         }
1112 */
1113         return squared_dist(data->co, nearest);
1114 }
1115
1116
1117 // TODO: use a priority queue to reduce the number of nodes looked on
1118 static void dfs_find_nearest(BVHNearestData *data, BVHNode *node)
1119 {
1120         int i;
1121         float nearest[3], sdist;
1122
1123         sdist = calc_nearest_point(data, node, nearest);
1124         if(sdist >= data->nearest.dist) return;
1125
1126         if(node->totnode == 0)
1127         {
1128                 if(data->callback)
1129                         data->callback(data->userdata , node->index, data->co, &data->nearest);
1130                 else
1131                 {
1132                         data->nearest.index     = node->index;
1133                         VECCOPY(data->nearest.co, nearest);
1134                         data->nearest.dist      = sdist;
1135                 }
1136         }
1137         else
1138         {
1139                 for(i=0; i != node->totnode; i++)
1140                         dfs_find_nearest(data, node->children[i]);
1141         }
1142 }
1143
1144 int BLI_bvhtree_find_nearest(BVHTree *tree, const float *co, BVHTreeNearest *nearest, BVHTree_NearestPointCallback callback, void *userdata)
1145 {
1146         int i;
1147
1148         BVHNearestData data;
1149         BVHNode* root = tree->nodes[tree->totleaf];
1150
1151         //init data to search
1152         data.tree = tree;
1153         data.co = co;
1154
1155         data.callback = callback;
1156         data.userdata = userdata;
1157
1158         for(i = data.tree->start_axis; i != data.tree->stop_axis; i++)
1159         {
1160                 data.proj[i] = INPR(data.co, KDOP_AXES[i]);
1161         }
1162
1163         if(nearest)
1164         {
1165                 memcpy( &data.nearest , nearest, sizeof(*nearest) );
1166         }
1167         else
1168         {
1169                 data.nearest.index = -1;
1170                 data.nearest.dist = FLT_MAX;
1171         }
1172
1173         //dfs search
1174         if(root)
1175                 dfs_find_nearest(&data, root);
1176
1177         //copy back results
1178         if(nearest)
1179         {
1180                 memcpy(nearest, &data.nearest, sizeof(*nearest));
1181         }
1182
1183         return data.nearest.index;
1184 }
1185
1186
1187 /*
1188  * Raycast - BLI_bvhtree_ray_cast
1189  *
1190  * raycast is done by performing a DFS on the BVHTree and saving the closest hit
1191  */
1192
1193 //Determines the distance that the ray must travel to hit the bounding volume of the given node
1194 static float ray_nearest_hit(BVHRayCastData *data, BVHNode *node)
1195 {
1196         int i;
1197         const float *bv = node->bv;
1198
1199         float low = 0, upper = data->hit.dist;
1200
1201         for(i=0; i != 3; i++, bv += 2)
1202         {
1203                 if(data->ray_dot_axis[i] == 0.0f)
1204                 {
1205                         //axis aligned ray
1206                         if(data->ray.origin[i] < bv[0]
1207                         || data->ray.origin[i] > bv[1])
1208                                 return FLT_MAX;
1209                 }
1210                 else
1211                 {
1212                         float ll = (bv[0] - data->ray.origin[i]) / data->ray_dot_axis[i];
1213                         float lu = (bv[1] - data->ray.origin[i]) / data->ray_dot_axis[i];
1214
1215                         if(data->ray_dot_axis[i] > 0)
1216                         {
1217                                 if(ll > low)   low = ll;
1218                                 if(lu < upper) upper = lu;
1219                         }
1220                         else
1221                         {
1222                                 if(lu > low)   low = lu;
1223                                 if(ll < upper) upper = ll;
1224                         }
1225         
1226                         if(low > upper) return FLT_MAX;
1227                 }
1228         }
1229         return low;
1230 }
1231
1232 static void dfs_raycast(BVHRayCastData *data, BVHNode *node)
1233 {
1234         int i;
1235
1236         //ray-bv is really fast.. and simple tests revealed its worth to test it
1237         //before calling the ray-primitive functions
1238         float dist = ray_nearest_hit(data, node);
1239         if(dist >= data->hit.dist) return;
1240
1241         if(node->totnode == 0)
1242         {
1243                 if(data->callback)
1244                         data->callback(data->userdata, node->index, &data->ray, &data->hit);
1245                 else
1246                 {
1247                         data->hit.index = node->index;
1248                         data->hit.dist  = dist;
1249                         VECADDFAC(data->hit.co, data->ray.origin, data->ray.direction, dist);
1250                 }
1251         }
1252         else
1253         {
1254                 //pick loop direction to dive into the tree (based on ray direction and split axis)
1255                 if(data->ray_dot_axis[ node->main_axis ] > 0)
1256                 {
1257                         for(i=0; i != node->totnode; i++)
1258                         {
1259                                 dfs_raycast(data, node->children[i]);
1260                         }
1261                 }
1262                 else
1263                 {
1264                         for(i=node->totnode-1; i >= 0; i--)
1265                         {
1266                                 dfs_raycast(data, node->children[i]);
1267                         }
1268                 }
1269         }
1270 }
1271
1272 int BLI_bvhtree_ray_cast(BVHTree *tree, const float *co, const float *dir, BVHTreeRayHit *hit, BVHTree_RayCastCallback callback, void *userdata)
1273 {
1274         int i;
1275         BVHRayCastData data;
1276         BVHNode * root = tree->nodes[tree->totleaf];
1277
1278         data.tree = tree;
1279
1280         data.callback = callback;
1281         data.userdata = userdata;
1282
1283         VECCOPY(data.ray.origin,    co);
1284         VECCOPY(data.ray.direction, dir);
1285
1286         Normalize(data.ray.direction);
1287
1288         for(i=0; i<3; i++)
1289         {
1290                 data.ray_dot_axis[i] = INPR( data.ray.direction, KDOP_AXES[i]);
1291
1292                 if(fabs(data.ray_dot_axis[i]) < 1e-7)
1293                         data.ray_dot_axis[i] = 0.0;
1294         }
1295
1296
1297         if(hit)
1298                 memcpy( &data.hit, hit, sizeof(*hit) );
1299         else
1300         {
1301                 data.hit.index = -1;
1302                 data.hit.dist = FLT_MAX;
1303         }
1304
1305         if(root)
1306                 dfs_raycast(&data, root);
1307
1308
1309         if(hit)
1310                 memcpy( hit, &data.hit, sizeof(*hit) );
1311
1312         return data.hit.index;
1313 }
1314