*Added a tree structure with a variable number of children per node, but with grouped...
authorAndre Susano Pinto <andresusanopinto@gmail.com>
Tue, 11 Aug 2009 00:33:51 +0000 (00:33 +0000)
committerAndre Susano Pinto <andresusanopinto@gmail.com>
Tue, 11 Aug 2009 00:33:51 +0000 (00:33 +0000)
*SIMD support for the first 4*N children of each node
*Some bvh code organized

source/blender/render/intern/raytrace/bvh.h
source/blender/render/intern/raytrace/rayobject_vbvh.cpp
source/blender/render/intern/raytrace/reorganize.h
source/blender/render/intern/raytrace/svbvh.h [new file with mode: 0644]

index f6c12f4b3fb18c3e9ed4dabadd290094ec0d414f..8538d9201e06065d7b8dbb9f478245d54adff97d 100644 (file)
@@ -28,6 +28,9 @@
  */
 #include <xmmintrin.h>
 
+#ifndef RE_RAYTRACE_BVH_H
+#define RE_RAYTRACE_BVH_H
+
 inline int test_bb_group4(__m128 *bb_group, const Isect *isec)
 {
        
@@ -53,6 +56,12 @@ template<class Tree> static void bvh_add(Tree *obj, RayObject *ob)
        rtbuild_add( obj->builder, ob );
 }
 
+template<class Node> // works for any node pointer type; only the pointer value is inspected
+inline bool is_leaf(Node *node) // leaf test: the negation of RayObject_isAligned(node)
+{
+       return !RayObject_isAligned(node); // pointers classified as leaves are cast to RayObject* by the callers in this header
+}
+
 template<class Tree> static void bvh_done(Tree *obj);
 
 template<class Tree>
@@ -93,14 +102,14 @@ template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec
 template<class Node>
 static void bvh_node_merge_bb(Node *node, float *min, float *max)
 {
-       if(RayObject_isAligned(node))
+       if(is_leaf(node))
        {
-               DO_MIN(node->bb  , min);
-               DO_MAX(node->bb+3, max);
+               RE_rayobject_merge_bb( (RayObject*)node, min, max);
        }
        else
        {
-               RE_rayobject_merge_bb( (RayObject*)node, min, max);
+               DO_MIN(node->bb  , min);
+               DO_MAX(node->bb+3, max);
        }
 }
 
@@ -117,7 +126,7 @@ static int bvh_node_stack_raycast(Node *root, Isect *isec)
        Node *stack[MAX_STACK_SIZE];
        int hit = 0, stack_pos = 0;
                
-       if(!TEST_ROOT && RayObject_isAligned(root))
+       if(!TEST_ROOT && !is_leaf(root))
                bvh_node_push_childs(root, isec, stack, stack_pos);
        else
                stack[stack_pos++] = root;
@@ -125,7 +134,7 @@ static int bvh_node_stack_raycast(Node *root, Isect *isec)
        while(stack_pos)
        {
                Node *node = stack[--stack_pos];
-               if(RayObject_isAligned(node))
+               if(!is_leaf(node))
                {
                        if(bvh_node_hit_test(node,isec))
                        {
@@ -157,9 +166,9 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
                
        if(!TEST_ROOT)
        {
-               if(RayObject_isAligned(root))
+               if(!is_leaf(root))
                {
-                       if(RayObject_isAligned(root->child))
+                       if(!is_leaf(root->child))
                                bvh_node_push_childs(root, isec, stack, stack_pos);
                        else
                                return RE_rayobject_intersect( (RayObject*)root->child, isec);
@@ -169,7 +178,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
        }
        else
        {
-               if(RayObject_isAligned(root))
+               if(!is_leaf(root))
                        stack[stack_pos++] = root;
                else
                        return RE_rayobject_intersect( (RayObject*)root, isec);
@@ -214,7 +223,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
                        for(int i=0; i<4; i++)
                        {
                                Node *t = stack[stack_pos+i];
-                               assert(RayObject_isAligned(t));
+                               assert(!is_leaf(t));
                                
                                float *bb = ((float*)t_bb)+i;
                                bb[4*0] = t->bb[0];
@@ -237,7 +246,7 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
                        if(res & (1<<i))
                        {
                                RE_RC_COUNT(isec->raycounter->bb.hit);
-                               if(RayObject_isAligned(t_node[i]))
+                               if(!is_leaf(t_node[i]))
                                {
                                        for(Node *t=t_node[i]; t; t=t->sibling)
                                        {
@@ -255,11 +264,11 @@ static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
                else if(stack_pos > 0)
                {       
                        Node *node = stack[--stack_pos];
-                       assert(RayObject_isAligned(node));
+                       assert(!is_leaf(node));
                        
                        if(bvh_node_hit_test(node,isec))
                        {
-                               if(RayObject_isAligned(node->child))
+                               if(!is_leaf(node->child))
                                {
                                        bvh_node_push_childs(node, isec, stack, stack_pos);
                                        assert(stack_pos <= MAX_STACK_SIZE);
@@ -291,7 +300,7 @@ static int bvh_node_raycast(Node *node, Isect *isec)
                {
                        int i;
                        for(i=0; i<BVH_NCHILDS; i++)
-                               if(RayObject_isAligned(node->child[i]))
+                               if(!is_leaf(node->child[i]))
                                {
                                        if(node->child[i] == 0) break;
                                        
@@ -308,7 +317,7 @@ static int bvh_node_raycast(Node *node, Isect *isec)
                {
                        int i;
                        for(i=BVH_NCHILDS-1; i>=0; i--)
-                               if(RayObject_isAligned(node->child[i]))
+                               if(!is_leaf(node->child[i]))
                                {
                                        if(node->child[i])
                                        {
@@ -326,3 +335,5 @@ static int bvh_node_raycast(Node *node, Isect *isec)
        return hit;
 }
 */
+
+#endif
index 834a1286fb81bc0625de13536df51af6f966ceed..7f990e061529e49b5e4697f44e0c9aab2cd086d9 100644 (file)
  *
  * ***** END GPL LICENSE BLOCK *****
  */
+#define RE_USE_HINT    (0) // only referenced inside the commented-out branch of bvh_hint_bb in this file
+static int tot_pushup   = 0; // incremented by pushup() in reorganize.h, so it must be declared before that include
+static int tot_pushdown = 0; // incremented by pushdown() in reorganize.h
+static int tot_hints    = 0; // read by bfree() together with the other tot_* counters
+
+
 extern "C"
 {
 #include <assert.h>
@@ -41,22 +47,21 @@ extern "C"
 #include "rayobject_hint.h"
 #include "reorganize.h"
 #include "bvh.h"
+#include "svbvh.h"
 #include <queue>
 
-#define BVHNode VBVHNode
-#define BVHTree VBVHTree
-
 
 #define RE_DO_HINTS    (0)
 #define RAY_BB_TEST_COST (0.2f)
 #define DFS_STACK_SIZE 256
 //#define DYNAMIC_ALLOC_BB
 
+
 //#define rtbuild_split        rtbuild_mean_split_largest_axis         /* objects mean split on the longest axis, childs BB are allowed to overlap */
 //#define rtbuild_split        rtbuild_median_split_largest_axis       /* space median split on the longest axis, childs BB are allowed to overlap */
 #define rtbuild_split  rtbuild_heuristic_object_split          /* split objects using heuristic */
 
-struct BVHNode
+struct VBVHNode
 {
 #ifdef DYNAMIC_ALLOC_BB
        float *bb;
@@ -64,15 +69,15 @@ struct BVHNode
        float   bb[6];
 #endif
 
-       BVHNode *child;
-       BVHNode *sibling;
+       VBVHNode *child;
+       VBVHNode *sibling;
 };
 
-struct BVHTree
+struct VBVHTree
 {
        RayObject rayobj;
 
-       BVHNode *root;
+       SVBVHNode *root;
 
        MemArena *node_arena;
 
@@ -81,6 +86,54 @@ struct BVHTree
 };
 
 
+
+
+template<class Tree,class OldNode> // Tree must expose node_arena; OldNode must expose bb, child and sibling
+struct Reorganize_VBVH // copies a child/sibling-linked tree into VBVHNodes allocated from tree->node_arena
+{
+       Tree *tree; // tree whose node_arena receives the copied nodes
+       
+       Reorganize_VBVH(Tree *t)
+       {
+               tree = t;
+       }
+       
+       VBVHNode *create_node() // raw arena allocation; fields are set by transform()
+       {
+               VBVHNode *node = (VBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode)); // unlike bvh_new_node(), no 16-byte alignment check here
+               return node;
+       }
+       
+       void copy_bb(VBVHNode *node, OldNode *old) // copies the 6 bounding-box floats from old to node
+       {
+               std::copy( old->bb, old->bb+6, node->bb ); // NOTE(review): with DYNAMIC_ALLOC_BB defined, VBVHNode::bb is a pointer that create_node() never sets - confirm
+       }
+       
+       VBVHNode *transform(OldNode *old) // returns the copy of the subtree rooted at old
+       {
+               if(is_leaf(old))
+                       return (VBVHNode*)old; // leaves are not copied; the same pointer is reused
+
+               VBVHNode *node = create_node();
+               VBVHNode **child_ptr = &node->child; // slot that the next copied child is linked into
+               node->sibling = 0; // the caller overwrites this when node is linked into a sibling list
+
+               copy_bb(node,old);
+
+               for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
+               {
+                       VBVHNode *n_child = transform(o_child);
+                       *child_ptr = n_child;
+                       if(is_leaf(n_child)) return node; // stop after linking a leaf; its ->sibling is never read or written
+                       child_ptr = &n_child->sibling;
+               }
+               *child_ptr = 0; // terminate the copied sibling list
+               
+               return node;
+       }       
+};
+
+
 /*
  * Push nodes (used on dfs)
  */
@@ -89,7 +142,7 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
 {
        Node *child = node->child;
 
-       if(!RayObject_isAligned(child))
+       if(is_leaf(child))
        {
                stack[stack_pos++] = child;
        }
@@ -99,7 +152,7 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
                {
                        //Skips BB tests on primitives
 /*
-                       if(!RayObject_isAligned(child->child))
+                       if(is_leaf(child->child))
                                stack[stack_pos++] = child->child;
                        else
 */
@@ -113,9 +166,9 @@ inline static void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, i
 /*
  * BVH done
  */
-static BVHNode *bvh_new_node(BVHTree *tree)
+static VBVHNode *bvh_new_node(VBVHTree *tree)
 {
-       BVHNode *node = (BVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(BVHNode));
+       VBVHNode *node = (VBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(VBVHNode));
        
        if( (((intptr_t)node) & (0x0f)) != 0 )
        {
@@ -132,70 +185,7 @@ static BVHNode *bvh_new_node(BVHTree *tree)
        return node;
 }
 
-template<class Builder>
-float rtbuild_area(Builder *builder)
-{
-       float min[3], max[3];
-       INIT_MINMAX(min, max);
-       rtbuild_merge_bb(builder, min, max);
-       return bb_area(min, max);       
-}
-
-template<class Node>
-void bvh_update_bb(Node *node)
-{
-       INIT_MINMAX(node->bb, node->bb+3);
-       Node *child = node->child;
-       
-       while(child)
-       {
-               bvh_node_merge_bb(child, node->bb, node->bb+3);
-               if(RayObject_isAligned(child))
-                       child = child->sibling;
-               else
-                       child = 0;
-       }
-}
-
 
-static int tot_pushup   = 0;
-static int tot_pushdown = 0;
-static int tot_hints    = 0;
-
-template<class Node>
-void pushdown(Node *parent)
-{
-       Node **s_child = &parent->child;
-       Node * child = parent->child;
-       
-       while(child && RayObject_isAligned(child))
-       {
-               Node *next = child->sibling;
-               Node **next_s_child = &child->sibling;
-               
-               //assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
-               
-               for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
-               if(child != i && bb_fits_inside(i->bb, i->bb+3, child->bb, child->bb+3) && RayObject_isAligned(i->child))
-               {
-//                     todo optimize (should the one with the smallest area?)
-//                     float ia = bb_area(i->bb, i->bb+3)
-//                     if(child->i)
-                       *s_child = child->sibling;
-                       child->sibling = i->child;
-                       i->child = child;
-                       next_s_child = s_child;
-                       
-                       tot_pushdown++;
-                       break;
-               }
-               child = next;
-               s_child = next_s_child;
-       }
-       
-       for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling)
-               pushdown( i );  
-}
 
 template<class Node>
 int count_childs(Node *parent)
@@ -204,7 +194,7 @@ int count_childs(Node *parent)
        for(Node *i = parent->child; i; i = i->sibling)
        {
                n++;
-               if(!RayObject_isAligned(i))
+               if(is_leaf(i))
                        break;
        }
                
@@ -220,39 +210,6 @@ void append_sibling(Node *node, Node *sibling)
        node->sibling = sibling;
 }
 
-template<class Node>
-void pushup(Node *parent)
-{
-       float p_area = bb_area(parent->bb, parent->bb+3);
-       Node **prev = &parent->child;
-       for(Node *child = parent->child; RayObject_isAligned(child) && child; )
-       {
-               float c_area = bb_area(child->bb, child->bb+3) ;
-               int nchilds = count_childs(child);
-               float original_cost = (c_area / p_area)*nchilds + 1;
-               float flatten_cost = nchilds;
-               if(flatten_cost < original_cost && nchilds >= 2)
-               {
-                       append_sibling(child, child->child);
-                       child = child->sibling;
-                       *prev = child;
-
-//                     *prev = child->child;
-//                     append_sibling( *prev, child->sibling );
-//                     child = *prev;
-                       tot_pushup++;
-               }
-               else
-               {
-                       *prev = child;
-                       prev = &(*prev)->sibling;
-                       child = *prev;
-               }               
-       }
-       
-       for(Node *child = parent->child; RayObject_isAligned(child) && child; child = child->sibling)
-               pushup(child);
-}
 
 template<class Tree, class Node, class Builder>
 Node *bvh_rearrange(Tree *tree, Builder *builder)
@@ -264,7 +221,7 @@ Node *bvh_rearrange(Tree *tree, Builder *builder)
                Node *node = bvh_new_node(tree);
                INIT_MINMAX(node->bb, node->bb+3);
                rtbuild_merge_bb(builder, node->bb, node->bb+3);                
-               node->child = (BVHNode*) rtbuild_get_primitive( builder, 0 );
+               node->child = (VBVHNode*) rtbuild_get_primitive( builder, 0 );
                return node;
        }
        else
@@ -292,30 +249,8 @@ Node *bvh_rearrange(Tree *tree, Builder *builder)
        }
 }
 
-template<class Node>
-float bvh_refit(Node *node)
-{
-       if(!RayObject_isAligned(node)) return 0;        
-       if(!RayObject_isAligned(node->child)) return 0;
-       
-       float total = 0;
-       
-       for(Node *child = node->child; child; child = child->sibling)
-               total += bvh_refit(child);
-               
-       float old_area = bb_area(node->bb, node->bb+3);
-       INIT_MINMAX(node->bb, node->bb+3);
-       for(Node *child = node->child; child; child = child->sibling)
-       {
-               DO_MIN(child->bb, node->bb);
-               DO_MAX(child->bb+3, node->bb+3);
-       }
-       total += old_area - bb_area(node->bb, node->bb+3);
-       return total;
-}
-
 template<>
-void bvh_done<BVHTree>(BVHTree *obj)
+void bvh_done<VBVHTree>(VBVHTree *obj)
 {
        rtbuild_done(obj->builder);
        
@@ -323,18 +258,47 @@ void bvh_done<BVHTree>(BVHTree *obj)
        if(needed_nodes > BLI_MEMARENA_STD_BUFSIZE)
                needed_nodes = BLI_MEMARENA_STD_BUFSIZE;
 
-       obj->node_arena = BLI_memarena_new(needed_nodes);
-       BLI_memarena_use_malloc(obj->node_arena);
-       BLI_memarena_use_align(obj->node_arena, 16);
+       MemArena *arena1 = BLI_memarena_new(needed_nodes);
+       BLI_memarena_use_malloc(arena1);
+       BLI_memarena_use_align(arena1, 16);
+       obj->node_arena = arena1;
+       
+       VBVHNode *root = bvh_rearrange<VBVHTree,VBVHNode,RTBuilder>( obj, obj->builder );
+       reorganize(root);
+       remove_useless(root, &root);
+       printf("refit: %f\n", bvh_refit(root) );
+       
+       pushup(root);
+       pushdown(root);
 
+       //Memory re-organize
+       if(0)
+       {
+               MemArena *arena2 = BLI_memarena_new(needed_nodes);
+               BLI_memarena_use_malloc(arena2);
+               BLI_memarena_use_align(arena2, 16);
+               obj->node_arena = arena2;
+               root = Reorganize_VBVH<VBVHTree,VBVHNode>(obj).transform(root);
        
-       obj->root = bvh_rearrange<BVHTree,BVHNode,RTBuilder>( obj, obj->builder );
-       reorganize(obj->root);
-       remove_useless(obj->root, &obj->root);
-       printf("refit: %f\n", bvh_refit(obj->root) );
-       pushup(obj->root);
-       pushdown(obj->root);
-//     obj->root = memory_rearrange(obj->root);
+               BLI_memarena_free(arena1);
+       }
+
+       if(1)
+       {
+               MemArena *arena2 = BLI_memarena_new(needed_nodes);
+               BLI_memarena_use_malloc(arena2);
+               BLI_memarena_use_align(arena2, 16);
+               obj->node_arena = arena2;
+               obj->root = Reorganize_SVBVH<VBVHTree,VBVHNode>(obj).transform(root);
+       
+               BLI_memarena_free(arena1);
+       }
+/*
+       {
+               obj->root = root;       
+       }
+*/
+
        obj->cost = 1.0;
        
        rtbuild_free( obj->builder );
@@ -342,8 +306,9 @@ void bvh_done<BVHTree>(BVHTree *obj)
 }
 
 template<int StackSize>
-int intersect(BVHTree *obj, Isect* isec)
+int intersect(VBVHTree *obj, Isect* isec)
 {
+/*
        if(RE_DO_HINTS && isec->hint)
        {
                LCTSHint *lcts = (LCTSHint*)isec->hint;
@@ -352,9 +317,9 @@ int intersect(BVHTree *obj, Isect* isec)
                int hit = 0;
                for(int i=0; i<lcts->size; i++)
                {
-                       BVHNode *node = (BVHNode*)lcts->stack[i];
+                       VBVHNode *node = (VBVHNode*)lcts->stack[i];
                        if(RayObject_isAligned(node))
-                               hit |= bvh_node_stack_raycast_simd<BVHNode,StackSize,true>(node, isec);
+                               hit |= bvh_node_stack_raycast<VBVHNode,StackSize,true>(node, isec);
                        else
                                hit |= RE_rayobject_intersect( (RayObject*)node, isec );
                        
@@ -365,9 +330,10 @@ int intersect(BVHTree *obj, Isect* isec)
                return hit;
        }
        else
+*/
        {
                if(RayObject_isAligned(obj->root))
-                       return bvh_node_stack_raycast_simd<BVHNode,StackSize,false>(obj->root, isec);
+                       return bvh_node_stack_raycast<SVBVHNode,StackSize,false>( obj->root, isec);
                else
                        return RE_rayobject_intersect( (RayObject*) obj->root, isec );
        }
@@ -395,7 +361,7 @@ void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject
 {
        assert( hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE );
        
-       if(!RayObject_isAligned(node))
+       if(is_leaf(node))
        {
                hint->stack[hint->size++] = (RayObject*)node;
        }
@@ -425,25 +391,26 @@ void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject
 template<class Tree>
 void bvh_hint_bb(Tree *tree, LCTSHint *hint, float *min, float *max)
 {
-       if(RE_DO_HINTS)
+/*
+       if(RE_USE_HINT)
        {
                HintBB bb;
                VECCOPY(bb.bb, min);
                VECCOPY(bb.bb+3, max);
-       
+
                hint->size = 0;
                bvh_dfs_make_hint( tree->root, hint, 0, &bb );
                tot_hints++;
        }
        else
+*/
        {
-               hint->size = 0;
-               hint->stack[hint->size++] = (RayObject*)tree->root;
-               tot_hints++;
+               hint->size = 0;
+               hint->stack[hint->size++] = (RayObject*)tree->root;
        }
 }
 
-void bfree(BVHTree *tree)
+void bfree(VBVHTree *tree)
 {
        if(tot_pushup + tot_pushdown + tot_hints + tot_moves)
        {
@@ -460,47 +427,63 @@ void bfree(BVHTree *tree)
 }
 
 /* the cast to pointer function is needed to workarround gcc bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11407 */
-template<int STACK_SIZE>
+template<class Tree,int STACK_SIZE>
 static RayObjectAPI make_api()
 {
        static RayObjectAPI api = 
        {
-               (RE_rayobject_raycast_callback) ((int(*)(BVHTree*,Isect*)) &intersect<STACK_SIZE>),
-               (RE_rayobject_add_callback)     ((void(*)(BVHTree*,RayObject*)) &bvh_add<BVHTree>),
-               (RE_rayobject_done_callback)    ((void(*)(BVHTree*))       &bvh_done<BVHTree>),
-//             (RE_rayobject_free_callback)    ((void(*)(BVHTree*))       &bvh_free<BVHTree>),
-               (RE_rayobject_free_callback)    ((void(*)(BVHTree*))       &bfree),
-               (RE_rayobject_merge_bb_callback)((void(*)(BVHTree*,float*,float*)) &bvh_bb<BVHTree>),
-               (RE_rayobject_cost_callback)    ((float(*)(BVHTree*))      &bvh_cost<BVHTree>),
-               (RE_rayobject_hint_bb_callback) ((void(*)(BVHTree*,LCTSHint*,float*,float*)) &bvh_hint_bb<BVHTree>)
+               (RE_rayobject_raycast_callback) ((int(*)(Tree*,Isect*)) &intersect<STACK_SIZE>),
+               (RE_rayobject_add_callback)     ((void(*)(Tree*,RayObject*)) &bvh_add<Tree>),
+               (RE_rayobject_done_callback)    ((void(*)(Tree*))       &bvh_done<Tree>),
+//             (RE_rayobject_free_callback)    ((void(*)(Tree*))       &bvh_free<Tree>),
+               (RE_rayobject_free_callback)    ((void(*)(Tree*))       &bfree),
+               (RE_rayobject_merge_bb_callback)((void(*)(Tree*,float*,float*)) &bvh_bb<Tree>),
+               (RE_rayobject_cost_callback)    ((float(*)(Tree*))      &bvh_cost<Tree>),
+               (RE_rayobject_hint_bb_callback) ((void(*)(Tree*,LCTSHint*,float*,float*)) &bvh_hint_bb<Tree>)
        };
        
        return api;
 }
 
+template<class Tree>
 static RayObjectAPI* get_api(int maxstacksize)
 {
-//     static RayObjectAPI bvh_api16  = make_api<16>();
-//     static RayObjectAPI bvh_api32  = make_api<32>();
-//     static RayObjectAPI bvh_api64  = make_api<64>();
-       static RayObjectAPI bvh_api128 = make_api<128>();
-       static RayObjectAPI bvh_api256 = make_api<256>();
+       static RayObjectAPI bvh_api256 = make_api<Tree,1024>();
        
-//     if(maxstacksize <= 16 ) return &bvh_api16;
-//     if(maxstacksize <= 32 ) return &bvh_api32;
-//     if(maxstacksize <= 64 ) return &bvh_api64;
-       if(maxstacksize <= 128) return &bvh_api128;
-       if(maxstacksize <= 256) return &bvh_api256;
+       if(maxstacksize <= 1024) return &bvh_api256;
        assert(maxstacksize <= 256);
        return 0;
 }
 
 RayObject *RE_rayobject_vbvh_create(int size)
 {
-       BVHTree *obj= (BVHTree*)MEM_callocN(sizeof(BVHTree), "BVHTree");
+       VBVHTree *obj= (VBVHTree*)MEM_callocN(sizeof(VBVHTree), "VBVHTree");
        assert( RayObject_isAligned(obj) ); /* RayObject API assumes real data to be 4-byte aligned */  
        
-       obj->rayobj.api = get_api(DFS_STACK_SIZE);
+       obj->rayobj.api = get_api<VBVHTree>(DFS_STACK_SIZE);
+       obj->root = NULL;
+       
+       obj->node_arena = NULL;
+       obj->builder    = rtbuild_create( size );
+       
+       return RayObject_unalignRayAPI((RayObject*) obj);
+}
+
+
+
+/* SVBVH */
+template<class HintObject>
+void bvh_dfs_make_hint(VBVHNode *node, LCTSHint *hint, int reserve_space, HintObject *hintObject) // no-op overload; NOTE(review): section is labelled SVBVH but this takes VBVHNode* while VBVHTree::root is SVBVHNode* - confirm intended type
+{
+       return; // hint is left untouched
+}
+/*
+RayObject *RE_rayobject_svbvh_create(int size)
+{
+       SVBVHTree *obj= (SVBVHTree*)MEM_callocN(sizeof(SVBVHTree), "SVBVHTree");
+       assert( RayObject_isAligned(obj) ); // RayObject API assumes real data to be 4-byte aligned
+       
+       obj->rayobj.api = get_api<SVBVHTree>(DFS_STACK_SIZE);
        obj->root = NULL;
        
        obj->node_arena = NULL;
@@ -508,3 +491,4 @@ RayObject *RE_rayobject_vbvh_create(int size)
        
        return RayObject_unalignRayAPI((RayObject*) obj);
 }
+*/
\ No newline at end of file
index 723c2b77902d430e7176acdb4f634b164f0ad063..2f79b0f6e82abe3108c91e4b0b85ce2e79dc2baa 100644 (file)
@@ -130,9 +130,115 @@ void remove_useless(Node *node, Node **new_node)
        }
        if(node->child)
        {
-               if(RayObject_isAligned(node->child) && node->child->child == 0)
+               if(RayObject_isAligned(node->child) && node->child->sibling == 0)
                        *new_node = node->child;
        }
        else if(node->child == 0)
                *new_node = 0;  
 }
+
+/*
+ * Minimizes the expected number of BB tests by collapsing nodes;
+ * it uses the surface area heuristic to decide whether a node should be collapsed
+ */
+template<class Node>
+void pushup(Node *parent) // hoists grandchildren into parent's child list when the cost estimate below favours it, then recurses
+{
+       float p_area = bb_area(parent->bb, parent->bb+3);
+       Node **prev = &parent->child; // link that currently points at child; rewritten as children are dropped or kept
+       for(Node *child = parent->child; RayObject_isAligned(child) && child; ) // ends at a null child or one failing RayObject_isAligned (i.e. is_leaf)
+       {
+               float c_area = bb_area(child->bb, child->bb+3) ;
+               int nchilds = count_childs(child);
+               float original_cost = (c_area / p_area)*nchilds + 1; // cost estimate if child is kept
+               float flatten_cost = nchilds; // cost estimate if child is removed and its children take its place
+               if(flatten_cost < original_cost && nchilds >= 2)
+               {
+                       append_sibling(child, child->child); // presumably links child's children after child's siblings - append_sibling is defined elsewhere
+                       child = child->sibling; // continue with the node that followed child
+                       *prev = child; // unlink the old child from parent's list
+
+//                     *prev = child->child;
+//                     append_sibling( *prev, child->sibling );
+//                     child = *prev;
+                       tot_pushup++; // counter declared by the including file
+               }
+               else
+               {
+                       *prev = child; // keep child and advance both the link and the cursor
+                       prev = &(*prev)->sibling;
+                       child = *prev;
+               }               
+       }
+       
+       for(Node *child = parent->child; RayObject_isAligned(child) && child; child = child->sibling) // second pass over the children that remain
+               pushup(child);
+}
+
+
+
+/*
+ * Pushdown
+ *     makes sure no child fits inside any of its siblings
+ */
+template<class Node>
+void pushdown(Node *parent) // moves a child underneath a sibling when bb_fits_inside accepts the pair, then recurses
+{
+       Node **s_child = &parent->child; // link that currently points at child
+       Node * child = parent->child;
+       
+       while(child && RayObject_isAligned(child))
+       {
+               Node *next = child->sibling; // saved now because child->sibling is rewritten if child is moved
+               Node **next_s_child = &child->sibling;
+               
+               //assert(bb_fits_inside(parent->bb, parent->bb+3, child->bb, child->bb+3));
+               
+               for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling) // candidate new parents: every sibling of child
+               if(child != i && bb_fits_inside(i->bb, i->bb+3, child->bb, child->bb+3) && RayObject_isAligned(i->child)) // presumably child's box inside i's box - TODO confirm argument order; i's first child must not be a leaf
+               {
+//                     todo optimize (should the one with the smallest area?)
+//                     float ia = bb_area(i->bb, i->bb+3)
+//                     if(child->i)
+                       *s_child = child->sibling; // unlink child from parent's list
+                       child->sibling = i->child; // prepend child to i's child list
+                       i->child = child;
+                       next_s_child = s_child; // child is gone, so the same link now points at next
+                       
+                       tot_pushdown++; // counter declared by the including file
+                       break; // the first accepting sibling wins
+               }
+               child = next;
+               s_child = next_s_child;
+       }
+       
+       for(Node *i = parent->child; RayObject_isAligned(i) && i; i = i->sibling) // recurse into the children that remain
+               pushdown( i );  
+}
+
+
+/*
+ * BVH refit
+ * readjusts node BBs (useful if a node's children were modified)
+ */
+template<class Node>
+float bvh_refit(Node *node) // recomputes bb bottom-up; returns the summed (old area - new area) over the refitted nodes
+{
+       if(is_leaf(node)) return 0;     
+       if(is_leaf(node->child)) return 0; // a node whose first child is a leaf keeps its bb unchanged
+       
+       float total = 0;
+       
+       for(Node *child = node->child; child; child = child->sibling) // NOTE(review): walks until null with no is_leaf check, unlike count_childs - confirm no leaf can follow a non-leaf sibling
+               total += bvh_refit(child);
+               
+       float old_area = bb_area(node->bb, node->bb+3);
+       INIT_MINMAX(node->bb, node->bb+3); // reset before merging the children's boxes
+       for(Node *child = node->child; child; child = child->sibling)
+       {
+               DO_MIN(child->bb, node->bb); // reads child->bb directly, so child is assumed to be a node here
+               DO_MAX(child->bb+3, node->bb+3);
+       }
+       total += old_area - bb_area(node->bb, node->bb+3);
+       return total;
+}
diff --git a/source/blender/render/intern/raytrace/svbvh.h b/source/blender/render/intern/raytrace/svbvh.h
new file mode 100644 (file)
index 0000000..f537aa7
--- /dev/null
@@ -0,0 +1,230 @@
+/**
+ * $Id$
+ *
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The Original Code is Copyright (C) 2009 Blender Foundation.
+ * All rights reserved.
+ *
+ * The Original Code is: all of this file.
+ *
+ * Contributor(s): André Pinto.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+#ifndef RE_RAYTRACE_SVBVH_H
+#define RE_RAYTRACE_SVBVH_H
+
+#define SVBVH_SIMD 1 /* non-zero: child bbs are tested 4 at a time with test_bb_group4() and transform() regroups them with prepare_for_simd() */
+
+#include "bvh.h"
+#include <stdio.h>
+
+struct SVBVHNode // tree node with a variable number of children, kept in two parallel arrays
+{
+       int nchilds; // number of children, i.e. number of entries of both arrays below
+ // the node stores the bbs of its children; it has no bb field of its own
+       // Array of bounding boxes and array of children (same indexing)
+       float *bb; // 6 floats per child (min xyz, then max xyz); with SVBVH_SIMD each full group of 4 boxes is regrouped by Reorganize_SVBVH::prepare_for_simd
+       SVBVHNode **child; // child pointers; Reorganize_SVBVH::transform also stores leaf pointers here, cast to SVBVHNode*
+};
+
+template<> // specialization of bvh_node_hit_test for SVBVHNode
+inline int bvh_node_hit_test<SVBVHNode>(SVBVHNode *node, Isect *isec) // neither argument is read
+{
+       return 1; // always a hit: an SVBVHNode has no bb of its own, the bbs of its children are tested in bvh_node_push_childs<SVBVHNode>
+}
+
+template<>
+inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHNode **stack, int &stack_pos)
+{
+       if(SVBVH_SIMD)
+       {
+               int i=0;
+               while(i+4 <= node->nchilds)
+               {
+                       int res = test_bb_group4( (__m128*) (node->bb+6*i), isec );
+                       RE_RC_COUNT(isec->raycounter->bb.test);
+                       RE_RC_COUNT(isec->raycounter->bb.test);
+                       RE_RC_COUNT(isec->raycounter->bb.test);
+                       RE_RC_COUNT(isec->raycounter->bb.test);
+                       
+                       if(res & 1) { stack[stack_pos++] = node->child[i+0]; RE_RC_COUNT(isec->raycounter->bb.hit); }
+                       if(res & 2) { stack[stack_pos++] = node->child[i+1]; RE_RC_COUNT(isec->raycounter->bb.hit); }
+                       if(res & 4) { stack[stack_pos++] = node->child[i+2]; RE_RC_COUNT(isec->raycounter->bb.hit); }
+                       if(res & 8) { stack[stack_pos++] = node->child[i+3]; RE_RC_COUNT(isec->raycounter->bb.hit); }
+                       
+                       i += 4;
+               }
+               while(i < node->nchilds)
+               {
+                       if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+                               stack[stack_pos++] = node->child[i];
+                       i++;
+               }
+       }
+       else
+       {
+               for(int i=0; i<node->nchilds; i++)
+               {
+                       if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
+                               stack[stack_pos++] = node->child[i];
+               }
+       }
+}
+
+struct SVBVHTree // tree object built from SVBVHNodes
+{
+       RayObject rayobj; // NOTE(review): first member, presumably so the tree can be handled as a RayObject - confirm against the code that registers it
+ // 
+       SVBVHNode *root; // root of the node hierarchy
+ // 
+       MemArena *node_arena; // arena for node storage; Reorganize_SVBVH::create_node allocates nodes and their arrays from tree->node_arena
+ // 
+       float cost; // NOTE(review): not used in the visible part of this header; meaning to be confirmed
+       RTBuilder *builder; // builder handed to rtbuild_add() by bvh_add() in bvh.h
+};
+
+
+
+template<class Tree,class OldNode>
+struct Reorganize_SVBVH
+{
+       Tree *tree;
+
+       float childs_per_node;
+       int nodes_with_childs[16];
+       int nodes;
+
+       Reorganize_SVBVH(Tree *t)
+       {
+               tree = t;
+               nodes = 0;
+               childs_per_node = 0;
+               
+               for(int i=0; i<16; i++)
+                       nodes_with_childs[i] = 0;
+       }
+       
+       ~Reorganize_SVBVH()
+       {
+               printf("%f childs per node\n", childs_per_node / nodes);                
+               for(int i=0; i<16; i++)
+                       printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i]/float(nodes));
+       }
+       
+       SVBVHNode *create_node(int nchilds)
+       {
+               SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
+               node->nchilds = nchilds;
+               node->bb   = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
+               node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);
+
+               return node;
+       }
+       
+       void copy_bb(float *bb, float *old_bb)
+       {
+               std::copy( old_bb, old_bb+6, bb );
+       }
+       
+       void prepare_for_simd(SVBVHNode *node)
+       {
+               int i=0;
+               while(i+4 <= node->nchilds)
+               {
+                       float vec_tmp[4*6];
+                       float *res = node->bb+6*i;
+                       std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);
+                       
+                       for(int j=0; j<6; j++)
+                       {
+                               res[4*j+0] = vec_tmp[6*0+j];
+                               res[4*j+1] = vec_tmp[6*1+j];
+                               res[4*j+2] = vec_tmp[6*2+j];
+                               res[4*j+3] = vec_tmp[6*3+j];
+                       }
+/*
+                       const float *bb0 = vec_tmp+6*(i+0);
+                       const float *bb1 = vec_tmp+6*(i+1);
+                       const float *bb2 = vec_tmp+6*(i+2);
+                       const float *bb3 = vec_tmp+6*(i+3);
+
+                       //memmoves could be memory alligned
+                       const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
+                       const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
+                       _mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
+                       _mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
+
+                       const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
+                       const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
+                       _mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
+                       _mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
+
+                       const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
+                       const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
+                       _mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
+                       _mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
+ */
+                       
+                       i += 4;
+               }
+       }
+
+       SVBVHNode *transform(OldNode *old)
+       {
+               if(is_leaf(old))
+                       return (SVBVHNode*)old;
+               if(is_leaf(old->child))
+                       return (SVBVHNode*)old->child;
+
+               int nchilds = count_childs(old);
+               SVBVHNode *node = create_node(nchilds);
+
+               childs_per_node += nchilds;
+               nodes++;
+               if(nchilds < 16)
+                       nodes_with_childs[nchilds]++;
+               
+               int i=nchilds;
+               for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
+               {
+                       i--;
+                       node->child[i] = transform(o_child);
+                       if(is_leaf(o_child))
+                       {
+                               float bb[6];
+                               INIT_MINMAX(bb, bb+3);
+                               RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
+                               copy_bb(node->bb+i*6, bb);
+                               break;
+                       }
+                       else
+                       {
+                               copy_bb(node->bb+i*6, o_child->bb);
+                       }
+               }
+               assert( i == 0 );
+
+               if(SVBVH_SIMD)
+                       prepare_for_simd(node);
+               
+               return node;
+       }       
+};
+
+#endif