*Added a tree structure with a variable number of children per node, but with grouped...
[blender.git] / source / blender / render / intern / raytrace / svbvh.h
1 /**
2  * $Id$
3  *
4  * ***** BEGIN GPL LICENSE BLOCK *****
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version. 
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software Foundation,
18  * Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19  *
20  * The Original Code is Copyright (C) 2009 Blender Foundation.
21  * All rights reserved.
22  *
23  * The Original Code is: all of this file.
24  *
25  * Contributor(s): André Pinto.
26  *
27  * ***** END GPL LICENSE BLOCK *****
28  */
29 #ifndef RE_RAYTRACE_SVBVH_H
30 #define RE_RAYTRACE_SVBVH_H
31
32 #define SVBVH_SIMD 1
33
34 #include "bvh.h"
35 #include <stdio.h>
36
/**
 * Node of the SVBVH tree.
 *
 * A node owns a variable number of children and keeps one bounding box
 * (6 floats) per child; it stores no bounding box for itself.
 */
struct SVBVHNode
{
	/* Number of entries in `child`; `bb` holds 6 floats for each of them. */
	int nchilds;

	/*
	 * Bounding boxes of the children, 6 floats per child.
	 * Reorganize_SVBVH::prepare_for_simd() rewrites every complete group of
	 * 4 boxes into a transposed layout, so this is not always a plain array
	 * of 6-float boxes.
	 */
	float *bb;

	/*
	 * Child pointers. Reorganize_SVBVH::transform() stores leaves here as
	 * plain casts to SVBVHNode*, so an entry is not necessarily a real node.
	 */
	SVBVHNode **child;
};
45
46 template<>
47 inline int bvh_node_hit_test<SVBVHNode>(SVBVHNode *node, Isect *isec)
48 {
49         return 1;
50 }
51
52 template<>
53 inline void bvh_node_push_childs<SVBVHNode>(SVBVHNode *node, Isect *isec, SVBVHNode **stack, int &stack_pos)
54 {
55         if(SVBVH_SIMD)
56         {
57                 int i=0;
58                 while(i+4 <= node->nchilds)
59                 {
60                         int res = test_bb_group4( (__m128*) (node->bb+6*i), isec );
61                         RE_RC_COUNT(isec->raycounter->bb.test);
62                         RE_RC_COUNT(isec->raycounter->bb.test);
63                         RE_RC_COUNT(isec->raycounter->bb.test);
64                         RE_RC_COUNT(isec->raycounter->bb.test);
65                         
66                         if(res & 1) { stack[stack_pos++] = node->child[i+0]; RE_RC_COUNT(isec->raycounter->bb.hit); }
67                         if(res & 2) { stack[stack_pos++] = node->child[i+1]; RE_RC_COUNT(isec->raycounter->bb.hit); }
68                         if(res & 4) { stack[stack_pos++] = node->child[i+2]; RE_RC_COUNT(isec->raycounter->bb.hit); }
69                         if(res & 8) { stack[stack_pos++] = node->child[i+3]; RE_RC_COUNT(isec->raycounter->bb.hit); }
70                         
71                         i += 4;
72                 }
73                 while(i < node->nchilds)
74                 {
75                         if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
76                                 stack[stack_pos++] = node->child[i];
77                         i++;
78                 }
79         }
80         else
81         {
82                 for(int i=0; i<node->nchilds; i++)
83                 {
84                         if(RE_rayobject_bb_intersect_test(isec, (const float*)node->bb+6*i))
85                                 stack[stack_pos++] = node->child[i];
86                 }
87         }
88 }
89
90 struct SVBVHTree
91 {
92         RayObject rayobj;
93
94         SVBVHNode *root;
95
96         MemArena *node_arena;
97
98         float cost;
99         RTBuilder *builder;
100 };
101
102
103
104 template<class Tree,class OldNode>
105 struct Reorganize_SVBVH
106 {
107         Tree *tree;
108
109         float childs_per_node;
110         int nodes_with_childs[16];
111         int nodes;
112
113         Reorganize_SVBVH(Tree *t)
114         {
115                 tree = t;
116                 nodes = 0;
117                 childs_per_node = 0;
118                 
119                 for(int i=0; i<16; i++)
120                         nodes_with_childs[i] = 0;
121         }
122         
123         ~Reorganize_SVBVH()
124         {
125                 printf("%f childs per node\n", childs_per_node / nodes);                
126                 for(int i=0; i<16; i++)
127                         printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i]/float(nodes));
128         }
129         
130         SVBVHNode *create_node(int nchilds)
131         {
132                 SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode));
133                 node->nchilds = nchilds;
134                 node->bb   = (float*)BLI_memarena_alloc(tree->node_arena, sizeof(float)*6*nchilds);
135                 node->child= (SVBVHNode**)BLI_memarena_alloc(tree->node_arena, sizeof(SVBVHNode*)*nchilds);
136
137                 return node;
138         }
139         
140         void copy_bb(float *bb, float *old_bb)
141         {
142                 std::copy( old_bb, old_bb+6, bb );
143         }
144         
145         void prepare_for_simd(SVBVHNode *node)
146         {
147                 int i=0;
148                 while(i+4 <= node->nchilds)
149                 {
150                         float vec_tmp[4*6];
151                         float *res = node->bb+6*i;
152                         std::copy( node->bb+6*i, node->bb+6*(i+4), vec_tmp);
153                         
154                         for(int j=0; j<6; j++)
155                         {
156                                 res[4*j+0] = vec_tmp[6*0+j];
157                                 res[4*j+1] = vec_tmp[6*1+j];
158                                 res[4*j+2] = vec_tmp[6*2+j];
159                                 res[4*j+3] = vec_tmp[6*3+j];
160                         }
161 /*
162                         const float *bb0 = vec_tmp+6*(i+0);
163                         const float *bb1 = vec_tmp+6*(i+1);
164                         const float *bb2 = vec_tmp+6*(i+2);
165                         const float *bb3 = vec_tmp+6*(i+3);
166
167                         //memmoves could be memory alligned
168                         const __m128 x0y0x1y1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(1,0,1,0) );
169                         const __m128 x2y2x3y3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(1,0,1,0) );
170                         _mm_store_ps( node->bb+6*i+4*0, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2,0,2,0) ) );
171                         _mm_store_ps( node->bb+6*i+4*1, _mm_shuffle_ps( x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3,1,3,1) ) );
172
173                         const __m128 z0X0z1X1 = _mm_shuffle_ps( _mm_loadu_ps(bb0), _mm_loadu_ps(bb1), _MM_SHUFFLE(3,2,3,2) );
174                         const __m128 z2X2z3X3 = _mm_shuffle_ps( _mm_loadu_ps(bb2), _mm_loadu_ps(bb3), _MM_SHUFFLE(3,2,3,2) );
175                         _mm_store_ps( node->bb+6*i+4*2, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2,0,2,0) ) );
176                         _mm_store_ps( node->bb+6*i+4*3, _mm_shuffle_ps( z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3,1,3,1) ) );
177
178                         const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps( _mm_loadu_ps(bb0+4), _mm_loadu_ps(bb1+4), _MM_SHUFFLE(1,0,1,0) );
179                         const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps( _mm_loadu_ps(bb2+4), _mm_loadu_ps(bb3+4), _MM_SHUFFLE(1,0,1,0) );
180                         _mm_store_ps( node->bb+6*i+4*4, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2,0,2,0) ) );
181                         _mm_store_ps( node->bb+6*i+4*5, _mm_shuffle_ps( Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3,1,3,1) ) );
182  */
183                         
184                         i += 4;
185                 }
186         }
187
188         SVBVHNode *transform(OldNode *old)
189         {
190                 if(is_leaf(old))
191                         return (SVBVHNode*)old;
192                 if(is_leaf(old->child))
193                         return (SVBVHNode*)old->child;
194
195                 int nchilds = count_childs(old);
196                 SVBVHNode *node = create_node(nchilds);
197
198                 childs_per_node += nchilds;
199                 nodes++;
200                 if(nchilds < 16)
201                         nodes_with_childs[nchilds]++;
202                 
203                 int i=nchilds;
204                 for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
205                 {
206                         i--;
207                         node->child[i] = transform(o_child);
208                         if(is_leaf(o_child))
209                         {
210                                 float bb[6];
211                                 INIT_MINMAX(bb, bb+3);
212                                 RE_rayobject_merge_bb( (RayObject*)o_child, bb, bb+3);
213                                 copy_bb(node->bb+i*6, bb);
214                                 break;
215                         }
216                         else
217                         {
218                                 copy_bb(node->bb+i*6, o_child->bb);
219                         }
220                 }
221                 assert( i == 0 );
222
223                 if(SVBVH_SIMD)
224                         prepare_for_simd(node);
225                 
226                 return node;
227         }       
228 };
229
230 #endif