unify include guard defines, __$FILENAME__
[blender-staging.git] / source / blender / render / intern / raytrace / svbvh.h
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2009 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): AndrĂ© Pinto.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/raytrace/svbvh.h
29  *  \ingroup render
30  */
31
32
33 #ifdef __SSE__
34  
35 #ifndef __SVBVH_H__
36 #define __SVBVH_H__
37
38 #include "bvh.h"
39 #include "BLI_memarena.h"
40 #include "BKE_global.h"
41 #include <stdio.h>
42 #include <algorithm>
43
44 struct SVBVHNode
45 {
46         float child_bb[24];
47         SVBVHNode *child[4];
48         int nchilds;
49 };
50
51 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group)
52 {
53         const __m128 tmin0 = _mm_setzero_ps();
54         const __m128 tmax0 = _mm_set_ps1(isec->dist);
55
56         const __m128 start0 = _mm_set_ps1(isec->start[0]);
57         const __m128 start1 = _mm_set_ps1(isec->start[1]);
58         const __m128 start2 = _mm_set_ps1(isec->start[2]);
59         const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0);
60         const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0);
61         const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1);
62         const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1);
63         const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2);
64         const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2);
65         const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]);
66         const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]);
67         const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]);
68         const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0);
69         const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0);
70         const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1);
71         const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1);
72         const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2);
73         const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2);
74         const __m128 tmin1 = _mm_max_ps(tmin0, mul0);
75         const __m128 tmax1 = _mm_min_ps(tmax0, mul1);
76         const __m128 tmin2 = _mm_max_ps(tmin1, mul2);
77         const __m128 tmax2 = _mm_min_ps(tmax1, mul3);
78         const __m128 tmin3 = _mm_max_ps(tmin2, mul4);
79         const __m128 tmax3 = _mm_min_ps(tmax2, mul5);
80         
81         return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
82 }
83
84 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb)
85 {
86         const float *bb = _bb;
87         
88         float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
89         float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
90         float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
91         float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
92         float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
93         float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
94         
95         RE_RC_COUNT(isec->raycounter->bb.test);
96
97         if(t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
98         if(t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
99         if(t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
100
101         RE_RC_COUNT(isec->raycounter->bb.hit);  
102
103         return 1;
104 }
105
106 static bool svbvh_node_is_leaf(const SVBVHNode *node)
107 {
108         return !RE_rayobject_isAligned(node);
109 }
110
111 template<int MAX_STACK_SIZE, bool SHADOW>
112 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
113 {
114         SVBVHNode *stack[MAX_STACK_SIZE], *node;
115         int hit = 0, stack_pos = 0;
116
117         stack[stack_pos++] = root;
118
119         while(stack_pos)
120         {
121                 node = stack[--stack_pos];
122
123                 if(!svbvh_node_is_leaf(node))
124                 {
125                         int nchilds= node->nchilds;
126
127                         if(nchilds == 4) {
128                                 float *child_bb= node->child_bb;
129                                 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128*) (child_bb)));
130                                 SVBVHNode **child= node->child;
131
132                                 RE_RC_COUNT(isec->raycounter->simd_bb.test);
133
134                                 if(res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
135                                 if(res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
136                                 if(res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
137                                 if(res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
138                         }
139                         else {
140                                 float *child_bb= node->child_bb;
141                                 SVBVHNode **child= node->child;
142                                 int i;
143
144                                 for(i=0; i<nchilds; i++)
145                                         if(svbvh_bb_intersect_test(isec, (float*)child_bb+6*i))
146                                                 stack[stack_pos++] = child[i];
147                         }
148                 }
149                 else
150                 {
151                         hit |= RE_rayobject_intersect((RayObject*)node, isec);
152                         if(SHADOW && hit) break;
153                 }
154         }
155
156         return hit;
157 }
158
159
160 template<>
161 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float *min, float *max)
162 {
163         if(is_leaf(node))
164         {
165                 RE_rayobject_merge_bb((RayObject*)node, min, max);
166         }
167         else
168         {
169                 int i=0;
170                 while(i+4 <= node->nchilds)
171                 {
172                         float *res = node->child_bb + 6*i;
173                         for(int j=0; j<3; j++)
174                         {
175                                 min[j] = MIN2(min[j], res[4*j+0]);
176                                 min[j] = MIN2(min[j], res[4*j+1]);
177                                 min[j] = MIN2(min[j], res[4*j+2]);
178                                 min[j] = MIN2(min[j], res[4*j+3]);
179                         }
180                         for(int j=0; j<3; j++)
181                         {
182                                 max[j] = MAX2(max[j], res[4*(j+3)+0]);
183                                 max[j] = MAX2(max[j], res[4*(j+3)+1]);
184                                 max[j] = MAX2(max[j], res[4*(j+3)+2]);
185                                 max[j] = MAX2(max[j], res[4*(j+3)+3]);
186                         }
187                         
188                         i += 4;
189                 }
190
191                 for(; i<node->nchilds; i++)
192                 {
193                         DO_MIN(node->child_bb+6*i  , min);
194                         DO_MAX(node->child_bb+3+6*i, max);
195                 }
196         }
197 }
198
199
200
201 /*
202  * Builds a SVBVH tree form a VBVHTree
203  */
204 template<class OldNode>
205 struct Reorganize_SVBVH
206 {
207         MemArena *arena;
208
209         float childs_per_node;
210         int nodes_with_childs[16];
211         int useless_bb;
212         int nodes;
213
214         Reorganize_SVBVH(MemArena *a)
215         {
216                 arena = a;
217                 nodes = 0;
218                 childs_per_node = 0;
219                 useless_bb = 0;
220                 
221                 for(int i=0; i<16; i++)
222                         nodes_with_childs[i] = 0;
223         }
224         
225         ~Reorganize_SVBVH()
226         {
227                 if(G.f & G_DEBUG) {
228                         printf("%f childs per node\n", childs_per_node / nodes);
229                         printf("%d childs BB are useless\n", useless_bb);
230                         for(int i=0; i<16; i++)
231                                 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i]/float(nodes));
232                 }
233         }
234         
235         SVBVHNode *create_node(int nchilds)
236         {
237                 SVBVHNode *node = (SVBVHNode*)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
238                 node->nchilds = nchilds;
239
240                 return node;
241         }
242         
243         void copy_bb(float *bb, const float *old_bb)
244         {
245                 std::copy(old_bb, old_bb+6, bb);
246         }
247         
248         void prepare_for_simd(SVBVHNode *node)
249         {
250                 int i=0;
251                 while(i+4 <= node->nchilds)
252                 {
253                         float vec_tmp[4*6];
254                         float *res = node->child_bb+6*i;
255                         std::copy(res, res+6*4, vec_tmp);
256                         
257                         for(int j=0; j<6; j++)
258                         {
259                                 res[4*j+0] = vec_tmp[6*0+j];
260                                 res[4*j+1] = vec_tmp[6*1+j];
261                                 res[4*j+2] = vec_tmp[6*2+j];
262                                 res[4*j+3] = vec_tmp[6*3+j];
263                         }
264
265                         i += 4;
266                 }
267         }
268
269         /* amt must be power of two */
270         inline int padup(int num, int amt)
271         {
272                 return ((num+(amt-1))&~(amt-1));
273         }
274         
275         SVBVHNode *transform(OldNode *old)
276         {
277                 if(is_leaf(old))
278                         return (SVBVHNode*)old;
279                 if(is_leaf(old->child))
280                         return (SVBVHNode*)old->child;
281
282                 int nchilds = count_childs(old);
283                 int alloc_childs = nchilds;
284                 if(nchilds % 4 > 2)
285                         alloc_childs = padup(nchilds, 4);
286                 
287                 SVBVHNode *node = create_node(alloc_childs);
288
289                 childs_per_node += nchilds;
290                 nodes++;
291                 if(nchilds < 16)
292                         nodes_with_childs[nchilds]++;
293                 
294                 useless_bb += alloc_childs-nchilds;
295                 while(alloc_childs > nchilds)
296                 {
297                         const static float def_bb[6] = { FLT_MAX, FLT_MAX, FLT_MAX, FLT_MIN, FLT_MIN, FLT_MIN };
298                         alloc_childs--;
299                         node->child[alloc_childs] = 0;
300                         copy_bb(node->child_bb+alloc_childs*6, def_bb);
301                 }
302                 
303                 int i=nchilds;
304                 for(OldNode *o_child = old->child; o_child; o_child = o_child->sibling)
305                 {
306                         i--;
307                         node->child[i] = transform(o_child);
308                         if(is_leaf(o_child))
309                         {
310                                 float bb[6];
311                                 INIT_MINMAX(bb, bb+3);
312                                 RE_rayobject_merge_bb((RayObject*)o_child, bb, bb+3);
313                                 copy_bb(node->child_bb+i*6, bb);
314                                 break;
315                         }
316                         else
317                         {
318                                 copy_bb(node->child_bb+i*6, o_child->bb);
319                         }
320                 }
321                 assert(i == 0);
322
323                 prepare_for_simd(node);
324                 
325                 return node;
326         }       
327 };
328
329 #endif
330
331 #endif //__SSE__
332