rename api functions...
[blender.git] / source / blender / render / intern / raytrace / svbvh.h
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2009 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): André Pinto.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/raytrace/svbvh.h
29  *  \ingroup render
30  */
31
32 #ifndef __SVBVH_H__
33 #define __SVBVH_H__
34
35 #ifdef __SSE__
36
37 #include "bvh.h"
38 #include "BLI_memarena.h"
39 #include "BKE_global.h"
40 #include <stdio.h>
41 #include <algorithm>
42
/**
 * Inner node of the SIMD BVH, with storage for up to four children.
 *
 * Leaves are not SVBVHNode instances: a leaf is a RayObject pointer stored in
 * a `child` slot (cast to SVBVHNode *) and recognised by svbvh_node_is_leaf().
 */
struct SVBVHNode {
        /* Bounding boxes of the children, six floats per child.
         * Each box is written as (min x, y, z, max x, y, z); a complete group of
         * four boxes is afterwards transposed so that each coordinate of the four
         * boxes is contiguous, which is the layout the SSE test reads.
         * NOTE(review): this array is accessed through a __m128 pointer, so it
         * presumably has to be 16-byte aligned - confirm against the allocator. */
        float child_bb[4 * 6];

        /* Child pointers: inner nodes, leaves (RayObject pointers), or NULL for
         * padding slots. */
        SVBVHNode *child[4];

        /* Number of slots in use, including any padding slots. */
        int nchilds;
};
48
49 static int svbvh_bb_intersect_test_simd4(const Isect *isec, const __m128 *bb_group)
50 {
51         const __m128 tmin0 = _mm_setzero_ps();
52         const __m128 tmax0 = _mm_set_ps1(isec->dist);
53
54         const __m128 start0 = _mm_set_ps1(isec->start[0]);
55         const __m128 start1 = _mm_set_ps1(isec->start[1]);
56         const __m128 start2 = _mm_set_ps1(isec->start[2]);
57         const __m128 sub0 = _mm_sub_ps(bb_group[isec->bv_index[0]], start0);
58         const __m128 sub1 = _mm_sub_ps(bb_group[isec->bv_index[1]], start0);
59         const __m128 sub2 = _mm_sub_ps(bb_group[isec->bv_index[2]], start1);
60         const __m128 sub3 = _mm_sub_ps(bb_group[isec->bv_index[3]], start1);
61         const __m128 sub4 = _mm_sub_ps(bb_group[isec->bv_index[4]], start2);
62         const __m128 sub5 = _mm_sub_ps(bb_group[isec->bv_index[5]], start2);
63         const __m128 idot_axis0 = _mm_set_ps1(isec->idot_axis[0]);
64         const __m128 idot_axis1 = _mm_set_ps1(isec->idot_axis[1]);
65         const __m128 idot_axis2 = _mm_set_ps1(isec->idot_axis[2]);
66         const __m128 mul0 = _mm_mul_ps(sub0, idot_axis0);
67         const __m128 mul1 = _mm_mul_ps(sub1, idot_axis0);
68         const __m128 mul2 = _mm_mul_ps(sub2, idot_axis1);
69         const __m128 mul3 = _mm_mul_ps(sub3, idot_axis1);
70         const __m128 mul4 = _mm_mul_ps(sub4, idot_axis2);
71         const __m128 mul5 = _mm_mul_ps(sub5, idot_axis2);
72         const __m128 tmin1 = _mm_max_ps(tmin0, mul0);
73         const __m128 tmax1 = _mm_min_ps(tmax0, mul1);
74         const __m128 tmin2 = _mm_max_ps(tmin1, mul2);
75         const __m128 tmax2 = _mm_min_ps(tmax1, mul3);
76         const __m128 tmin3 = _mm_max_ps(tmin2, mul4);
77         const __m128 tmax3 = _mm_min_ps(tmax2, mul5);
78         
79         return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
80 }
81
82 static int svbvh_bb_intersect_test(const Isect *isec, const float *_bb)
83 {
84         const float *bb = _bb;
85         
86         float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
87         float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
88         float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
89         float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
90         float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
91         float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
92         
93         RE_RC_COUNT(isec->raycounter->bb.test);
94
95         if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
96         if (t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
97         if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
98
99         RE_RC_COUNT(isec->raycounter->bb.hit);
100
101         return 1;
102 }
103
104 static bool svbvh_node_is_leaf(const SVBVHNode *node)
105 {
106         return !RE_rayobject_isAligned(node);
107 }
108
109 template<int MAX_STACK_SIZE, bool SHADOW>
110 static int svbvh_node_stack_raycast(SVBVHNode *root, Isect *isec)
111 {
112         SVBVHNode *stack[MAX_STACK_SIZE], *node;
113         int hit = 0, stack_pos = 0;
114
115         stack[stack_pos++] = root;
116
117         while (stack_pos) {
118                 node = stack[--stack_pos];
119
120                 if (!svbvh_node_is_leaf(node)) {
121                         int nchilds = node->nchilds;
122
123                         if (nchilds == 4) {
124                                 float *child_bb = node->child_bb;
125                                 int res = svbvh_bb_intersect_test_simd4(isec, ((__m128 *) (child_bb)));
126                                 SVBVHNode **child = node->child;
127
128                                 RE_RC_COUNT(isec->raycounter->simd_bb.test);
129
130                                 if (res & 1) { stack[stack_pos++] = child[0]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
131                                 if (res & 2) { stack[stack_pos++] = child[1]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
132                                 if (res & 4) { stack[stack_pos++] = child[2]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
133                                 if (res & 8) { stack[stack_pos++] = child[3]; RE_RC_COUNT(isec->raycounter->simd_bb.hit); }
134                         }
135                         else {
136                                 float *child_bb = node->child_bb;
137                                 SVBVHNode **child = node->child;
138                                 int i;
139
140                                 for (i = 0; i < nchilds; i++) {
141                                         if (svbvh_bb_intersect_test(isec, (float *)child_bb + 6 * i)) {
142                                                 stack[stack_pos++] = child[i];
143                                         }
144                                 }
145                         }
146                 }
147                 else {
148                         hit |= RE_rayobject_intersect((RayObject *)node, isec);
149                         if (SHADOW && hit) break;
150                 }
151         }
152
153         return hit;
154 }
155
156
157 template<>
158 inline void bvh_node_merge_bb<SVBVHNode>(SVBVHNode *node, float min[3], float max[3])
159 {
160         if (is_leaf(node)) {
161                 RE_rayobject_merge_bb((RayObject *)node, min, max);
162         }
163         else {
164                 int i;
165                 for (i = 0; i + 4 <= node->nchilds; i += 4) {
166                         float *res = node->child_bb + 6 * i;
167                         for (int j = 0; j < 3; j++) {
168                                 min[j] = min_ff(res[4 * j + 0],
169                                          min_ff(res[4 * j + 1],
170                                          min_ff(res[4 * j + 2],
171                                          min_ff(res[4 * j + 3], min[j]))));
172                         }
173                         for (int j = 0; j < 3; j++) {
174                                 max[j] = max_ff(res[4 * (j + 3) + 0],
175                                          max_ff(res[4 * (j + 3) + 1],
176                                          max_ff(res[4 * (j + 3) + 2],
177                                          max_ff(res[4 * (j + 3) + 3], max[j]))));
178                         }
179                 }
180
181                 for (; i < node->nchilds; i++) {
182                         DO_MIN(node->child_bb + 6 * i, min);
183                         DO_MAX(node->child_bb + 3 + 6 * i, max);
184                 }
185         }
186 }
187
188
189
190 /*
191  * Builds an SVBVH tree from a VBVHTree
192  */
193 template<class OldNode>
194 struct Reorganize_SVBVH {
195         MemArena *arena;
196
197         float childs_per_node;
198         int nodes_with_childs[16];
199         int useless_bb;
200         int nodes;
201
202         Reorganize_SVBVH(MemArena *a)
203         {
204                 arena = a;
205                 nodes = 0;
206                 childs_per_node = 0;
207                 useless_bb = 0;
208                 
209                 for (int i = 0; i < 16; i++) {
210                         nodes_with_childs[i] = 0;
211                 }
212         }
213         
214         ~Reorganize_SVBVH()
215         {
216                 if (G.debug & G_DEBUG) {
217                         printf("%f childs per node\n", childs_per_node / nodes);
218                         printf("%d childs BB are useless\n", useless_bb);
219                         for (int i = 0; i < 16; i++) {
220                                 printf("%i childs per node: %d/%d = %f\n", i, nodes_with_childs[i], nodes,  nodes_with_childs[i] / float(nodes));
221                         }
222                 }
223         }
224         
225         SVBVHNode *create_node(int nchilds)
226         {
227                 SVBVHNode *node = (SVBVHNode *)BLI_memarena_alloc(arena, sizeof(SVBVHNode));
228                 node->nchilds = nchilds;
229
230                 return node;
231         }
232         
233         void copy_bb(float bb[6], const float old_bb[6])
234         {
235                 std::copy(old_bb, old_bb + 6, bb);
236         }
237         
238         void prepare_for_simd(SVBVHNode *node)
239         {
240                 int i = 0;
241                 while (i + 4 <= node->nchilds) {
242                         float vec_tmp[4 * 6];
243                         float *res = node->child_bb + 6 * i;
244                         std::copy(res, res + 6 * 4, vec_tmp);
245
246                         for (int j = 0; j < 6; j++) {
247                                 res[4 * j + 0] = vec_tmp[6 * 0 + j];
248                                 res[4 * j + 1] = vec_tmp[6 * 1 + j];
249                                 res[4 * j + 2] = vec_tmp[6 * 2 + j];
250                                 res[4 * j + 3] = vec_tmp[6 * 3 + j];
251                         }
252
253                         i += 4;
254                 }
255         }
256
257         /* amt must be power of two */
258         inline int padup(int num, int amt)
259         {
260                 return ((num + (amt - 1)) & ~(amt - 1));
261         }
262         
263         SVBVHNode *transform(OldNode *old)
264         {
265                 if (is_leaf(old))
266                         return (SVBVHNode *)old;
267                 if (is_leaf(old->child))
268                         return (SVBVHNode *)old->child;
269
270                 int nchilds = count_childs(old);
271                 int alloc_childs = nchilds;
272                 if (nchilds % 4 > 2)
273                         alloc_childs = padup(nchilds, 4);
274                 
275                 SVBVHNode *node = create_node(alloc_childs);
276
277                 childs_per_node += nchilds;
278                 nodes++;
279                 if (nchilds < 16)
280                         nodes_with_childs[nchilds]++;
281                 
282                 useless_bb += alloc_childs - nchilds;
283                 while (alloc_childs > nchilds) {
284                         const static float def_bb[6] = {FLT_MAX,  FLT_MAX,  FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX};
285                         alloc_childs--;
286                         node->child[alloc_childs] = NULL;
287                         copy_bb(node->child_bb + alloc_childs * 6, def_bb);
288                 }
289                 
290                 int i = nchilds;
291                 for (OldNode *o_child = old->child; o_child; o_child = o_child->sibling) {
292                         i--;
293                         node->child[i] = transform(o_child);
294                         if (is_leaf(o_child)) {
295                                 float bb[6];
296                                 INIT_MINMAX(bb, bb + 3);
297                                 RE_rayobject_merge_bb((RayObject *)o_child, bb, bb + 3);
298                                 copy_bb(node->child_bb + i * 6, bb);
299                                 break;
300                         }
301                         else {
302                                 copy_bb(node->child_bb + i * 6, o_child->bb);
303                         }
304                 }
305                 assert(i == 0);
306
307                 prepare_for_simd(node);
308                 
309                 return node;
310         }
311 };
312
313 #endif  /* __SSE__ */
314
315 #endif  /* __SVBVH_H__ */