9318e1758a68d84f953e6b0ecd40585b9f579d50
[blender.git] / source / blender / render / intern / raytrace / bvh.h
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2009 Blender Foundation.
19  * All rights reserved.
20  *
21  * The Original Code is: all of this file.
22  *
23  * Contributor(s): André Pinto.
24  *
25  * ***** END GPL LICENSE BLOCK *****
26  */
27
28 /** \file blender/render/intern/raytrace/bvh.h
29  *  \ingroup render
30  */
31
32
33 #include "MEM_guardedalloc.h"
34
35 #include "BLI_math.h"
36
37 #include "raycounter.h"
38 #include "rayintersection.h"
39 #include "rayobject.h"
40 #include "rayobject_hint.h"
41 #include "rayobject_rtbuild.h"
42
43 #include <assert.h>
44
45 #ifdef __SSE__
46 #include <xmmintrin.h>
47 #endif
48
49 #ifndef __BVH_H__
50 #define __BVH_H__
51
52 #ifdef __SSE__
53 inline int test_bb_group4(__m128 *bb_group, const Isect *isec)
54 {
55         const __m128 tmin0 = _mm_setzero_ps();
56         const __m128 tmax0 = _mm_set_ps1(isec->dist);
57
58         float start[3], idot_axis[3];
59         copy_v3_v3(start, isec->start);
60         copy_v3_v3(idot_axis, isec->idot_axis);
61
62         const __m128 tmin1 = _mm_max_ps(tmin0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[0]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
63         const __m128 tmax1 = _mm_min_ps(tmax0, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[1]], _mm_set_ps1(start[0]) ), _mm_set_ps1(idot_axis[0])) );
64         const __m128 tmin2 = _mm_max_ps(tmin1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[2]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
65         const __m128 tmax2 = _mm_min_ps(tmax1, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[3]], _mm_set_ps1(start[1]) ), _mm_set_ps1(idot_axis[1])) );
66         const __m128 tmin3 = _mm_max_ps(tmin2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[4]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
67         const __m128 tmax3 = _mm_min_ps(tmax2, _mm_mul_ps(_mm_sub_ps(bb_group[isec->bv_index[5]], _mm_set_ps1(start[2]) ), _mm_set_ps1(idot_axis[2])) );
68         
69         return _mm_movemask_ps(_mm_cmpge_ps(tmax3, tmin3));
70 }
71 #endif
72
/*
 * Tests whether the ray can hit the given bounding box within the current
 * maximum distance (isec->dist). Returns 1 on a possible hit, 0 otherwise;
 * the hit distance itself is not returned.
 * Based on Tactical Optimization of Ray/Box Intersection, by Graham Fyffe
 *  [http://tog.acm.org/resources/RTNews/html/rtnv21n1.html#art9]
 */
78 static inline int rayobject_bb_intersect_test(const Isect *isec, const float *_bb)
79 {
80         const float *bb = _bb;
81         
82         float t1x = (bb[isec->bv_index[0]] - isec->start[0]) * isec->idot_axis[0];
83         float t2x = (bb[isec->bv_index[1]] - isec->start[0]) * isec->idot_axis[0];
84         float t1y = (bb[isec->bv_index[2]] - isec->start[1]) * isec->idot_axis[1];
85         float t2y = (bb[isec->bv_index[3]] - isec->start[1]) * isec->idot_axis[1];
86         float t1z = (bb[isec->bv_index[4]] - isec->start[2]) * isec->idot_axis[2];
87         float t2z = (bb[isec->bv_index[5]] - isec->start[2]) * isec->idot_axis[2];
88
89         RE_RC_COUNT(isec->raycounter->bb.test);
90         
91         if (t1x > t2y || t2x < t1y || t1x > t2z || t2x < t1z || t1y > t2z || t2y < t1z) return 0;
92         if (t2x < 0.0 || t2y < 0.0 || t2z < 0.0) return 0;
93         if (t1x > isec->dist || t1y > isec->dist || t1z > isec->dist) return 0;
94         RE_RC_COUNT(isec->raycounter->bb.hit);  
95
96         return 1;
97 }
98
99 /* bvh tree generics */
100 template<class Tree> static void bvh_add(Tree *obj, RayObject *ob)
101 {
102         rtbuild_add(obj->builder, ob);
103 }
104
/*
 * A node pointer is treated as a leaf when RE_rayobject_isAligned() reports
 * it as not aligned; aligned pointers are inner nodes.
 */
template<class Node>
inline bool is_leaf(Node *node)
{
	return RE_rayobject_isAligned(node) ? false : true;
}
110
/*
 * Forward declaration only; no definition is visible in this header.
 * NOTE(review): presumably provided per tree type to finish the build - confirm.
 */
template<class Tree>
static void bvh_done(Tree *obj);
112
/*
 * Release a tree: its builder and node arena (each only when set), then the
 * tree object itself.
 */
template<class Tree>
static void bvh_free(Tree *obj)
{
	if (obj->builder) {
		rtbuild_free(obj->builder);
	}

	if (obj->node_arena) {
		BLI_memarena_free(obj->node_arena);
	}

	MEM_freeN(obj);
}
124
/*
 * Merge the bounding box of the tree's root into min/max.
 * A tree without a root leaves min/max untouched.
 */
template<class Tree>
static void bvh_bb(Tree *obj, float *min, float *max)
{
	if (!obj->root) {
		return;
	}

	bvh_node_merge_bb(obj->root, min, max);
}
131
132
/* Return the cost stored on the tree; debug builds assert it is non-negative. */
template<class Tree>
static float bvh_cost(Tree *obj)
{
	assert(obj->cost >= 0.0);

	return obj->cost;
}
139
140
141
142 /* bvh tree nodes generics */
143 template<class Node> static inline int bvh_node_hit_test(Node *node, Isect *isec)
144 {
145         return rayobject_bb_intersect_test(isec, (const float *)node->bb);
146 }
147
148
149 template<class Node>
150 static inline void bvh_node_merge_bb(Node *node, float min[3], float max[3])
151 {
152         if (is_leaf(node)) {
153                 RE_rayobject_merge_bb((RayObject *)node, min, max);
154         }
155         else {
156                 DO_MIN(node->bb,     min);
157                 DO_MAX(node->bb + 3, max);
158         }
159 }
160
161
162
/*
 * Traverse a BVH looking for a ray hit, using a local (explicit) stack
 * instead of recursion on the system stack.
 */
166 template<class Node> static inline void bvh_node_push_childs(Node *node, Isect *isec, Node **stack, int &stack_pos);
167
168 template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT, bool SHADOW>
169 static int bvh_node_stack_raycast(Node *root, Isect *isec)
170 {
171         Node *stack[MAX_STACK_SIZE];
172         int hit = 0, stack_pos = 0;
173                 
174         if (!TEST_ROOT && !is_leaf(root))
175                 bvh_node_push_childs(root, isec, stack, stack_pos);
176         else
177                 stack[stack_pos++] = root;
178
179         while (stack_pos) {
180                 Node *node = stack[--stack_pos];
181                 if (!is_leaf(node)) {
182                         if (bvh_node_hit_test(node, isec)) {
183                                 bvh_node_push_childs(node, isec, stack, stack_pos);
184                                 assert(stack_pos <= MAX_STACK_SIZE);
185                         }
186                 }
187                 else {
188                         hit |= RE_rayobject_intersect( (RayObject *)node, isec);
189                         if (SHADOW && hit) return hit;
190                 }
191         }
192         return hit;
193 }
194
195
196 #ifdef __SSE__
/*
 * Generic SIMD BVH traversal.
 * This was created to be able to use any SIMD (at the cost of some memmoves);
 * it can take advantage of any SIMD width and doesn't need any special tree care.
 */
202 template<class Node, int MAX_STACK_SIZE, bool TEST_ROOT>
203 static int bvh_node_stack_raycast_simd(Node *root, Isect *isec)
204 {
205         Node *stack[MAX_STACK_SIZE];
206
207         int hit = 0, stack_pos = 0;
208                 
209         if (!TEST_ROOT) {
210                 if (!is_leaf(root)) {
211                         if (!is_leaf(root->child))
212                                 bvh_node_push_childs(root, isec, stack, stack_pos);
213                         else
214                                 return RE_rayobject_intersect( (RayObject *)root->child, isec);
215                 }
216                 else
217                         return RE_rayobject_intersect( (RayObject *)root, isec);
218         }
219         else {
220                 if (!is_leaf(root))
221                         stack[stack_pos++] = root;
222                 else
223                         return RE_rayobject_intersect( (RayObject *)root, isec);
224         }
225
226         while (true) {
227                 //Use SIMD 4
228                 if (stack_pos >= 4) {
229                         __m128 t_bb[6];
230                         Node *t_node[4];
231                         
232                         stack_pos -= 4;
233
234                         /* prepare the 4BB for SIMD */
235                         t_node[0] = stack[stack_pos + 0]->child;
236                         t_node[1] = stack[stack_pos + 1]->child;
237                         t_node[2] = stack[stack_pos + 2]->child;
238                         t_node[3] = stack[stack_pos + 3]->child;
239
240                         const float *bb0 = stack[stack_pos + 0]->bb;
241                         const float *bb1 = stack[stack_pos + 1]->bb;
242                         const float *bb2 = stack[stack_pos + 2]->bb;
243                         const float *bb3 = stack[stack_pos + 3]->bb;
244
245                         const __m128 x0y0x1y1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(1, 0, 1, 0) );
246                         const __m128 x2y2x3y3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(1, 0, 1, 0) );
247                         t_bb[0] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(2, 0, 2, 0) );
248                         t_bb[1] = _mm_shuffle_ps(x0y0x1y1, x2y2x3y3, _MM_SHUFFLE(3, 1, 3, 1) );
249
250                         const __m128 z0X0z1X1 = _mm_shuffle_ps(_mm_load_ps(bb0), _mm_load_ps(bb1), _MM_SHUFFLE(3, 2, 3, 2) );
251                         const __m128 z2X2z3X3 = _mm_shuffle_ps(_mm_load_ps(bb2), _mm_load_ps(bb3), _MM_SHUFFLE(3, 2, 3, 2) );
252                         t_bb[2] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(2, 0, 2, 0) );
253                         t_bb[3] = _mm_shuffle_ps(z0X0z1X1, z2X2z3X3, _MM_SHUFFLE(3, 1, 3, 1) );
254
255                         const __m128 Y0Z0Y1Z1 = _mm_shuffle_ps(_mm_load_ps(bb0 + 4), _mm_load_ps(bb1 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
256                         const __m128 Y2Z2Y3Z3 = _mm_shuffle_ps(_mm_load_ps(bb2 + 4), _mm_load_ps(bb3 + 4), _MM_SHUFFLE(1, 0, 1, 0) );
257                         t_bb[4] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(2, 0, 2, 0) );
258                         t_bb[5] = _mm_shuffle_ps(Y0Z0Y1Z1, Y2Z2Y3Z3, _MM_SHUFFLE(3, 1, 3, 1) );
259 #if 0
260                         for (int i = 0; i < 4; i++)
261                         {
262                                 Node *t = stack[stack_pos + i];
263                                 assert(!is_leaf(t));
264                                 
265                                 float *bb = ((float *)t_bb) + i;
266                                 bb[4 * 0] = t->bb[0];
267                                 bb[4 * 1] = t->bb[1];
268                                 bb[4 * 2] = t->bb[2];
269                                 bb[4 * 3] = t->bb[3];
270                                 bb[4 * 4] = t->bb[4];
271                                 bb[4 * 5] = t->bb[5];
272                                 t_node[i] = t->child;
273                         }
274 #endif
275                         RE_RC_COUNT(isec->raycounter->simd_bb.test);
276                         int res = test_bb_group4(t_bb, isec);
277
278                         for (int i = 0; i < 4; i++)
279                                 if (res & (1 << i)) {
280                                         RE_RC_COUNT(isec->raycounter->simd_bb.hit);
281                                         if (!is_leaf(t_node[i])) {
282                                                 for (Node *t = t_node[i]; t; t = t->sibling) {
283                                                         assert(stack_pos < MAX_STACK_SIZE);
284                                                         stack[stack_pos++] = t;
285                                                 }
286                                         }
287                                         else {
288                                                 hit |= RE_rayobject_intersect( (RayObject *)t_node[i], isec);
289                                                 if (hit && isec->mode == RE_RAY_SHADOW) return hit;
290                                         }
291                                 }
292                 }
293                 else if (stack_pos > 0) {
294                         Node *node = stack[--stack_pos];
295                         assert(!is_leaf(node));
296                         
297                         if (bvh_node_hit_test(node, isec)) {
298                                 if (!is_leaf(node->child)) {
299                                         bvh_node_push_childs(node, isec, stack, stack_pos);
300                                         assert(stack_pos <= MAX_STACK_SIZE);
301                                 }
302                                 else {
303                                         hit |= RE_rayobject_intersect( (RayObject *)node->child, isec);
304                                         if (hit && isec->mode == RE_RAY_SHADOW) return hit;
305                                 }
306                         }
307                 }
308                 else break;
309         }
310         return hit;
311 }
312 #endif
313
/*
 * Recursively traverse a BVH looking for a ray hit, using the system stack.
 */
#if 0
/*
 * Disabled reference implementation of a recursive ray cast.
 *
 * It is kept out of the build because it relies on an array-of-children node
 * layout (node->child[i], node->split_axis, BVH_NCHILDS), whereas the active
 * traversals in this header walk node->child / ->sibling lists.
 *
 * Children are visited front to back or back to front depending on the sign
 * of isec->idot_axis along the node's split axis. Returns the OR of all
 * intersection results and returns early on the first hit when isec->mode is
 * RE_RAY_SHADOW.
 *
 * The box test and the recursive calls use the names that exist in this
 * header (bvh_node_hit_test, bvh_node_raycast).
 */
template<class Node>
static int bvh_node_raycast(Node *node, Isect *isec)
{
	int hit = 0;

	if (bvh_node_hit_test(node, isec)) {
		if (isec->idot_axis[node->split_axis] > 0.0f) {
			for (int i = 0; i < BVH_NCHILDS; i++) {
				if (!is_leaf(node->child[i])) {
					/* A NULL child ends the used part of the child array. */
					if (node->child[i] == 0) break;

					hit |= bvh_node_raycast(node->child[i], isec);
					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
				}
				else {
					hit |= RE_rayobject_intersect((RayObject *)node->child[i], isec);
					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
				}
			}
		}
		else {
			for (int i = BVH_NCHILDS - 1; i >= 0; i--) {
				if (!is_leaf(node->child[i])) {
					/* Walking backwards: unused (NULL) slots are skipped. */
					if (node->child[i]) {
						hit |= bvh_node_raycast(node->child[i], isec);
						if (hit && isec->mode == RE_RAY_SHADOW) return hit;
					}
				}
				else {
					hit |= RE_rayobject_intersect((RayObject *)node->child[i], isec);
					if (hit && isec->mode == RE_RAY_SHADOW) return hit;
				}
			}
		}
	}

	return hit;
}
#endif
360
361 template<class Node, class HintObject>
362 void bvh_dfs_make_hint(Node *node, LCTSHint *hint, int reserve_space, HintObject *hintObject)
363 {
364         assert(hint->size + reserve_space + 1 <= RE_RAY_LCTS_MAX_SIZE);
365         
366         if (is_leaf(node)) {
367                 hint->stack[hint->size++] = (RayObject *)node;
368         }
369         else {
370                 int childs = count_childs(node);
371                 if (hint->size + reserve_space + childs <= RE_RAY_LCTS_MAX_SIZE) {
372                         int result = hint_test_bb(hintObject, node->bb, node->bb + 3);
373                         if (result == HINT_RECURSE) {
374                                 /* We are 100% sure the ray will be pass inside this node */
375                                 bvh_dfs_make_hint_push_siblings(node->child, hint, reserve_space, hintObject);
376                         }
377                         else if (result == HINT_ACCEPT) {
378                                 hint->stack[hint->size++] = (RayObject *)node;
379                         }
380                 }
381                 else {
382                         hint->stack[hint->size++] = (RayObject *)node;
383                 }
384         }
385 }
386
387
388 template<class Tree>
389 static RayObjectAPI *bvh_get_api(int maxstacksize);
390
391
392 template<class Tree, int DFS_STACK_SIZE>
393 static inline RayObject *bvh_create_tree(int size)
394 {
395         Tree *obj = (Tree *)MEM_callocN(sizeof(Tree), "BVHTree");
396         assert(RE_rayobject_isAligned(obj)); /* RayObject API assumes real data to be 4-byte aligned */
397         
398         obj->rayobj.api = bvh_get_api<Tree>(DFS_STACK_SIZE);
399         obj->root = NULL;
400         
401         obj->node_arena = NULL;
402         obj->builder    = rtbuild_create(size);
403         
404         return RE_rayobject_unalignRayAPI((RayObject *) obj);
405 }
406
407 #endif