Cycles: Simplify code around debug stats in BVH traversing
[blender.git] / intern / cycles / kernel / geom / geom_bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "geom_qbvh_traversal.h"
22 #endif
23
24 /* This is a template BVH traversal function, where various features can be
25  * enabled/disabled. This way we can compile optimized versions for each case
26  * without new features slowing things down.
27  *
28  * BVH_INSTANCING: object instancing
29  * BVH_HAIR: hair curve rendering
30  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
31  * BVH_MOTION: motion blur rendering
32  *
33  */
34
35 ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
36                                             const Ray *ray,
37                                             Intersection *isect,
38                                             const uint visibility
39 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
40                                             , uint *lcg_state,
41                                             float difl,
42                                             float extmax
43 #endif
44                                             )
45 {
46         /* todo:
47          * - test if pushing distance on the stack helps (for non shadow rays)
48          * - separate version for shadow rays
49          * - likely and unlikely for if() statements
50          * - test restrict attribute for pointers
51          */
52         
53         /* traversal stack in CUDA thread-local memory */
54         int traversalStack[BVH_STACK_SIZE];
55         traversalStack[0] = ENTRYPOINT_SENTINEL;
56
57         /* traversal variables in registers */
58         int stackPtr = 0;
59         int nodeAddr = kernel_data.bvh.root;
60
61         /* ray parameters in registers */
62         float3 P = ray->P;
63         float3 dir = bvh_clamp_direction(ray->D);
64         float3 idir = bvh_inverse_direction(dir);
65         int object = OBJECT_NONE;
66
67 #if BVH_FEATURE(BVH_MOTION)
68         Transform ob_itfm;
69 #endif
70
71         isect->t = ray->t;
72         isect->u = 0.0f;
73         isect->v = 0.0f;
74         isect->prim = PRIM_NONE;
75         isect->object = OBJECT_NONE;
76
77         BVH_DEBUG_INIT();
78
79 #if defined(__KERNEL_SSE2__)
80         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
81         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
82         
83         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
84         ssef Psplat[3], idirsplat[3];
85         shuffle_swap_t shufflexyz[3];
86
87         Psplat[0] = ssef(P.x);
88         Psplat[1] = ssef(P.y);
89         Psplat[2] = ssef(P.z);
90
91         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
92
93         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
94 #endif
95
96         IsectPrecalc isect_precalc;
97         triangle_intersect_precalc(dir, &isect_precalc);
98
99         /* traversal loop */
100         do {
101                 do {
102                         /* traverse internal nodes */
103                         while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
104                                 bool traverseChild0, traverseChild1;
105                                 int nodeAddrChild1;
106
107 #if !defined(__KERNEL_SSE2__)
108                                 /* Intersect two child bounding boxes, non-SSE version */
109                                 float t = isect->t;
110
111                                 /* fetch node data */
112                                 float4 node0 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
113                                 float4 node1 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
114                                 float4 node2 = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
115                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
116
117                                 /* intersect ray against child nodes */
118                                 NO_EXTENDED_PRECISION float c0lox = (node0.x - P.x) * idir.x;
119                                 NO_EXTENDED_PRECISION float c0hix = (node0.z - P.x) * idir.x;
120                                 NO_EXTENDED_PRECISION float c0loy = (node1.x - P.y) * idir.y;
121                                 NO_EXTENDED_PRECISION float c0hiy = (node1.z - P.y) * idir.y;
122                                 NO_EXTENDED_PRECISION float c0loz = (node2.x - P.z) * idir.z;
123                                 NO_EXTENDED_PRECISION float c0hiz = (node2.z - P.z) * idir.z;
124                                 NO_EXTENDED_PRECISION float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
125                                 NO_EXTENDED_PRECISION float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
126
127                                 NO_EXTENDED_PRECISION float c1lox = (node0.y - P.x) * idir.x;
128                                 NO_EXTENDED_PRECISION float c1hix = (node0.w - P.x) * idir.x;
129                                 NO_EXTENDED_PRECISION float c1loy = (node1.y - P.y) * idir.y;
130                                 NO_EXTENDED_PRECISION float c1hiy = (node1.w - P.y) * idir.y;
131                                 NO_EXTENDED_PRECISION float c1loz = (node2.y - P.z) * idir.z;
132                                 NO_EXTENDED_PRECISION float c1hiz = (node2.w - P.z) * idir.z;
133                                 NO_EXTENDED_PRECISION float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
134                                 NO_EXTENDED_PRECISION float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
135
136 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
137                                 if(difl != 0.0f) {
138                                         float hdiff = 1.0f + difl;
139                                         float ldiff = 1.0f - difl;
140                                         if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
141                                                 c0min = max(ldiff * c0min, c0min - extmax);
142                                                 c0max = min(hdiff * c0max, c0max + extmax);
143                                         }
144                                         if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
145                                                 c1min = max(ldiff * c1min, c1min - extmax);
146                                                 c1max = min(hdiff * c1max, c1max + extmax);
147                                         }
148                                 }
149 #  endif
150
151                                 /* decide which nodes to traverse next */
152 #  ifdef __VISIBILITY_FLAG__
153                                 /* this visibility test gives a 5% performance hit, how to solve? */
154                                 traverseChild0 = (c0max >= c0min) && (__float_as_uint(cnodes.z) & visibility);
155                                 traverseChild1 = (c1max >= c1min) && (__float_as_uint(cnodes.w) & visibility);
156 #  else
157                                 traverseChild0 = (c0max >= c0min);
158                                 traverseChild1 = (c1max >= c1min);
159 #  endif
160
161 #else // __KERNEL_SSE2__
162                                 /* Intersect two child bounding boxes, SSE3 version adapted from Embree */
163
164                                 /* fetch node data */
165                                 const ssef *bvh_nodes = (ssef*)kg->__bvh_nodes.data + nodeAddr*BVH_NODE_SIZE;
166                                 const float4 cnodes = ((float4*)bvh_nodes)[3];
167
168                                 /* intersect ray against child nodes */
169                                 const ssef tminmaxx = (shuffle_swap(bvh_nodes[0], shufflexyz[0]) - Psplat[0]) * idirsplat[0];
170                                 const ssef tminmaxy = (shuffle_swap(bvh_nodes[1], shufflexyz[1]) - Psplat[1]) * idirsplat[1];
171                                 const ssef tminmaxz = (shuffle_swap(bvh_nodes[2], shufflexyz[2]) - Psplat[2]) * idirsplat[2];
172
173                                 /* calculate { c0min, c1min, -c0max, -c1max} */
174                                 ssef minmax = max(max(tminmaxx, tminmaxy), max(tminmaxz, tsplat));
175                                 const ssef tminmax = minmax ^ pn;
176
177 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
178                                 if(difl != 0.0f) {
179                                         float4 *tminmaxview = (float4*)&tminmax;
180                                         float &c0min = tminmaxview->x, &c1min = tminmaxview->y;
181                                         float &c0max = tminmaxview->z, &c1max = tminmaxview->w;
182
183                                         float hdiff = 1.0f + difl;
184                                         float ldiff = 1.0f - difl;
185                                         if(__float_as_int(cnodes.z) & PATH_RAY_CURVE) {
186                                                 c0min = max(ldiff * c0min, c0min - extmax);
187                                                 c0max = min(hdiff * c0max, c0max + extmax);
188                                         }
189                                         if(__float_as_int(cnodes.w) & PATH_RAY_CURVE) {
190                                                 c1min = max(ldiff * c1min, c1min - extmax);
191                                                 c1max = min(hdiff * c1max, c1max + extmax);
192                                         }
193                                 }
194 #  endif
195
196                                 const sseb lrhit = tminmax <= shuffle<2, 3, 0, 1>(tminmax);
197
198                                 /* decide which nodes to traverse next */
199 #  ifdef __VISIBILITY_FLAG__
200                                 /* this visibility test gives a 5% performance hit, how to solve? */
201                                 traverseChild0 = (movemask(lrhit) & 1) && (__float_as_uint(cnodes.z) & visibility);
202                                 traverseChild1 = (movemask(lrhit) & 2) && (__float_as_uint(cnodes.w) & visibility);
203 #  else
204                                 traverseChild0 = (movemask(lrhit) & 1);
205                                 traverseChild1 = (movemask(lrhit) & 2);
206 #  endif
207 #endif // __KERNEL_SSE2__
208
209                                 nodeAddr = __float_as_int(cnodes.x);
210                                 nodeAddrChild1 = __float_as_int(cnodes.y);
211
212                                 if(traverseChild0 && traverseChild1) {
213                                         /* both children were intersected, push the farther one */
214 #if !defined(__KERNEL_SSE2__)
215                                         bool closestChild1 = (c1min < c0min);
216 #else
217                                         bool closestChild1 = tminmax[1] < tminmax[0];
218 #endif
219
220                                         if(closestChild1) {
221                                                 int tmp = nodeAddr;
222                                                 nodeAddr = nodeAddrChild1;
223                                                 nodeAddrChild1 = tmp;
224                                         }
225
226                                         ++stackPtr;
227                                         kernel_assert(stackPtr < BVH_STACK_SIZE);
228                                         traversalStack[stackPtr] = nodeAddrChild1;
229                                 }
230                                 else {
231                                         /* one child was intersected */
232                                         if(traverseChild1) {
233                                                 nodeAddr = nodeAddrChild1;
234                                         }
235                                         else if(!traverseChild0) {
236                                                 /* neither child was intersected */
237                                                 nodeAddr = traversalStack[stackPtr];
238                                                 --stackPtr;
239                                         }
240                                 }
241                                 BVH_DEBUG_NEXT_STEP();
242                         }
243
244                         /* if node is leaf, fetch triangle list */
245                         if(nodeAddr < 0) {
246                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_NODE_LEAF_SIZE);
247                                 int primAddr = __float_as_int(leaf.x);
248
249 #if BVH_FEATURE(BVH_INSTANCING)
250                                 if(primAddr >= 0) {
251 #endif
252                                         const int primAddr2 = __float_as_int(leaf.y);
253                                         const uint type = __float_as_int(leaf.w);
254
255                                         /* pop */
256                                         nodeAddr = traversalStack[stackPtr];
257                                         --stackPtr;
258
259                                         /* primitive intersection */
260                                         switch(type & PRIMITIVE_ALL) {
261                                                 case PRIMITIVE_TRIANGLE: {
262                                                         for(; primAddr < primAddr2; primAddr++) {
263                                                                 BVH_DEBUG_NEXT_STEP();
264                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
265                                                                 if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
266                                                                         /* shadow ray early termination */
267 #if defined(__KERNEL_SSE2__)
268                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
269                                                                                 return true;
270                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
271 #else
272                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
273                                                                                 return true;
274 #endif
275                                                                 }
276                                                         }
277                                                         break;
278                                                 }
279 #if BVH_FEATURE(BVH_MOTION)
280                                                 case PRIMITIVE_MOTION_TRIANGLE: {
281                                                         for(; primAddr < primAddr2; primAddr++) {
282                                                                 BVH_DEBUG_NEXT_STEP();
283                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
284                                                                 if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
285                                                                         /* shadow ray early termination */
286 #  if defined(__KERNEL_SSE2__)
287                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
288                                                                                 return true;
289                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
290 #  else
291                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
292                                                                                 return true;
293 #  endif
294                                                                 }
295                                                         }
296                                                         break;
297                                                 }
298 #endif  /* BVH_FEATURE(BVH_MOTION) */
299 #if BVH_FEATURE(BVH_HAIR)
300                                                 case PRIMITIVE_CURVE:
301                                                 case PRIMITIVE_MOTION_CURVE: {
302                                                         for(; primAddr < primAddr2; primAddr++) {
303                                                                 BVH_DEBUG_NEXT_STEP();
304                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
305                                                                 bool hit;
306                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
307                                                                         hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
308                                                                 else
309                                                                         hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
310                                                                 if(hit) {
311                                                                         /* shadow ray early termination */
312 #  if defined(__KERNEL_SSE2__)
313                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
314                                                                                 return true;
315                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
316 #  else
317                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
318                                                                                 return true;
319 #  endif
320                                                                 }
321                                                         }
322                                                         break;
323                                                 }
324 #endif  /* BVH_FEATURE(BVH_HAIR) */
325                                         }
326                                 }
327 #if BVH_FEATURE(BVH_INSTANCING)
328                                 else {
329                                         /* instance push */
330                                         object = kernel_tex_fetch(__prim_object, -primAddr-1);
331
332 #  if BVH_FEATURE(BVH_MOTION)
333                                         bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
334 #  else
335                                         bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
336 #  endif
337                                         triangle_intersect_precalc(dir, &isect_precalc);
338
339 #  if defined(__KERNEL_SSE2__)
340                                         Psplat[0] = ssef(P.x);
341                                         Psplat[1] = ssef(P.y);
342                                         Psplat[2] = ssef(P.z);
343
344                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
345
346                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
347 #  endif
348
349                                         ++stackPtr;
350                                         kernel_assert(stackPtr < BVH_STACK_SIZE);
351                                         traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
352
353                                         nodeAddr = kernel_tex_fetch(__object_node, object);
354
355                                         BVH_DEBUG_NEXT_INSTANCE();
356                                 }
357                         }
358 #endif  /* FEATURE(BVH_INSTANCING) */
359                 } while(nodeAddr != ENTRYPOINT_SENTINEL);
360
361 #if BVH_FEATURE(BVH_INSTANCING)
362                 if(stackPtr >= 0) {
363                         kernel_assert(object != OBJECT_NONE);
364
365                         /* instance pop */
366 #  if BVH_FEATURE(BVH_MOTION)
367                         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
368 #  else
369                         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
370 #  endif
371                         triangle_intersect_precalc(dir, &isect_precalc);
372
373 #  if defined(__KERNEL_SSE2__)
374                         Psplat[0] = ssef(P.x);
375                         Psplat[1] = ssef(P.y);
376                         Psplat[2] = ssef(P.z);
377
378                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
379
380                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
381 #  endif
382
383                         object = OBJECT_NONE;
384                         nodeAddr = traversalStack[stackPtr];
385                         --stackPtr;
386                 }
387 #endif  /* FEATURE(BVH_INSTANCING) */
388         } while(nodeAddr != ENTRYPOINT_SENTINEL);
389
390         return (isect->prim != PRIM_NONE);
391 }
392
393 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
394                                          const Ray *ray,
395                                          Intersection *isect,
396                                          const uint visibility
397 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
398                                          , uint *lcg_state,
399                                          float difl,
400                                          float extmax
401 #endif
402                                          )
403 {
404 #ifdef __QBVH__
405         if(kernel_data.bvh.use_qbvh) {
406                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
407                                                     ray,
408                                                     isect,
409                                                     visibility
410 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
411                                                     , lcg_state,
412                                                     difl,
413                                                     extmax
414 #endif
415                                                     );
416         }
417         else
418 #endif
419         {
420                 kernel_assert(kernel_data.bvh.use_qbvh == false);
421                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
422                                                    ray,
423                                                    isect,
424                                                    visibility
425 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
426                                                    , lcg_state,
427                                                    difl,
428                                                    extmax
429 #endif
430                                                    );
431         }
432 }
433
434 #undef BVH_FUNCTION_NAME
435 #undef BVH_FUNCTION_FEATURES