Cycles: Simplify code around debug stats in BVH traversing
[blender.git] / intern / cycles / kernel / geom / geom_qbvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2014, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 /* This is a template BVH traversal function, where various features can be
21  * enabled/disabled. This way we can compile optimized versions for each case
22  * without new features slowing things down.
23  *
24  * BVH_INSTANCING: object instancing
25  * BVH_HAIR: hair curve rendering
26  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
27  * BVH_MOTION: motion blur rendering
28  *
29  */
30
31 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              Intersection *isect,
34                                              const uint visibility
35 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
36                                              ,uint *lcg_state,
37                                              float difl,
38                                              float extmax
39 #endif
40                                              )
41 {
42         /* TODO(sergey):
43          * - Test if pushing distance on the stack helps (for non shadow rays).
44          * - Separate version for shadow rays.
45          * - Likely and unlikely for if() statements.
46          * - Test restrict attribute for pointers.
47          */
48
49         /* Traversal stack in CUDA thread-local memory. */
50         QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
51         traversalStack[0].addr = ENTRYPOINT_SENTINEL;
52         traversalStack[0].dist = -FLT_MAX;
53
54         /* Traversal variables in registers. */
55         int stackPtr = 0;
56         int nodeAddr = kernel_data.bvh.root;
57         float nodeDist = -FLT_MAX;
58
59         /* Ray parameters in registers. */
60         float3 P = ray->P;
61         float3 dir = bvh_clamp_direction(ray->D);
62         float3 idir = bvh_inverse_direction(dir);
63         int object = OBJECT_NONE;
64
65 #if BVH_FEATURE(BVH_MOTION)
66         Transform ob_itfm;
67 #endif
68
69 #ifndef __KERNEL_SSE41__
70         if(!isfinite(P.x)) {
71                 return false;
72         }
73 #endif
74
75         isect->t = ray->t;
76         isect->u = 0.0f;
77         isect->v = 0.0f;
78         isect->prim = PRIM_NONE;
79         isect->object = OBJECT_NONE;
80
81         BVH_DEBUG_INIT();
82
83         ssef tnear(0.0f), tfar(ray->t);
84         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
85
86 #ifdef __KERNEL_AVX2__
87         float3 P_idir = P*idir;
88         sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
89 #else
90         sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
91 #endif
92
93         /* Offsets to select the side that becomes the lower or upper bound. */
94         int near_x, near_y, near_z;
95         int far_x, far_y, far_z;
96
97         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
98         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
99         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
100
101         IsectPrecalc isect_precalc;
102         triangle_intersect_precalc(dir, &isect_precalc);
103
104         /* Traversal loop. */
105         do {
106                 do {
107                         /* Traverse internal nodes. */
108                         while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
109                                 if(UNLIKELY(nodeDist > isect->t)) {
110                                         /* Pop. */
111                                         nodeAddr = traversalStack[stackPtr].addr;
112                                         nodeDist = traversalStack[stackPtr].dist;
113                                         --stackPtr;
114                                         continue;
115                                 }
116
117                                 int traverseChild;
118                                 ssef dist;
119
120                                 BVH_DEBUG_NEXT_STEP();
121
122 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
123                                 if(difl != 0.0f) {
124                                         /* NOTE: We extend all the child BB instead of fetching
125                                          * and checking visibility flags for each of the,
126                                          *
127                                          * Need to test if doing opposite would be any faster.
128                                          */
129                                         traverseChild = qbvh_node_intersect_robust(kg,
130                                                                                    tnear,
131                                                                                    tfar,
132 #  ifdef __KERNEL_AVX2__
133                                                                                    P_idir4,
134 #  else
135                                                                                    org,
136 #  endif
137                                                                                    idir4,
138                                                                                    near_x, near_y, near_z,
139                                                                                    far_x, far_y, far_z,
140                                                                                    nodeAddr,
141                                                                                    difl,
142                                                                                    &dist);
143                                 }
144                                 else
145 #endif  /* BVH_HAIR_MINIMUM_WIDTH */
146                                 {
147                                         traverseChild = qbvh_node_intersect(kg,
148                                                                             tnear,
149                                                                             tfar,
150 #ifdef __KERNEL_AVX2__
151                                                                             P_idir4,
152 #else
153                                                                             org,
154 #endif
155                                                                             idir4,
156                                                                             near_x, near_y, near_z,
157                                                                             far_x, far_y, far_z,
158                                                                             nodeAddr,
159                                                                             &dist);
160                                 }
161
162                                 if(traverseChild != 0) {
163                                         float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
164
165                                         /* One child is hit, continue with that child. */
166                                         int r = __bscf(traverseChild);
167                                         float d0 = ((float*)&dist)[r];
168                                         if(traverseChild == 0) {
169                                                 nodeAddr = __float_as_int(cnodes[r]);
170                                                 nodeDist = d0;
171                                                 continue;
172                                         }
173
174                                         /* Two children are hit, push far child, and continue with
175                                          * closer child.
176                                          */
177                                         int c0 = __float_as_int(cnodes[r]);
178                                         r = __bscf(traverseChild);
179                                         int c1 = __float_as_int(cnodes[r]);
180                                         float d1 = ((float*)&dist)[r];
181                                         if(traverseChild == 0) {
182                                                 if(d1 < d0) {
183                                                         nodeAddr = c1;
184                                                         nodeDist = d1;
185                                                         ++stackPtr;
186                                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
187                                                         traversalStack[stackPtr].addr = c0;
188                                                         traversalStack[stackPtr].dist = d0;
189                                                         continue;
190                                                 }
191                                                 else {
192                                                         nodeAddr = c0;
193                                                         nodeDist = d0;
194                                                         ++stackPtr;
195                                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
196                                                         traversalStack[stackPtr].addr = c1;
197                                                         traversalStack[stackPtr].dist = d1;
198                                                         continue;
199                                                 }
200                                         }
201
202                                         /* Here starts the slow path for 3 or 4 hit children. We push
203                                          * all nodes onto the stack to sort them there.
204                                          */
205                                         ++stackPtr;
206                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
207                                         traversalStack[stackPtr].addr = c1;
208                                         traversalStack[stackPtr].dist = d1;
209                                         ++stackPtr;
210                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
211                                         traversalStack[stackPtr].addr = c0;
212                                         traversalStack[stackPtr].dist = d0;
213
214                                         /* Three children are hit, push all onto stack and sort 3
215                                          * stack items, continue with closest child.
216                                          */
217                                         r = __bscf(traverseChild);
218                                         int c2 = __float_as_int(cnodes[r]);
219                                         float d2 = ((float*)&dist)[r];
220                                         if(traverseChild == 0) {
221                                                 ++stackPtr;
222                                                 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
223                                                 traversalStack[stackPtr].addr = c2;
224                                                 traversalStack[stackPtr].dist = d2;
225                                                 qbvh_stack_sort(&traversalStack[stackPtr],
226                                                                 &traversalStack[stackPtr - 1],
227                                                                 &traversalStack[stackPtr - 2]);
228                                                 nodeAddr = traversalStack[stackPtr].addr;
229                                                 nodeDist = traversalStack[stackPtr].dist;
230                                                 --stackPtr;
231                                                 continue;
232                                         }
233
234                                         /* Four children are hit, push all onto stack and sort 4
235                                          * stack items, continue with closest child.
236                                          */
237                                         r = __bscf(traverseChild);
238                                         int c3 = __float_as_int(cnodes[r]);
239                                         float d3 = ((float*)&dist)[r];
240                                         ++stackPtr;
241                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
242                                         traversalStack[stackPtr].addr = c3;
243                                         traversalStack[stackPtr].dist = d3;
244                                         ++stackPtr;
245                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
246                                         traversalStack[stackPtr].addr = c2;
247                                         traversalStack[stackPtr].dist = d2;
248                                         qbvh_stack_sort(&traversalStack[stackPtr],
249                                                         &traversalStack[stackPtr - 1],
250                                                         &traversalStack[stackPtr - 2],
251                                                         &traversalStack[stackPtr - 3]);
252                                 }
253
254                                 nodeAddr = traversalStack[stackPtr].addr;
255                                 nodeDist = traversalStack[stackPtr].dist;
256                                 --stackPtr;
257                         }
258
259                         /* If node is leaf, fetch triangle list. */
260                         if(nodeAddr < 0) {
261                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
262
263 #ifdef __VISIBILITY_FLAG__
264                                 if(UNLIKELY((nodeDist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
265 #else
266                                 if(UNLIKELY((nodeDist > isect->t)))
267 #endif
268                                 {
269                                         /* Pop. */
270                                         nodeAddr = traversalStack[stackPtr].addr;
271                                         nodeDist = traversalStack[stackPtr].dist;
272                                         --stackPtr;
273                                         continue;
274                                 }
275
276                                 int primAddr = __float_as_int(leaf.x);
277
278 #if BVH_FEATURE(BVH_INSTANCING)
279                                 if(primAddr >= 0) {
280 #endif
281                                         int primAddr2 = __float_as_int(leaf.y);
282                                         const uint type = __float_as_int(leaf.w);
283
284                                         /* Pop. */
285                                         nodeAddr = traversalStack[stackPtr].addr;
286                                         nodeDist = traversalStack[stackPtr].dist;
287                                         --stackPtr;
288
289                                         /* Primitive intersection. */
290                                         switch(type & PRIMITIVE_ALL) {
291                                                 case PRIMITIVE_TRIANGLE: {
292                                                         for(; primAddr < primAddr2; primAddr++) {
293                                                                 BVH_DEBUG_NEXT_STEP();
294                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
295                                                                 if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
296                                                                         tfar = ssef(isect->t);
297                                                                         /* Shadow ray early termination. */
298                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
299                                                                                 return true;
300                                                                 }
301                                                         }
302                                                         break;
303                                                 }
304 #if BVH_FEATURE(BVH_MOTION)
305                                                 case PRIMITIVE_MOTION_TRIANGLE: {
306                                                         for(; primAddr < primAddr2; primAddr++) {
307                                                                 BVH_DEBUG_NEXT_STEP();
308                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
309                                                                 if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
310                                                                         tfar = ssef(isect->t);
311                                                                         /* Shadow ray early termination. */
312                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
313                                                                                 return true;
314                                                                 }
315                                                         }
316                                                         break;
317                                                 }
318 #endif  /* BVH_FEATURE(BVH_MOTION) */
319 #if BVH_FEATURE(BVH_HAIR)
320                                                 case PRIMITIVE_CURVE:
321                                                 case PRIMITIVE_MOTION_CURVE: {
322                                                         for(; primAddr < primAddr2; primAddr++) {
323                                                                 BVH_DEBUG_NEXT_STEP();
324                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
325                                                                 bool hit;
326                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
327                                                                         hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
328                                                                 else
329                                                                         hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
330                                                                 if(hit) {
331                                                                         tfar = ssef(isect->t);
332                                                                         /* Shadow ray early termination. */
333                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
334                                                                                 return true;
335                                                                 }
336                                                         }
337                                                         break;
338                                                 }
339 #endif  /* BVH_FEATURE(BVH_HAIR) */
340                                         }
341                                 }
342 #if BVH_FEATURE(BVH_INSTANCING)
343                                 else {
344                                         /* Instance push. */
345                                         object = kernel_tex_fetch(__prim_object, -primAddr-1);
346
347 #  if BVH_FEATURE(BVH_MOTION)
348                                         qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_itfm);
349 #  else
350                                         qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
351 #  endif
352
353                                         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
354                                         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
355                                         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
356                                         tfar = ssef(isect->t);
357                                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
358 #  ifdef __KERNEL_AVX2__
359                                         P_idir = P*idir;
360                                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
361 #  else
362                                         org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
363 #  endif
364                                         triangle_intersect_precalc(dir, &isect_precalc);
365
366                                         ++stackPtr;
367                                         kernel_assert(stackPtr < BVH_QSTACK_SIZE);
368                                         traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
369                                         traversalStack[stackPtr].dist = -FLT_MAX;
370
371                                         nodeAddr = kernel_tex_fetch(__object_node, object);
372
373                                         BVH_DEBUG_NEXT_INSTANCE();
374                                 }
375                         }
376 #endif  /* FEATURE(BVH_INSTANCING) */
377                 } while(nodeAddr != ENTRYPOINT_SENTINEL);
378
379 #if BVH_FEATURE(BVH_INSTANCING)
380                 if(stackPtr >= 0) {
381                         kernel_assert(object != OBJECT_NONE);
382
383                         /* Instance pop. */
384 #  if BVH_FEATURE(BVH_MOTION)
385                         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
386 #  else
387                         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
388 #  endif
389
390                         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
391                         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
392                         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
393                         tfar = ssef(isect->t);
394                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
395 #  ifdef __KERNEL_AVX2__
396                         P_idir = P*idir;
397                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
398 #  else
399                         org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
400 #  endif
401                         triangle_intersect_precalc(dir, &isect_precalc);
402
403                         object = OBJECT_NONE;
404                         nodeAddr = traversalStack[stackPtr].addr;
405                         nodeDist = traversalStack[stackPtr].dist;
406                         --stackPtr;
407                 }
408 #endif  /* FEATURE(BVH_INSTANCING) */
409         } while(nodeAddr != ENTRYPOINT_SENTINEL);
410
411         return (isect->prim != PRIM_NONE);
412 }