ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / bvh / qbvh_traversal.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function, where various features can be
18  * enabled/disabled. This way we can compile optimized versions for each case
19  * without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
22  * BVH_HAIR: hair curve rendering
23  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
24  * BVH_MOTION: motion blur rendering
25  */
26
27 #if BVH_FEATURE(BVH_HAIR)
28 #  define NODE_INTERSECT qbvh_node_intersect
29 #  define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
30 #else
31 #  define NODE_INTERSECT qbvh_aligned_node_intersect
32 #  define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
33 #endif
34
35 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
36                                              const Ray *ray,
37                                              Intersection *isect,
38                                              const uint visibility
39 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
40                                              ,
41                                              uint *lcg_state,
42                                              float difl,
43                                              float extmax
44 #endif
45 )
46 {
47   /* TODO(sergey):
48    * - Test if pushing distance on the stack helps (for non shadow rays).
49    * - Separate version for shadow rays.
50    * - Likely and unlikely for if() statements.
51    * - Test restrict attribute for pointers.
52    */
53
54   /* Traversal stack in CUDA thread-local memory. */
55   QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
56   traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
57   traversal_stack[0].dist = -FLT_MAX;
58
59   /* Traversal variables in registers. */
60   int stack_ptr = 0;
61   int node_addr = kernel_data.bvh.root;
62   float node_dist = -FLT_MAX;
63
64   /* Ray parameters in registers. */
65   float3 P = ray->P;
66   float3 dir = bvh_clamp_direction(ray->D);
67   float3 idir = bvh_inverse_direction(dir);
68   int object = OBJECT_NONE;
69
70 #if BVH_FEATURE(BVH_MOTION)
71   Transform ob_itfm;
72 #endif
73
74   isect->t = ray->t;
75   isect->u = 0.0f;
76   isect->v = 0.0f;
77   isect->prim = PRIM_NONE;
78   isect->object = OBJECT_NONE;
79
80   BVH_DEBUG_INIT();
81
82   ssef tnear(0.0f), tfar(ray->t);
83 #if BVH_FEATURE(BVH_HAIR)
84   sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
85 #endif
86   sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
87
88 #ifdef __KERNEL_AVX2__
89   float3 P_idir = P * idir;
90   sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
91 #endif
92 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
93   sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
94 #endif
95
96   /* Offsets to select the side that becomes the lower or upper bound. */
97   int near_x, near_y, near_z;
98   int far_x, far_y, far_z;
99   qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
100
101   /* Traversal loop. */
102   do {
103     do {
104       /* Traverse internal nodes. */
105       while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
106         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
107         (void)inodes;
108
109         if (UNLIKELY(node_dist > isect->t)
110 #if BVH_FEATURE(BVH_MOTION)
111             || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
112 #endif
113 #ifdef __VISIBILITY_FLAG__
114             || (__float_as_uint(inodes.x) & visibility) == 0
115 #endif
116         ) {
117           /* Pop. */
118           node_addr = traversal_stack[stack_ptr].addr;
119           node_dist = traversal_stack[stack_ptr].dist;
120           --stack_ptr;
121           continue;
122         }
123
124         int child_mask;
125         ssef dist;
126
127         BVH_DEBUG_NEXT_NODE();
128
129 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
130         if (difl != 0.0f) {
131           /* NOTE: We extend all the child BB instead of fetching
132            * and checking visibility flags for each of the,
133            *
134            * Need to test if doing opposite would be any faster.
135            */
136           child_mask = NODE_INTERSECT_ROBUST(kg,
137                                              tnear,
138                                              tfar,
139 #  ifdef __KERNEL_AVX2__
140                                              P_idir4,
141 #  endif
142 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
143                                              org4,
144 #  endif
145 #  if BVH_FEATURE(BVH_HAIR)
146                                              dir4,
147 #  endif
148                                              idir4,
149                                              near_x,
150                                              near_y,
151                                              near_z,
152                                              far_x,
153                                              far_y,
154                                              far_z,
155                                              node_addr,
156                                              difl,
157                                              &dist);
158         }
159         else
160 #endif /* BVH_HAIR_MINIMUM_WIDTH */
161         {
162           child_mask = NODE_INTERSECT(kg,
163                                       tnear,
164                                       tfar,
165 #ifdef __KERNEL_AVX2__
166                                       P_idir4,
167 #endif
168 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
169                                       org4,
170 #endif
171 #if BVH_FEATURE(BVH_HAIR)
172                                       dir4,
173 #endif
174                                       idir4,
175                                       near_x,
176                                       near_y,
177                                       near_z,
178                                       far_x,
179                                       far_y,
180                                       far_z,
181                                       node_addr,
182                                       &dist);
183         }
184
185         if (child_mask != 0) {
186           float4 cnodes;
187           /* TODO(sergey): Investigate whether moving cnodes upwards
188            * gives a speedup (will be different cache pattern but will
189            * avoid extra check here).
190            */
191 #if BVH_FEATURE(BVH_HAIR)
192           if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
193             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
194           }
195           else
196 #endif
197           {
198             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
199           }
200
201           /* One child is hit, continue with that child. */
202           int r = __bscf(child_mask);
203           float d0 = ((float *)&dist)[r];
204           if (child_mask == 0) {
205             node_addr = __float_as_int(cnodes[r]);
206             node_dist = d0;
207             continue;
208           }
209
210           /* Two children are hit, push far child, and continue with
211            * closer child.
212            */
213           int c0 = __float_as_int(cnodes[r]);
214           r = __bscf(child_mask);
215           int c1 = __float_as_int(cnodes[r]);
216           float d1 = ((float *)&dist)[r];
217           if (child_mask == 0) {
218             if (d1 < d0) {
219               node_addr = c1;
220               node_dist = d1;
221               ++stack_ptr;
222               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
223               traversal_stack[stack_ptr].addr = c0;
224               traversal_stack[stack_ptr].dist = d0;
225               continue;
226             }
227             else {
228               node_addr = c0;
229               node_dist = d0;
230               ++stack_ptr;
231               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
232               traversal_stack[stack_ptr].addr = c1;
233               traversal_stack[stack_ptr].dist = d1;
234               continue;
235             }
236           }
237
238           /* Here starts the slow path for 3 or 4 hit children. We push
239            * all nodes onto the stack to sort them there.
240            */
241           ++stack_ptr;
242           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
243           traversal_stack[stack_ptr].addr = c1;
244           traversal_stack[stack_ptr].dist = d1;
245           ++stack_ptr;
246           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
247           traversal_stack[stack_ptr].addr = c0;
248           traversal_stack[stack_ptr].dist = d0;
249
250           /* Three children are hit, push all onto stack and sort 3
251            * stack items, continue with closest child.
252            */
253           r = __bscf(child_mask);
254           int c2 = __float_as_int(cnodes[r]);
255           float d2 = ((float *)&dist)[r];
256           if (child_mask == 0) {
257             ++stack_ptr;
258             kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
259             traversal_stack[stack_ptr].addr = c2;
260             traversal_stack[stack_ptr].dist = d2;
261             qbvh_stack_sort(&traversal_stack[stack_ptr],
262                             &traversal_stack[stack_ptr - 1],
263                             &traversal_stack[stack_ptr - 2]);
264             node_addr = traversal_stack[stack_ptr].addr;
265             node_dist = traversal_stack[stack_ptr].dist;
266             --stack_ptr;
267             continue;
268           }
269
270           /* Four children are hit, push all onto stack and sort 4
271            * stack items, continue with closest child.
272            */
273           r = __bscf(child_mask);
274           int c3 = __float_as_int(cnodes[r]);
275           float d3 = ((float *)&dist)[r];
276           ++stack_ptr;
277           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
278           traversal_stack[stack_ptr].addr = c3;
279           traversal_stack[stack_ptr].dist = d3;
280           ++stack_ptr;
281           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
282           traversal_stack[stack_ptr].addr = c2;
283           traversal_stack[stack_ptr].dist = d2;
284           qbvh_stack_sort(&traversal_stack[stack_ptr],
285                           &traversal_stack[stack_ptr - 1],
286                           &traversal_stack[stack_ptr - 2],
287                           &traversal_stack[stack_ptr - 3]);
288         }
289
290         node_addr = traversal_stack[stack_ptr].addr;
291         node_dist = traversal_stack[stack_ptr].dist;
292         --stack_ptr;
293       }
294
295       /* If node is leaf, fetch triangle list. */
296       if (node_addr < 0) {
297         float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
298
299 #ifdef __VISIBILITY_FLAG__
300         if (UNLIKELY((node_dist > isect->t) || ((__float_as_uint(leaf.z) & visibility) == 0)))
301 #else
302         if (UNLIKELY((node_dist > isect->t)))
303 #endif
304         {
305           /* Pop. */
306           node_addr = traversal_stack[stack_ptr].addr;
307           node_dist = traversal_stack[stack_ptr].dist;
308           --stack_ptr;
309           continue;
310         }
311
312         int prim_addr = __float_as_int(leaf.x);
313
314 #if BVH_FEATURE(BVH_INSTANCING)
315         if (prim_addr >= 0) {
316 #endif
317           int prim_addr2 = __float_as_int(leaf.y);
318           const uint type = __float_as_int(leaf.w);
319
320           /* Pop. */
321           node_addr = traversal_stack[stack_ptr].addr;
322           node_dist = traversal_stack[stack_ptr].dist;
323           --stack_ptr;
324
325           /* Primitive intersection. */
326           switch (type & PRIMITIVE_ALL) {
327             case PRIMITIVE_TRIANGLE: {
328               for (; prim_addr < prim_addr2; prim_addr++) {
329                 BVH_DEBUG_NEXT_INTERSECTION();
330                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
331                 if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
332                   tfar = ssef(isect->t);
333                   /* Shadow ray early termination. */
334                   if (visibility & PATH_RAY_SHADOW_OPAQUE) {
335                     return true;
336                   }
337                 }
338               }
339               break;
340             }
341 #if BVH_FEATURE(BVH_MOTION)
342             case PRIMITIVE_MOTION_TRIANGLE: {
343               for (; prim_addr < prim_addr2; prim_addr++) {
344                 BVH_DEBUG_NEXT_INTERSECTION();
345                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
346                 if (motion_triangle_intersect(
347                         kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
348                   tfar = ssef(isect->t);
349                   /* Shadow ray early termination. */
350                   if (visibility & PATH_RAY_SHADOW_OPAQUE) {
351                     return true;
352                   }
353                 }
354               }
355               break;
356             }
357 #endif /* BVH_FEATURE(BVH_MOTION) */
358 #if BVH_FEATURE(BVH_HAIR)
359             case PRIMITIVE_CURVE:
360             case PRIMITIVE_MOTION_CURVE: {
361               for (; prim_addr < prim_addr2; prim_addr++) {
362                 BVH_DEBUG_NEXT_INTERSECTION();
363                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
364                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
365                 bool hit;
366                 if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
367                   hit = cardinal_curve_intersect(kg,
368                                                  isect,
369                                                  P,
370                                                  dir,
371                                                  visibility,
372                                                  object,
373                                                  prim_addr,
374                                                  ray->time,
375                                                  curve_type,
376                                                  lcg_state,
377                                                  difl,
378                                                  extmax);
379                 }
380                 else {
381                   hit = curve_intersect(kg,
382                                         isect,
383                                         P,
384                                         dir,
385                                         visibility,
386                                         object,
387                                         prim_addr,
388                                         ray->time,
389                                         curve_type,
390                                         lcg_state,
391                                         difl,
392                                         extmax);
393                 }
394                 if (hit) {
395                   tfar = ssef(isect->t);
396                   /* Shadow ray early termination. */
397                   if (visibility & PATH_RAY_SHADOW_OPAQUE) {
398                     return true;
399                   }
400                 }
401               }
402               break;
403             }
404 #endif /* BVH_FEATURE(BVH_HAIR) */
405           }
406         }
407 #if BVH_FEATURE(BVH_INSTANCING)
408         else {
409           /* Instance push. */
410           object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
411
412 #  if BVH_FEATURE(BVH_MOTION)
413           qbvh_instance_motion_push(
414               kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
415 #  else
416           qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
417 #  endif
418
419           qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
420           tfar = ssef(isect->t);
421 #  if BVH_FEATURE(BVH_HAIR)
422           dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
423 #  endif
424           idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
425 #  ifdef __KERNEL_AVX2__
426           P_idir = P * idir;
427           P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
428 #  endif
429 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
430           org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
431 #  endif
432
433           ++stack_ptr;
434           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
435           traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
436           traversal_stack[stack_ptr].dist = -FLT_MAX;
437
438           node_addr = kernel_tex_fetch(__object_node, object);
439
440           BVH_DEBUG_NEXT_INSTANCE();
441         }
442       }
443 #endif /* FEATURE(BVH_INSTANCING) */
444     } while (node_addr != ENTRYPOINT_SENTINEL);
445
446 #if BVH_FEATURE(BVH_INSTANCING)
447     if (stack_ptr >= 0) {
448       kernel_assert(object != OBJECT_NONE);
449
450       /* Instance pop. */
451 #  if BVH_FEATURE(BVH_MOTION)
452       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
453 #  else
454       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
455 #  endif
456
457       qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
458       tfar = ssef(isect->t);
459 #  if BVH_FEATURE(BVH_HAIR)
460       dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
461 #  endif
462       idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
463 #  ifdef __KERNEL_AVX2__
464       P_idir = P * idir;
465       P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
466 #  endif
467 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
468       org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
469 #  endif
470
471       object = OBJECT_NONE;
472       node_addr = traversal_stack[stack_ptr].addr;
473       node_dist = traversal_stack[stack_ptr].dist;
474       --stack_ptr;
475     }
476 #endif /* FEATURE(BVH_INSTANCING) */
477   } while (node_addr != ENTRYPOINT_SENTINEL);
478
479   return (isect->prim != PRIM_NONE);
480 }
481
482 #undef NODE_INTERSECT
483 #undef NODE_INTERSECT_ROBUST