ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
/*
 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
 * and code copyright 2009-2012 Intel Corporation
 *
 * Modifications Copyright 2011-2013, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19
20 #ifdef __QBVH__
21 #  include "kernel/bvh/qbvh_traversal.h"
22 #endif
23 #ifdef __KERNEL_AVX2__
24 #  include "kernel/bvh/obvh_traversal.h"
25 #endif
26
27 #if BVH_FEATURE(BVH_HAIR)
28 #  define NODE_INTERSECT bvh_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
30 #else
31 #  define NODE_INTERSECT bvh_aligned_node_intersect
32 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
33 #endif
34
35 /* This is a template BVH traversal function, where various features can be
36  * enabled/disabled. This way we can compile optimized versions for each case
37  * without new features slowing things down.
38  *
39  * BVH_INSTANCING: object instancing
40  * BVH_HAIR: hair curve rendering
41  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
42  * BVH_MOTION: motion blur rendering
43  */
44
45 ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
46                                                      const Ray *ray,
47                                                      Intersection *isect,
48                                                      const uint visibility
49 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
50                                                      ,
51                                                      uint *lcg_state,
52                                                      float difl,
53                                                      float extmax
54 #endif
55 )
56 {
57   /* todo:
58    * - test if pushing distance on the stack helps (for non shadow rays)
59    * - separate version for shadow rays
60    * - likely and unlikely for if() statements
61    * - test restrict attribute for pointers
62    */
63
64   /* traversal stack in CUDA thread-local memory */
65   int traversal_stack[BVH_STACK_SIZE];
66   traversal_stack[0] = ENTRYPOINT_SENTINEL;
67
68   /* traversal variables in registers */
69   int stack_ptr = 0;
70   int node_addr = kernel_data.bvh.root;
71
72   /* ray parameters in registers */
73   float3 P = ray->P;
74   float3 dir = bvh_clamp_direction(ray->D);
75   float3 idir = bvh_inverse_direction(dir);
76   int object = OBJECT_NONE;
77
78 #if BVH_FEATURE(BVH_MOTION)
79   Transform ob_itfm;
80 #endif
81
82   isect->t = ray->t;
83   isect->u = 0.0f;
84   isect->v = 0.0f;
85   isect->prim = PRIM_NONE;
86   isect->object = OBJECT_NONE;
87
88   BVH_DEBUG_INIT();
89
90 #if defined(__KERNEL_SSE2__)
91   const shuffle_swap_t shuf_identity = shuffle_swap_identity();
92   const shuffle_swap_t shuf_swap = shuffle_swap_swap();
93
94   const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
95   ssef Psplat[3], idirsplat[3];
96 #  if BVH_FEATURE(BVH_HAIR)
97   ssef tnear(0.0f), tfar(isect->t);
98 #  endif
99   shuffle_swap_t shufflexyz[3];
100
101   Psplat[0] = ssef(P.x);
102   Psplat[1] = ssef(P.y);
103   Psplat[2] = ssef(P.z);
104
105   ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
106
107   gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
108 #endif
109
110   /* traversal loop */
111   do {
112     do {
113       /* traverse internal nodes */
114       while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
115         int node_addr_child1, traverse_mask;
116         float dist[2];
117         float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
118
119 #if !defined(__KERNEL_SSE2__)
120 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
121         if (difl != 0.0f) {
122           traverse_mask = NODE_INTERSECT_ROBUST(kg,
123                                                 P,
124 #    if BVH_FEATURE(BVH_HAIR)
125                                                 dir,
126 #    endif
127                                                 idir,
128                                                 isect->t,
129                                                 difl,
130                                                 extmax,
131                                                 node_addr,
132                                                 visibility,
133                                                 dist);
134         }
135         else
136 #  endif
137         {
138           traverse_mask = NODE_INTERSECT(kg,
139                                          P,
140 #  if BVH_FEATURE(BVH_HAIR)
141                                          dir,
142 #  endif
143                                          idir,
144                                          isect->t,
145                                          node_addr,
146                                          visibility,
147                                          dist);
148         }
149 #else  // __KERNEL_SSE2__
150 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
151         if (difl != 0.0f) {
152           traverse_mask = NODE_INTERSECT_ROBUST(kg,
153                                                 P,
154                                                 dir,
155 #    if BVH_FEATURE(BVH_HAIR)
156                                                 tnear,
157                                                 tfar,
158 #    endif
159                                                 tsplat,
160                                                 Psplat,
161                                                 idirsplat,
162                                                 shufflexyz,
163                                                 difl,
164                                                 extmax,
165                                                 node_addr,
166                                                 visibility,
167                                                 dist);
168         }
169         else
170 #  endif
171         {
172           traverse_mask = NODE_INTERSECT(kg,
173                                          P,
174                                          dir,
175 #  if BVH_FEATURE(BVH_HAIR)
176                                          tnear,
177                                          tfar,
178 #  endif
179                                          tsplat,
180                                          Psplat,
181                                          idirsplat,
182                                          shufflexyz,
183                                          node_addr,
184                                          visibility,
185                                          dist);
186         }
187 #endif  // __KERNEL_SSE2__
188
189         node_addr = __float_as_int(cnodes.z);
190         node_addr_child1 = __float_as_int(cnodes.w);
191
192         if (traverse_mask == 3) {
193           /* Both children were intersected, push the farther one. */
194           bool is_closest_child1 = (dist[1] < dist[0]);
195           if (is_closest_child1) {
196             int tmp = node_addr;
197             node_addr = node_addr_child1;
198             node_addr_child1 = tmp;
199           }
200
201           ++stack_ptr;
202           kernel_assert(stack_ptr < BVH_STACK_SIZE);
203           traversal_stack[stack_ptr] = node_addr_child1;
204         }
205         else {
206           /* One child was intersected. */
207           if (traverse_mask == 2) {
208             node_addr = node_addr_child1;
209           }
210           else if (traverse_mask == 0) {
211             /* Neither child was intersected. */
212             node_addr = traversal_stack[stack_ptr];
213             --stack_ptr;
214           }
215         }
216         BVH_DEBUG_NEXT_NODE();
217       }
218
219       /* if node is leaf, fetch triangle list */
220       if (node_addr < 0) {
221         float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
222         int prim_addr = __float_as_int(leaf.x);
223
224 #if BVH_FEATURE(BVH_INSTANCING)
225         if (prim_addr >= 0) {
226 #endif
227           const int prim_addr2 = __float_as_int(leaf.y);
228           const uint type = __float_as_int(leaf.w);
229
230           /* pop */
231           node_addr = traversal_stack[stack_ptr];
232           --stack_ptr;
233
234           /* primitive intersection */
235           switch (type & PRIMITIVE_ALL) {
236             case PRIMITIVE_TRIANGLE: {
237               for (; prim_addr < prim_addr2; prim_addr++) {
238                 BVH_DEBUG_NEXT_INTERSECTION();
239                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
240                 if (triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr)) {
241                   /* shadow ray early termination */
242 #if defined(__KERNEL_SSE2__)
243                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
244                     return true;
245                   tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
246 #  if BVH_FEATURE(BVH_HAIR)
247                   tfar = ssef(isect->t);
248 #  endif
249 #else
250                 if (visibility & PATH_RAY_SHADOW_OPAQUE)
251                   return true;
252 #endif
253                 }
254               }
255               break;
256             }
257 #if BVH_FEATURE(BVH_MOTION)
258             case PRIMITIVE_MOTION_TRIANGLE: {
259               for (; prim_addr < prim_addr2; prim_addr++) {
260                 BVH_DEBUG_NEXT_INTERSECTION();
261                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
262                 if (motion_triangle_intersect(
263                         kg, isect, P, dir, ray->time, visibility, object, prim_addr)) {
264                   /* shadow ray early termination */
265 #  if defined(__KERNEL_SSE2__)
266                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
267                     return true;
268                   tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
269 #    if BVH_FEATURE(BVH_HAIR)
270                   tfar = ssef(isect->t);
271 #    endif
272 #  else
273                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
274                     return true;
275 #  endif
276                 }
277               }
278               break;
279             }
280 #endif /* BVH_FEATURE(BVH_MOTION) */
281 #if BVH_FEATURE(BVH_HAIR)
282             case PRIMITIVE_CURVE:
283             case PRIMITIVE_MOTION_CURVE: {
284               for (; prim_addr < prim_addr2; prim_addr++) {
285                 BVH_DEBUG_NEXT_INTERSECTION();
286                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
287                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
288                 bool hit;
289                 if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
290                   hit = cardinal_curve_intersect(kg,
291                                                  isect,
292                                                  P,
293                                                  dir,
294                                                  visibility,
295                                                  object,
296                                                  prim_addr,
297                                                  ray->time,
298                                                  curve_type,
299                                                  lcg_state,
300                                                  difl,
301                                                  extmax);
302                 }
303                 else {
304                   hit = curve_intersect(kg,
305                                         isect,
306                                         P,
307                                         dir,
308                                         visibility,
309                                         object,
310                                         prim_addr,
311                                         ray->time,
312                                         curve_type,
313                                         lcg_state,
314                                         difl,
315                                         extmax);
316                 }
317                 if (hit) {
318                   /* shadow ray early termination */
319 #  if defined(__KERNEL_SSE2__)
320                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
321                     return true;
322                   tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
323 #    if BVH_FEATURE(BVH_HAIR)
324                   tfar = ssef(isect->t);
325 #    endif
326 #  else
327                   if (visibility & PATH_RAY_SHADOW_OPAQUE)
328                     return true;
329 #  endif
330                 }
331               }
332               break;
333             }
334 #endif /* BVH_FEATURE(BVH_HAIR) */
335           }
336         }
337 #if BVH_FEATURE(BVH_INSTANCING)
338         else {
339           /* instance push */
340           object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
341
342 #  if BVH_FEATURE(BVH_MOTION)
343           isect->t = bvh_instance_motion_push(
344               kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
345 #  else
346           isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
347 #  endif
348
349 #  if defined(__KERNEL_SSE2__)
350           Psplat[0] = ssef(P.x);
351           Psplat[1] = ssef(P.y);
352           Psplat[2] = ssef(P.z);
353
354           tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
355 #    if BVH_FEATURE(BVH_HAIR)
356           tfar = ssef(isect->t);
357 #    endif
358
359           gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
360 #  endif
361
362           ++stack_ptr;
363           kernel_assert(stack_ptr < BVH_STACK_SIZE);
364           traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
365
366           node_addr = kernel_tex_fetch(__object_node, object);
367
368           BVH_DEBUG_NEXT_INSTANCE();
369         }
370       }
371 #endif /* FEATURE(BVH_INSTANCING) */
372     } while (node_addr != ENTRYPOINT_SENTINEL);
373
374 #if BVH_FEATURE(BVH_INSTANCING)
375     if (stack_ptr >= 0) {
376       kernel_assert(object != OBJECT_NONE);
377
378       /* instance pop */
379 #  if BVH_FEATURE(BVH_MOTION)
380       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
381 #  else
382       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
383 #  endif
384
385 #  if defined(__KERNEL_SSE2__)
386       Psplat[0] = ssef(P.x);
387       Psplat[1] = ssef(P.y);
388       Psplat[2] = ssef(P.z);
389
390       tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
391 #    if BVH_FEATURE(BVH_HAIR)
392       tfar = ssef(isect->t);
393 #    endif
394
395       gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
396 #  endif
397
398       object = OBJECT_NONE;
399       node_addr = traversal_stack[stack_ptr];
400       --stack_ptr;
401     }
402 #endif /* FEATURE(BVH_INSTANCING) */
403   } while (node_addr != ENTRYPOINT_SENTINEL);
404
405   return (isect->prim != PRIM_NONE);
406 }
407
408 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
409                                          const Ray *ray,
410                                          Intersection *isect,
411                                          const uint visibility
412 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
413                                          ,
414                                          uint *lcg_state,
415                                          float difl,
416                                          float extmax
417 #endif
418 )
419 {
420   switch (kernel_data.bvh.bvh_layout) {
421 #ifdef __KERNEL_AVX2__
422     case BVH_LAYOUT_BVH8:
423       return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
424                                           ray,
425                                           isect,
426                                           visibility
427 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
428                                           ,
429                                           lcg_state,
430                                           difl,
431                                           extmax
432 #  endif
433       );
434 #endif
435 #ifdef __QBVH__
436     case BVH_LAYOUT_BVH4:
437       return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
438                                           ray,
439                                           isect,
440                                           visibility
441 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
442                                           ,
443                                           lcg_state,
444                                           difl,
445                                           extmax
446 #  endif
447       );
448 #endif /* __QBVH__ */
449     case BVH_LAYOUT_BVH2:
450       return BVH_FUNCTION_FULL_NAME(BVH)(kg,
451                                          ray,
452                                          isect,
453                                          visibility
454 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
455                                          ,
456                                          lcg_state,
457                                          difl,
458                                          extmax
459 #endif
460       );
461   }
462   kernel_assert(!"Should not happen");
463   return false;
464 }
465
/* Drop the per-instantiation macros: the two NODE_INTERSECT* names defined at
 * the top of this file and the BVH_FUNCTION_* names used above, which are not
 * defined in this file. */
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
#undef NODE_INTERSECT
#undef NODE_INTERSECT_ROBUST