Fix T52452: OSL trace broken after shadow catcher recent changes.
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
/*
 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
 * and code copyright 2009-2012 Intel Corporation
 *
 * Modifications Copyright 2011-2013, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

20 #ifdef __QBVH__
21 #  include "kernel/bvh/qbvh_traversal.h"
22 #endif
23
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
27 #else
28 #  define NODE_INTERSECT bvh_aligned_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
30 #endif
31
/* This is a template BVH traversal function, where various features can be
 * enabled/disabled. This way we can compile optimized versions for each case
 * without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_HAIR: hair curve rendering
 * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
 * BVH_MOTION: motion blur rendering
 */

43 ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
44                                                      const Ray *ray,
45                                                      Intersection *isect,
46                                                      const uint visibility
47 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
48                                                      , uint *lcg_state,
49                                                      float difl,
50                                                      float extmax
51 #endif
52                                                      )
53 {
54         /* todo:
55          * - test if pushing distance on the stack helps (for non shadow rays)
56          * - separate version for shadow rays
57          * - likely and unlikely for if() statements
58          * - test restrict attribute for pointers
59          */
60
61         /* traversal stack in CUDA thread-local memory */
62         int traversal_stack[BVH_STACK_SIZE];
63         traversal_stack[0] = ENTRYPOINT_SENTINEL;
64
65         /* traversal variables in registers */
66         int stack_ptr = 0;
67         int node_addr = kernel_data.bvh.root;
68
69         /* ray parameters in registers */
70         float3 P = ray->P;
71         float3 dir = bvh_clamp_direction(ray->D);
72         float3 idir = bvh_inverse_direction(dir);
73         int object = OBJECT_NONE;
74
75 #if BVH_FEATURE(BVH_MOTION)
76         Transform ob_itfm;
77 #endif
78
79         isect->t = ray->t;
80         isect->u = 0.0f;
81         isect->v = 0.0f;
82         isect->prim = PRIM_NONE;
83         isect->object = OBJECT_NONE;
84
85         BVH_DEBUG_INIT();
86
87 #if defined(__KERNEL_SSE2__)
88         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
89         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
90
91         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
92         ssef Psplat[3], idirsplat[3];
93 #  if BVH_FEATURE(BVH_HAIR)
94         ssef tnear(0.0f), tfar(isect->t);
95 #  endif
96         shuffle_swap_t shufflexyz[3];
97
98         Psplat[0] = ssef(P.x);
99         Psplat[1] = ssef(P.y);
100         Psplat[2] = ssef(P.z);
101
102         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
103
104         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
105 #endif
106
107         /* traversal loop */
108         do {
109                 do {
110                         /* traverse internal nodes */
111                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
112                                 int node_addr_child1, traverse_mask;
113                                 float dist[2];
114                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
115
116 #if !defined(__KERNEL_SSE2__)
117 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
118                                 if(difl != 0.0f) {
119                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
120                                                                               P,
121 #    if BVH_FEATURE(BVH_HAIR)
122                                                                               dir,
123 #    endif
124                                                                               idir,
125                                                                               isect->t,
126                                                                               difl,
127                                                                               extmax,
128                                                                               node_addr,
129                                                                               visibility,
130                                                                               dist);
131                                 }
132                                 else
133 #  endif
134                                 {
135                                         traverse_mask = NODE_INTERSECT(kg,
136                                                                        P,
137 #    if BVH_FEATURE(BVH_HAIR)
138                                                                        dir,
139 #    endif
140                                                                        idir,
141                                                                        isect->t,
142                                                                        node_addr,
143                                                                        visibility,
144                                                                        dist);
145                                 }
146 #else // __KERNEL_SSE2__
147 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
148                                 if(difl != 0.0f) {
149                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
150                                                                               P,
151                                                                               dir,
152 #    if BVH_FEATURE(BVH_HAIR)
153                                                                               tnear,
154                                                                               tfar,
155 #    endif
156                                                                               tsplat,
157                                                                               Psplat,
158                                                                               idirsplat,
159                                                                               shufflexyz,
160                                                                               difl,
161                                                                               extmax,
162                                                                               node_addr,
163                                                                               visibility,
164                                                                               dist);
165                                 }
166                                 else
167 #  endif
168                                 {
169                                         traverse_mask = NODE_INTERSECT(kg,
170                                                                        P,
171                                                                        dir,
172 #    if BVH_FEATURE(BVH_HAIR)
173                                                                        tnear,
174                                                                        tfar,
175 #    endif
176                                                                        tsplat,
177                                                                        Psplat,
178                                                                        idirsplat,
179                                                                        shufflexyz,
180                                                                        node_addr,
181                                                                        visibility,
182                                                                        dist);
183                                 }
184 #endif // __KERNEL_SSE2__
185
186                                 node_addr = __float_as_int(cnodes.z);
187                                 node_addr_child1 = __float_as_int(cnodes.w);
188
189                                 if(traverse_mask == 3) {
190                                         /* Both children were intersected, push the farther one. */
191                                         bool is_closest_child1 = (dist[1] < dist[0]);
192                                         if(is_closest_child1) {
193                                                 int tmp = node_addr;
194                                                 node_addr = node_addr_child1;
195                                                 node_addr_child1 = tmp;
196                                         }
197
198                                         ++stack_ptr;
199                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
200                                         traversal_stack[stack_ptr] = node_addr_child1;
201                                 }
202                                 else {
203                                         /* One child was intersected. */
204                                         if(traverse_mask == 2) {
205                                                 node_addr = node_addr_child1;
206                                         }
207                                         else if(traverse_mask == 0) {
208                                                 /* Neither child was intersected. */
209                                                 node_addr = traversal_stack[stack_ptr];
210                                                 --stack_ptr;
211                                         }
212                                 }
213                                 BVH_DEBUG_NEXT_NODE();
214                         }
215
216                         /* if node is leaf, fetch triangle list */
217                         if(node_addr < 0) {
218                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
219                                 int prim_addr = __float_as_int(leaf.x);
220
221 #if BVH_FEATURE(BVH_INSTANCING)
222                                 if(prim_addr >= 0) {
223 #endif
224                                         const int prim_addr2 = __float_as_int(leaf.y);
225                                         const uint type = __float_as_int(leaf.w);
226
227                                         /* pop */
228                                         node_addr = traversal_stack[stack_ptr];
229                                         --stack_ptr;
230
231                                         /* primitive intersection */
232                                         switch(type & PRIMITIVE_ALL) {
233                                                 case PRIMITIVE_TRIANGLE: {
234                                                         for(; prim_addr < prim_addr2; prim_addr++) {
235                                                                 BVH_DEBUG_NEXT_INTERSECTION();
236                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
237                                                                 if(triangle_intersect(kg,
238                                                                                       isect,
239                                                                                       P,
240                                                                                       dir,
241                                                                                       visibility,
242                                                                                       object,
243                                                                                       prim_addr))
244                                                                 {
245                                                                         /* shadow ray early termination */
246 #if defined(__KERNEL_SSE2__)
247                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
248                                                                                 return true;
249                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
250 #  if BVH_FEATURE(BVH_HAIR)
251                                                                         tfar = ssef(isect->t);
252 #  endif
253 #else
254                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
255                                                                                 return true;
256 #endif
257                                                                 }
258                                                         }
259                                                         break;
260                                                 }
261 #if BVH_FEATURE(BVH_MOTION)
262                                                 case PRIMITIVE_MOTION_TRIANGLE: {
263                                                         for(; prim_addr < prim_addr2; prim_addr++) {
264                                                                 BVH_DEBUG_NEXT_INTERSECTION();
265                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
266                                                                 if(motion_triangle_intersect(kg,
267                                                                                              isect,
268                                                                                              P,
269                                                                                              dir,
270                                                                                              ray->time,
271                                                                                              visibility,
272                                                                                              object,
273                                                                                              prim_addr))
274                                                                 {
275                                                                         /* shadow ray early termination */
276 #  if defined(__KERNEL_SSE2__)
277                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
278                                                                                 return true;
279                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
280 #    if BVH_FEATURE(BVH_HAIR)
281                                                                         tfar = ssef(isect->t);
282 #    endif
283 #  else
284                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
285                                                                                 return true;
286 #  endif
287                                                                 }
288                                                         }
289                                                         break;
290                                                 }
291 #endif  /* BVH_FEATURE(BVH_MOTION) */
292 #if BVH_FEATURE(BVH_HAIR)
293                                                 case PRIMITIVE_CURVE:
294                                                 case PRIMITIVE_MOTION_CURVE: {
295                                                         for(; prim_addr < prim_addr2; prim_addr++) {
296                                                                 BVH_DEBUG_NEXT_INTERSECTION();
297                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
298                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
299                                                                 bool hit;
300                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
301                                                                         hit = cardinal_curve_intersect(kg,
302                                                                                                        isect,
303                                                                                                        P,
304                                                                                                        dir,
305                                                                                                        visibility,
306                                                                                                        object,
307                                                                                                        prim_addr,
308                                                                                                        ray->time,
309                                                                                                        curve_type,
310                                                                                                        lcg_state,
311                                                                                                        difl,
312                                                                                                        extmax);
313                                                                 }
314                                                                 else {
315                                                                         hit = curve_intersect(kg,
316                                                                                               isect,
317                                                                                               P,
318                                                                                               dir,
319                                                                                               visibility,
320                                                                                               object,
321                                                                                               prim_addr,
322                                                                                               ray->time,
323                                                                                               curve_type,
324                                                                                               lcg_state,
325                                                                                               difl,
326                                                                                               extmax);
327                                                                 }
328                                                                 if(hit) {
329                                                                         /* shadow ray early termination */
330 #  if defined(__KERNEL_SSE2__)
331                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
332                                                                                 return true;
333                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
334 #    if BVH_FEATURE(BVH_HAIR)
335                                                                         tfar = ssef(isect->t);
336 #    endif
337 #  else
338                                                                         if(!(visibility & (PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE)))
339                                                                                 return true;
340 #  endif
341                                                                 }
342                                                         }
343                                                         break;
344                                                 }
345 #endif  /* BVH_FEATURE(BVH_HAIR) */
346                                         }
347                                 }
348 #if BVH_FEATURE(BVH_INSTANCING)
349                                 else {
350                                         /* instance push */
351                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
352
353 #  if BVH_FEATURE(BVH_MOTION)
354                                         isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
355 #  else
356                                         isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
357 #  endif
358
359 #  if defined(__KERNEL_SSE2__)
360                                         Psplat[0] = ssef(P.x);
361                                         Psplat[1] = ssef(P.y);
362                                         Psplat[2] = ssef(P.z);
363
364                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
365 #    if BVH_FEATURE(BVH_HAIR)
366                                         tfar = ssef(isect->t);
367 #    endif
368
369                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
370 #  endif
371
372                                         ++stack_ptr;
373                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
374                                         traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
375
376                                         node_addr = kernel_tex_fetch(__object_node, object);
377
378                                         BVH_DEBUG_NEXT_INSTANCE();
379                                 }
380                         }
381 #endif  /* FEATURE(BVH_INSTANCING) */
382                 } while(node_addr != ENTRYPOINT_SENTINEL);
383
384 #if BVH_FEATURE(BVH_INSTANCING)
385                 if(stack_ptr >= 0) {
386                         kernel_assert(object != OBJECT_NONE);
387
388                         /* instance pop */
389 #  if BVH_FEATURE(BVH_MOTION)
390                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
391 #  else
392                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
393 #  endif
394
395 #  if defined(__KERNEL_SSE2__)
396                         Psplat[0] = ssef(P.x);
397                         Psplat[1] = ssef(P.y);
398                         Psplat[2] = ssef(P.z);
399
400                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
401 #    if BVH_FEATURE(BVH_HAIR)
402                         tfar = ssef(isect->t);
403 #    endif
404
405                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
406 #  endif
407
408                         object = OBJECT_NONE;
409                         node_addr = traversal_stack[stack_ptr];
410                         --stack_ptr;
411                 }
412 #endif  /* FEATURE(BVH_INSTANCING) */
413         } while(node_addr != ENTRYPOINT_SENTINEL);
414
415         return (isect->prim != PRIM_NONE);
416 }
417
418 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
419                                          const Ray *ray,
420                                          Intersection *isect,
421                                          const uint visibility
422 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
423                                          , uint *lcg_state,
424                                          float difl,
425                                          float extmax
426 #endif
427                                          )
428 {
429 #ifdef __QBVH__
430         if(kernel_data.bvh.use_qbvh) {
431                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
432                                                     ray,
433                                                     isect,
434                                                     visibility
435 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
436                                                     , lcg_state,
437                                                     difl,
438                                                     extmax
439 #endif
440                                                     );
441         }
442         else
443 #endif
444         {
445                 kernel_assert(kernel_data.bvh.use_qbvh == false);
446                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
447                                                    ray,
448                                                    isect,
449                                                    visibility
450 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
451                                                    , lcg_state,
452                                                    difl,
453                                                    extmax
454 #endif
455                                                    );
456         }
457 }
458
/* Drop the per-variant macros. NODE_INTERSECT and NODE_INTERSECT_ROBUST are
 * defined at the top of this file; BVH_FUNCTION_NAME and
 * BVH_FUNCTION_FEATURES are not defined here - presumably the including
 * file sets them before each inclusion (verify against the includer).
 */
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
#undef NODE_INTERSECT
#undef NODE_INTERSECT_ROBUST