Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "kernel/bvh/qbvh_traversal.h"
22 #endif
23 #ifdef __KERNEL_AVX2__
24 #  include "kernel/bvh/obvh_traversal.h"
25 #endif
26
/* Select the node intersection helpers used by the traversal function below:
 * the generic bvh_node_intersect* pair when BVH_HAIR is enabled, the
 * bvh_aligned_node_intersect* pair otherwise.
 * NOTE(review): presumably hair BVHs can contain unaligned nodes that the
 * aligned helpers cannot test - confirm against the helper definitions. */
27 #if BVH_FEATURE(BVH_HAIR)
28 #  define NODE_INTERSECT bvh_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
30 #else
31 #  define NODE_INTERSECT bvh_aligned_node_intersect
32 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
33 #endif
34
35 /* This is a template BVH traversal function, where various features can be
36  * enabled/disabled. This way we can compile optimized versions for each case
37  * without new features slowing things down.
38  *
39  * BVH_INSTANCING: object instancing
40  * BVH_HAIR: hair curve rendering
41  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
42  * BVH_MOTION: motion blur rendering
43  *
44  */
45
/* Stack-based traversal of the two-child BVH for a single ray.
 *
 * Parameters:
 *   kg          - kernel globals, forwarded to every node/primitive helper.
 *   ray         - input ray; P, D and t are read up front, time is read by the
 *                 motion triangle and curve paths, and the ray itself is
 *                 handed to the instance push/pop helpers.
 *   isect       - output record; t/u/v/prim/object are reset here and
 *                 isect->t is re-read after every reported hit.
 *   visibility  - mask forwarded to the node and primitive tests; when it
 *                 contains PATH_RAY_SHADOW_OPAQUE the first reported hit
 *                 returns immediately.
 *   lcg_state, difl, extmax
 *               - only present with BVH_HAIR_MINIMUM_WIDTH; a non-zero difl
 *                 selects NODE_INTERSECT_ROBUST for the node tests.
 *
 * Returns true on the shadow-ray early out, otherwise whether isect->prim
 * differs from PRIM_NONE once traversal has finished.
 *
 * NOTE(review): the primitive intersectors are presumably what fill in isect
 * on a hit; they are defined elsewhere - confirm there.
 */
46 ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
47                                                      const Ray *ray,
48                                                      Intersection *isect,
49                                                      const uint visibility
50 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
51                                                      , uint *lcg_state,
52                                                      float difl,
53                                                      float extmax
54 #endif
55                                                      )
56 {
57         /* todo:
58          * - test if pushing distance on the stack helps (for non shadow rays)
59          * - separate version for shadow rays
60          * - likely and unlikely for if() statements
61          * - test restrict attribute for pointers
62          */
63
64         /* traversal stack in CUDA thread-local memory */
65         int traversal_stack[BVH_STACK_SIZE];
           /* Slot 0 holds the sentinel: popping it is what ends the traversal. */
66         traversal_stack[0] = ENTRYPOINT_SENTINEL;
67
68         /* traversal variables in registers */
69         int stack_ptr = 0;
70         int node_addr = kernel_data.bvh.root;
71
72         /* ray parameters in registers */
73         float3 P = ray->P;
74         float3 dir = bvh_clamp_direction(ray->D);
75         float3 idir = bvh_inverse_direction(dir);
           /* Stays OBJECT_NONE except while traversing inside an instance. */
76         int object = OBJECT_NONE;
77
78 #if BVH_FEATURE(BVH_MOTION)
           /* Filled by bvh_instance_motion_push() and handed back to the matching pop. */
79         Transform ob_itfm;
80 #endif
81
           /* Reset the output record; isect->t doubles as the current maximum distance. */
82         isect->t = ray->t;
83         isect->u = 0.0f;
84         isect->v = 0.0f;
85         isect->prim = PRIM_NONE;
86         isect->object = OBJECT_NONE;
87
88         BVH_DEBUG_INIT();
89
90 #if defined(__KERNEL_SSE2__)
           /* SSE path: precomputed splat/shuffle state that is passed to the SSE
            * variants of NODE_INTERSECT / NODE_INTERSECT_ROBUST below. It has to be
            * rebuilt whenever P, idir or isect->t change. */
91         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
92         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
93
94         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
95         ssef Psplat[3], idirsplat[3];
96 #  if BVH_FEATURE(BVH_HAIR)
97         ssef tnear(0.0f), tfar(isect->t);
98 #  endif
99         shuffle_swap_t shufflexyz[3];
100
101         Psplat[0] = ssef(P.x);
102         Psplat[1] = ssef(P.y);
103         Psplat[2] = ssef(P.z);
104
105         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
106
107         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
108 #endif
109
110         /* traversal loop */
            /* The outer loop handles leaving an instance; the inner loop alternates
             * between descending inner nodes and processing one leaf. */
111         do {
112                 do {
113                         /* traverse internal nodes */
                            /* Non-negative addresses other than the sentinel are inner nodes. */
114                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
115                                 int node_addr_child1, traverse_mask;
116                                 float dist[2];
117                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
118
119 #if !defined(__KERNEL_SSE2__)
120 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
121                                 if(difl != 0.0f) {
122                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
123                                                                               P,
124 #    if BVH_FEATURE(BVH_HAIR)
125                                                                               dir,
126 #    endif
127                                                                               idir,
128                                                                               isect->t,
129                                                                               difl,
130                                                                               extmax,
131                                                                               node_addr,
132                                                                               visibility,
133                                                                               dist);
134                                 }
135                                 else
136 #  endif
137                                 {
138                                         traverse_mask = NODE_INTERSECT(kg,
139                                                                        P,
140 #    if BVH_FEATURE(BVH_HAIR)
141                                                                        dir,
142 #    endif
143                                                                        idir,
144                                                                        isect->t,
145                                                                        node_addr,
146                                                                        visibility,
147                                                                        dist);
148                                 }
149 #else  // __KERNEL_SSE2__
150 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
151                                 if(difl != 0.0f) {
152                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
153                                                                               P,
154                                                                               dir,
155 #    if BVH_FEATURE(BVH_HAIR)
156                                                                               tnear,
157                                                                               tfar,
158 #    endif
159                                                                               tsplat,
160                                                                               Psplat,
161                                                                               idirsplat,
162                                                                               shufflexyz,
163                                                                               difl,
164                                                                               extmax,
165                                                                               node_addr,
166                                                                               visibility,
167                                                                               dist);
168                                 }
169                                 else
170 #  endif
171                                 {
172                                         traverse_mask = NODE_INTERSECT(kg,
173                                                                        P,
174                                                                        dir,
175 #    if BVH_FEATURE(BVH_HAIR)
176                                                                        tnear,
177                                                                        tfar,
178 #    endif
179                                                                        tsplat,
180                                                                        Psplat,
181                                                                        idirsplat,
182                                                                        shufflexyz,
183                                                                        node_addr,
184                                                                        visibility,
185                                                                        dist);
186                                 }
187 #endif  // __KERNEL_SSE2__
188
                                    /* The two child addresses are stored as integer bit patterns in
                                     * the z and w components of the fetched node record. node_addr is
                                     * set to the first child by default. */
189                                 node_addr = __float_as_int(cnodes.z);
190                                 node_addr_child1 = __float_as_int(cnodes.w);
191
                                    /* traverse_mask cases handled below: 3 = both children, 2 = second
                                     * child only, 0 = neither. Any other value leaves node_addr on the
                                     * first child (presumably mask 1 - confirm in NODE_INTERSECT). */
192                                 if(traverse_mask == 3) {
193                                         /* Both children were intersected, push the farther one. */
194                                         bool is_closest_child1 = (dist[1] < dist[0]);
195                                         if(is_closest_child1) {
196                                                 int tmp = node_addr;
197                                                 node_addr = node_addr_child1;
198                                                 node_addr_child1 = tmp;
199                                         }
200
201                                         ++stack_ptr;
202                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
203                                         traversal_stack[stack_ptr] = node_addr_child1;
204                                 }
205                                 else {
206                                         /* At most one child was intersected. */
207                                         if(traverse_mask == 2) {
208                                                 node_addr = node_addr_child1;
209                                         }
210                                         else if(traverse_mask == 0) {
211                                                 /* Neither child was intersected. */
                                                    /* Pop the next pending node (or the sentinel). */
212                                                 node_addr = traversal_stack[stack_ptr];
213                                                 --stack_ptr;
214                                         }
215                                 }
216                                 BVH_DEBUG_NEXT_NODE();
217                         }
218
219                         /* if node is leaf, fetch triangle list */
                            /* A negative address encodes leaf index (-node_addr-1). */
220                         if(node_addr < 0) {
221                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
222                                 int prim_addr = __float_as_int(leaf.x);
223
224 #if BVH_FEATURE(BVH_INSTANCING)
                                    /* With instancing, a negative prim_addr marks an instance leaf,
                                     * which is handled in the else branch further down. */
225                                 if(prim_addr >= 0) {
226 #endif
                                            /* Primitive range is [prim_addr, prim_addr2); leaf.w carries
                                             * the primitive type bits. */
227                                         const int prim_addr2 = __float_as_int(leaf.y);
228                                         const uint type = __float_as_int(leaf.w);
229
230                                         /* pop */
                                            /* Done before testing primitives, so node_addr already names the
                                             * next node to visit once this leaf has been processed. */
231                                         node_addr = traversal_stack[stack_ptr];
232                                         --stack_ptr;
233
234                                         /* primitive intersection */
235                                         switch(type & PRIMITIVE_ALL) {
236                                                 case PRIMITIVE_TRIANGLE: {
237                                                         for(; prim_addr < prim_addr2; prim_addr++) {
238                                                                 BVH_DEBUG_NEXT_INTERSECTION();
239                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
240                                                                 if(triangle_intersect(kg,
241                                                                                       isect,
242                                                                                       P,
243                                                                                       dir,
244                                                                                       visibility,
245                                                                                       object,
246                                                                                       prim_addr))
247                                                                 {
248                                                                         /* shadow ray early termination */
249 #if defined(__KERNEL_SSE2__)
250                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
251                                                                                 return true;
                                                                            /* Refresh the SSE distance state from the updated isect->t. */
252                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
253 #  if BVH_FEATURE(BVH_HAIR)
254                                                                         tfar = ssef(isect->t);
255 #  endif
256 #else
257                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
258                                                                                 return true;
259 #endif
260                                                                 }
261                                                         }
262                                                         break;
263                                                 }
264 #if BVH_FEATURE(BVH_MOTION)
265                                                 case PRIMITIVE_MOTION_TRIANGLE: {
266                                                         for(; prim_addr < prim_addr2; prim_addr++) {
267                                                                 BVH_DEBUG_NEXT_INTERSECTION();
268                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
269                                                                 if(motion_triangle_intersect(kg,
270                                                                                              isect,
271                                                                                              P,
272                                                                                              dir,
273                                                                                              ray->time,
274                                                                                              visibility,
275                                                                                              object,
276                                                                                              prim_addr))
277                                                                 {
278                                                                         /* shadow ray early termination */
279 #  if defined(__KERNEL_SSE2__)
280                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
281                                                                                 return true;
                                                                            /* Refresh the SSE distance state from the updated isect->t. */
282                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
283 #    if BVH_FEATURE(BVH_HAIR)
284                                                                         tfar = ssef(isect->t);
285 #    endif
286 #  else
287                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
288                                                                                 return true;
289 #  endif
290                                                                 }
291                                                         }
292                                                         break;
293                                                 }
294 #endif  /* BVH_FEATURE(BVH_MOTION) */
295 #if BVH_FEATURE(BVH_HAIR)
                                                    /* NOTE(review): this section is guarded by BVH_HAIR but uses
                                                     * lcg_state/difl/extmax, which are only declared under
                                                     * BVH_HAIR_MINIMUM_WIDTH; presumably both features are always
                                                     * enabled together - confirm at the include sites. */
296                                                 case PRIMITIVE_CURVE:
297                                                 case PRIMITIVE_MOTION_CURVE: {
298                                                         for(; prim_addr < prim_addr2; prim_addr++) {
299                                                                 BVH_DEBUG_NEXT_INTERSECTION();
                                                                    /* The per-primitive type is re-fetched and passed on to the
                                                                     * curve intersector instead of the leaf-wide type. */
300                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
301                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
302                                                                 bool hit;
303                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
304                                                                         hit = cardinal_curve_intersect(kg,
305                                                                                                        isect,
306                                                                                                        P,
307                                                                                                        dir,
308                                                                                                        visibility,
309                                                                                                        object,
310                                                                                                        prim_addr,
311                                                                                                        ray->time,
312                                                                                                        curve_type,
313                                                                                                        lcg_state,
314                                                                                                        difl,
315                                                                                                        extmax);
316                                                                 }
317                                                                 else {
318                                                                         hit = curve_intersect(kg,
319                                                                                               isect,
320                                                                                               P,
321                                                                                               dir,
322                                                                                               visibility,
323                                                                                               object,
324                                                                                               prim_addr,
325                                                                                               ray->time,
326                                                                                               curve_type,
327                                                                                               lcg_state,
328                                                                                               difl,
329                                                                                               extmax);
330                                                                 }
331                                                                 if(hit) {
332                                                                         /* shadow ray early termination */
333 #  if defined(__KERNEL_SSE2__)
334                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
335                                                                                 return true;
                                                                            /* Refresh the SSE distance state from the updated isect->t. */
336                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
337 #    if BVH_FEATURE(BVH_HAIR)
338                                                                         tfar = ssef(isect->t);
339 #    endif
340 #  else
341                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
342                                                                                 return true;
343 #  endif
344                                                                 }
345                                                         }
346                                                         break;
347                                                 }
348 #endif  /* BVH_FEATURE(BVH_HAIR) */
349                                         }
                                    /* The next brace closes if(prim_addr >= 0) when instancing is enabled;
                                     * without instancing it closes if(node_addr < 0) instead. */
350                                 }
351 #if BVH_FEATURE(BVH_INSTANCING)
352                                 else {
353                                         /* instance push */
                                            /* The object index is looked up at (-prim_addr-1). */
354                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
355
                                            /* The push helpers receive pointers to P, dir and idir and return
                                             * the value that replaces isect->t. */
356 #  if BVH_FEATURE(BVH_MOTION)
357                                         isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
358 #  else
359                                         isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
360 #  endif
361
362 #  if defined(__KERNEL_SSE2__)
                                            /* Rebuild the SSE state from the updated P, idir and isect->t. */
363                                         Psplat[0] = ssef(P.x);
364                                         Psplat[1] = ssef(P.y);
365                                         Psplat[2] = ssef(P.z);
366
367                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
368 #    if BVH_FEATURE(BVH_HAIR)
369                                         tfar = ssef(isect->t);
370 #    endif
371
372                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
373 #  endif
374
                                            /* Push a sentinel so the inner loop stops when the instance's
                                             * subtree is exhausted, then continue at the object's root node. */
375                                         ++stack_ptr;
376                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
377                                         traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
378
379                                         node_addr = kernel_tex_fetch(__object_node, object);
380
381                                         BVH_DEBUG_NEXT_INSTANCE();
382                                 }
                            /* Closes if(node_addr < 0) in the instancing build. */
383                         }
384 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
385                 } while(node_addr != ENTRYPOINT_SENTINEL);
386
387 #if BVH_FEATURE(BVH_INSTANCING)
                    /* A sentinel was popped. If entries remain on the stack it was the one
                     * pushed on instance entry, so undo the instance and resume with the
                     * node saved below it; otherwise it was slot 0 and traversal is done. */
388                 if(stack_ptr >= 0) {
389                         kernel_assert(object != OBJECT_NONE);
390
391                         /* instance pop */
392 #  if BVH_FEATURE(BVH_MOTION)
393                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
394 #  else
395                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
396 #  endif
397
398 #  if defined(__KERNEL_SSE2__)
                            /* Rebuild the SSE state from the updated P, idir and isect->t. */
399                         Psplat[0] = ssef(P.x);
400                         Psplat[1] = ssef(P.y);
401                         Psplat[2] = ssef(P.z);
402
403                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
404 #    if BVH_FEATURE(BVH_HAIR)
405                         tfar = ssef(isect->t);
406 #    endif
407
408                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
409 #  endif
410
411                         object = OBJECT_NONE;
412                         node_addr = traversal_stack[stack_ptr];
413                         --stack_ptr;
414                 }
415 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
416         } while(node_addr != ENTRYPOINT_SENTINEL);
417
            /* No early out happened: report whether isect->prim was changed from PRIM_NONE. */
418         return (isect->prim != PRIM_NONE);
419 }
420
/* Entry point: forwards the query to the traversal routine that matches
 * kernel_data.bvh.bvh_layout and returns its result.
 *
 *   BVH_LAYOUT_BVH8 -> OBVH variant (only compiled with __KERNEL_AVX2__)
 *   BVH_LAYOUT_BVH4 -> QBVH variant (only compiled with __QBVH__)
 *   BVH_LAYOUT_BVH2 -> BVH variant
 *
 * All arguments are passed through unchanged; lcg_state, difl and extmax only
 * exist when BVH_HAIR_MINIMUM_WIDTH is enabled. A layout with no compiled-in
 * case falls out of the switch, trips the kernel_assert and returns false.
 */
421 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
422                                          const Ray *ray,
423                                          Intersection *isect,
424                                          const uint visibility
425 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
426                                          , uint *lcg_state,
427                                          float difl,
428                                          float extmax
429 #endif
430                                          )
431 {
432         switch(kernel_data.bvh.bvh_layout) {
433 #ifdef __KERNEL_AVX2__
434                 case BVH_LAYOUT_BVH8:
435                         return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
436                                                             ray,
437                                                             isect,
438                                                             visibility
439 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
440                                                             , lcg_state,
441                                                             difl,
442                                                             extmax
443 #  endif
444                                                             );
445 #endif
446 #ifdef __QBVH__
447                 case BVH_LAYOUT_BVH4:
448                         return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
449                                                             ray,
450                                                             isect,
451                                                             visibility
452 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
453                                                             , lcg_state,
454                                                             difl,
455                                                             extmax
456 #  endif
457                                                             );
458 #endif  /* __QBVH__ */
459                 case BVH_LAYOUT_BVH2:
460                         return BVH_FUNCTION_FULL_NAME(BVH)(kg,
461                                                            ray,
462                                                            isect,
463                                                            visibility
464 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
465                                                            , lcg_state,
466                                                            difl,
467                                                            extmax
468 #endif
469                                                            );
470         }
            /* Only reached when no case above matched the layout value. */
471         kernel_assert(!"Should not happen");
472         return false;
473 }
474
/* Drop the per-inclusion macros. NODE_INTERSECT and NODE_INTERSECT_ROBUST are
 * defined near the top of this file; BVH_FUNCTION_NAME and
 * BVH_FUNCTION_FEATURES are not defined in the visible source, so they are
 * presumably set by the including file before each inclusion - confirm. */
475 #undef BVH_FUNCTION_NAME
476 #undef BVH_FUNCTION_FEATURES
477 #undef NODE_INTERSECT
478 #undef NODE_INTERSECT_ROBUST