Cycles: SSS and Volume rendering in split kernel
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "qbvh_traversal.h"
22 #endif
23
/* NODE_INTERSECT and NODE_INTERSECT_ROBUST name the node tests used by the
 * traversal below: the bvh_node_intersect pair when BVH_HAIR is enabled, the
 * bvh_aligned_node_intersect pair otherwise. */
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
27 #else
28 #  define NODE_INTERSECT bvh_aligned_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
30 #endif
31
32 /* This is a template BVH traversal function, where various features can be
33  * enabled/disabled. This way we can compile optimized versions for each case
34  * without new features slowing things down.
35  *
36  * BVH_INSTANCING: object instancing
37  * BVH_HAIR: hair curve rendering
38  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
39  * BVH_MOTION: motion blur rendering
40  *
    * The function walks a two-child BVH with an explicit stack. Inner nodes
    * have non-negative addresses; a negative address denotes a leaf whose
    * record is fetched from __bvh_leaf_nodes at index (-node_addr - 1).
    *
    * kg:         kernel globals, forwarded to every helper.
    * ray:        ray to trace; P, D and t are read here, time is read when
    *             BVH_MOTION or BVH_HAIR is enabled.
    * isect:      output; reset on entry to t = ray->t, u = v = 0,
    *             prim = PRIM_NONE, object = OBJECT_NONE, then handed to the
    *             primitive intersection helpers.
    * visibility: forwarded to node and primitive tests; when it equals
    *             PATH_RAY_SHADOW_OPAQUE the first reported hit returns true
    *             immediately.
    * lcg_state, difl, extmax: only with BVH_HAIR_MINIMUM_WIDTH; forwarded to
    *             the curve intersection helpers, and difl != 0 selects
    *             NODE_INTERSECT_ROBUST for the node test.
    *
    * Returns true on the early shadow exit described above, otherwise whether
    * isect->prim differs from PRIM_NONE once traversal has finished.
41  */
42
43 ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
44                                                      const Ray *ray,
45                                                      Intersection *isect,
46                                                      const uint visibility
47 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
48                                                      , uint *lcg_state,
49                                                      float difl,
50                                                      float extmax
51 #endif
52                                                      )
53 {
54         /* todo:
55          * - test if pushing distance on the stack helps (for non shadow rays)
56          * - separate version for shadow rays
57          * - likely and unlikely for if() statements
58          * - test restrict attribute for pointers
59          */
60
61         /* traversal stack in CUDA thread-local memory */
62         int traversal_stack[BVH_STACK_SIZE];
63         traversal_stack[0] = ENTRYPOINT_SENTINEL;  /* bottom marker: popping it ends the traversal */
64
65         /* traversal variables in registers */
66         int stack_ptr = 0;  /* index of the current top entry of traversal_stack */
67         int node_addr = kernel_data.bvh.root;
68
69         /* ray parameters in registers */
70         float3 P = ray->P;
71         float3 dir = bvh_clamp_direction(ray->D);
72         float3 idir = bvh_inverse_direction(dir);
73         int object = OBJECT_NONE;  /* stays OBJECT_NONE until an instance is entered */
74
75 #if BVH_FEATURE(BVH_MOTION)
76         Transform ob_itfm;
77 #endif
78
79         isect->t = ray->t;  /* isect->t is the t value handed to the node tests below */
80         isect->u = 0.0f;
81         isect->v = 0.0f;
82         isect->prim = PRIM_NONE;
83         isect->object = OBJECT_NONE;
84
85         BVH_DEBUG_INIT();
86
87 #if defined(__KERNEL_SSE2__)
88         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
89         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
90
91         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
92         ssef Psplat[3], idirsplat[3];
93 #  if BVH_FEATURE(BVH_HAIR)
94         ssef tnear(0.0f), tfar(isect->t);
95 #  endif
96         shuffle_swap_t shufflexyz[3];
97
98         Psplat[0] = ssef(P.x);
99         Psplat[1] = ssef(P.y);
100         Psplat[2] = ssef(P.z);
101
102         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
103
104         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
105 #endif
106
107         IsectPrecalc isect_precalc;
108         triangle_intersect_precalc(dir, &isect_precalc);
109
110         /* traversal loop: the inner do/while runs until ENTRYPOINT_SENTINEL is
             * popped; the outer one resumes after an instance has been left */
111         do {
112                 do {
113                         /* traverse internal nodes (non-negative addresses) */
114                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
115                                 int node_addr_child1, traverse_mask;
116                                 float dist[2];
117                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
118
119 #if !defined(__KERNEL_SSE2__)
120 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
121                                 if(difl != 0.0f) {
122                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
123                                                                               P,
124 #    if BVH_FEATURE(BVH_HAIR)
125                                                                               dir,
126 #    endif
127                                                                               idir,
128                                                                               isect->t,
129                                                                               difl,
130                                                                               extmax,
131                                                                               node_addr,
132                                                                               visibility,
133                                                                               dist);
134                                 }
135                                 else
136 #  endif
137                                 {
138                                         traverse_mask = NODE_INTERSECT(kg,
139                                                                        P,
140 #    if BVH_FEATURE(BVH_HAIR)
141                                                                        dir,
142 #    endif
143                                                                        idir,
144                                                                        isect->t,
145                                                                        node_addr,
146                                                                        visibility,
147                                                                        dist);
148                                 }
149 #else // __KERNEL_SSE2__
150 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
151                                 if(difl != 0.0f) {
152                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
153                                                                               P,
154                                                                               dir,
155 #    if BVH_FEATURE(BVH_HAIR)
156                                                                               tnear,
157                                                                               tfar,
158 #    endif
159                                                                               tsplat,
160                                                                               Psplat,
161                                                                               idirsplat,
162                                                                               shufflexyz,
163                                                                               difl,
164                                                                               extmax,
165                                                                               node_addr,
166                                                                               visibility,
167                                                                               dist);
168                                 }
169                                 else
170 #  endif
171                                 {
172                                         traverse_mask = NODE_INTERSECT(kg,
173                                                                        P,
174                                                                        dir,
175 #    if BVH_FEATURE(BVH_HAIR)
176                                                                        tnear,
177                                                                        tfar,
178 #    endif
179                                                                        tsplat,
180                                                                        Psplat,
181                                                                        idirsplat,
182                                                                        shufflexyz,
183                                                                        node_addr,
184                                                                        visibility,
185                                                                        dist);
186                                 }
187 #endif // __KERNEL_SSE2__
188
189                                 node_addr = __float_as_int(cnodes.z);  /* child 0 */
190                                 node_addr_child1 = __float_as_int(cnodes.w);  /* child 1 */
191
192                                 if(traverse_mask == 3) {
193                                         /* Both children were intersected, push the farther one. */
194                                         bool is_closest_child1 = (dist[1] < dist[0]);
195                                         if(is_closest_child1) {
196                                                 int tmp = node_addr;
197                                                 node_addr = node_addr_child1;
198                                                 node_addr_child1 = tmp;
199                                         }
200
201                                         ++stack_ptr;
202                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
203                                         traversal_stack[stack_ptr] = node_addr_child1;
204                                 }
205                                 else {
206                                         /* At most one child was intersected: a mask of 2 selects
                                             * child 1, a mask of 0 pops the stack, and any other value
                                             * keeps child 0, which node_addr already holds. */
207                                         if(traverse_mask == 2) {
208                                                 node_addr = node_addr_child1;
209                                         }
210                                         else if(traverse_mask == 0) {
211                                                 /* Neither child was intersected. */
212                                                 node_addr = traversal_stack[stack_ptr];
213                                                 --stack_ptr;
214                                         }
215                                 }
216                                 BVH_DEBUG_NEXT_NODE();
217                         }
218
219                         /* if node is a leaf (negative address), fetch its record:
                             * leaf.x and leaf.y bound the primitive range, leaf.w is the
                             * primitive type */
220                         if(node_addr < 0) {
221                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
222                                 int prim_addr = __float_as_int(leaf.x);
223
224 #if BVH_FEATURE(BVH_INSTANCING)
225                                 if(prim_addr >= 0) {  /* a negative prim_addr marks an instance, see the else branch */
226 #endif
227                                         const int prim_addr2 = __float_as_int(leaf.y);
228                                         const uint type = __float_as_int(leaf.w);
229
230                                         /* pop the next node to visit before testing this leaf's primitives */
231                                         node_addr = traversal_stack[stack_ptr];
232                                         --stack_ptr;
233
234                                         /* primitive intersection */
235                                         switch(type & PRIMITIVE_ALL) {
236                                                 case PRIMITIVE_TRIANGLE: {
237                                                         for(; prim_addr < prim_addr2; prim_addr++) {
238                                                                 BVH_DEBUG_NEXT_INTERSECTION();
239                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
240                                                                 if(triangle_intersect(kg,
241                                                                                       &isect_precalc,
242                                                                                       isect,
243                                                                                       P,
244                                                                                       visibility,
245                                                                                       object,
246                                                                                       prim_addr))
247                                                                 {
248                                                                         /* shadow ray early termination */
249 #if defined(__KERNEL_SSE2__)
250                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
251                                                                                 return true;
252                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);  /* refresh the SSE copy of isect->t after the hit */
253 #  if BVH_FEATURE(BVH_HAIR)
254                                                                         tfar = ssef(isect->t);
255 #  endif
256 #else
257                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
258                                                                                 return true;
259 #endif
260                                                                 }
261                                                         }
262                                                         break;
263                                                 }
264 #if BVH_FEATURE(BVH_MOTION)
265                                                 case PRIMITIVE_MOTION_TRIANGLE: {
266                                                         for(; prim_addr < prim_addr2; prim_addr++) {
267                                                                 BVH_DEBUG_NEXT_INTERSECTION();
268                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
269                                                                 if(motion_triangle_intersect(kg,
270                                                                                              isect,
271                                                                                              P,
272                                                                                              dir,
273                                                                                              ray->time,
274                                                                                              visibility,
275                                                                                              object,
276                                                                                              prim_addr))
277                                                                 {
278                                                                         /* shadow ray early termination */
279 #  if defined(__KERNEL_SSE2__)
280                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
281                                                                                 return true;
282                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
283 #    if BVH_FEATURE(BVH_HAIR)
284                                                                         tfar = ssef(isect->t);
285 #    endif
286 #  else
287                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
288                                                                                 return true;
289 #  endif
290                                                                 }
291                                                         }
292                                                         break;
293                                                 }
294 #endif  /* BVH_FEATURE(BVH_MOTION) */
295 #if BVH_FEATURE(BVH_HAIR)
296                                                 case PRIMITIVE_CURVE:
297                                                 case PRIMITIVE_MOTION_CURVE: {
298                                                         for(; prim_addr < prim_addr2; prim_addr++) {
299                                                                 BVH_DEBUG_NEXT_INTERSECTION();
300                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
301                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
302                                                                 bool hit;
303                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
304                                                                         hit = bvh_cardinal_curve_intersect(kg,
305                                                                                                            isect,
306                                                                                                            P,
307                                                                                                            dir,
308                                                                                                            visibility,
309                                                                                                            object,
310                                                                                                            prim_addr,
311                                                                                                            ray->time,
312                                                                                                            curve_type,
313                                                                                                            lcg_state,
314                                                                                                            difl,
315                                                                                                            extmax);
316                                                                 }
317                                                                 else {
318                                                                         hit = bvh_curve_intersect(kg,
319                                                                                                   isect,
320                                                                                                   P,
321                                                                                                   dir,
322                                                                                                   visibility,
323                                                                                                   object,
324                                                                                                   prim_addr,
325                                                                                                   ray->time,
326                                                                                                   curve_type,
327                                                                                                   lcg_state,
328                                                                                                   difl,
329                                                                                                   extmax);
330                                                                 }
331                                                                 if(hit) {
332                                                                         /* shadow ray early termination */
333 #  if defined(__KERNEL_SSE2__)
334                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
335                                                                                 return true;
336                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
337 #    if BVH_FEATURE(BVH_HAIR)
338                                                                         tfar = ssef(isect->t);
339 #    endif
340 #  else
341                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
342                                                                                 return true;
343 #  endif
344                                                                 }
345                                                         }
346                                                         break;
347                                                 }
348 #endif  /* BVH_FEATURE(BVH_HAIR) */
349                                         }
350                                 }
351 #if BVH_FEATURE(BVH_INSTANCING)
352                                 else {
353                                         /* instance push: look up the object, let the helper rewrite
                                             * P/dir/idir and isect->t (presumably into the instance's
                                             * local space), then rebuild everything derived from them */
354                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
355
356 #  if BVH_FEATURE(BVH_MOTION)
357                                         isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
358 #  else
359                                         isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
360 #  endif
361                                         triangle_intersect_precalc(dir, &isect_precalc);
362
363 #  if defined(__KERNEL_SSE2__)
364                                         Psplat[0] = ssef(P.x);
365                                         Psplat[1] = ssef(P.y);
366                                         Psplat[2] = ssef(P.z);
367
368                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
369 #    if BVH_FEATURE(BVH_HAIR)
370                                         tfar = ssef(isect->t);
371 #    endif
372
373                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
374 #  endif
375
376                                         ++stack_ptr;
377                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
378                                         traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;  /* popping this ends the inner loop for the instance */
379
380                                         node_addr = kernel_tex_fetch(__object_node, object);  /* continue at the node recorded for this object */
381
382                                         BVH_DEBUG_NEXT_INSTANCE();
383                                 }
384                         }
385 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
386                 } while(node_addr != ENTRYPOINT_SENTINEL);
387
388 #if BVH_FEATURE(BVH_INSTANCING)
389                 if(stack_ptr >= 0) {  /* the sentinel just popped sat above slot 0, so it was pushed for an instance */
390                         kernel_assert(object != OBJECT_NONE);
391
392                         /* instance pop: the helper updates P/dir/idir and isect->t again
                             * (presumably back to the enclosing space), then derived data
                             * is rebuilt */
393 #  if BVH_FEATURE(BVH_MOTION)
394                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
395 #  else
396                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
397 #  endif
398                         triangle_intersect_precalc(dir, &isect_precalc);
399
400 #  if defined(__KERNEL_SSE2__)
401                         Psplat[0] = ssef(P.x);
402                         Psplat[1] = ssef(P.y);
403                         Psplat[2] = ssef(P.z);
404
405                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
406 #    if BVH_FEATURE(BVH_HAIR)
407                         tfar = ssef(isect->t);
408 #    endif
409
410                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
411 #  endif
412
413                         object = OBJECT_NONE;
414                         node_addr = traversal_stack[stack_ptr];
415                         --stack_ptr;
416                 }
417 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
418         } while(node_addr != ENTRYPOINT_SENTINEL);
419
420         return (isect->prim != PRIM_NONE);
421 }
422
/* Entry point for this feature configuration: forwards all arguments to the
 * QBVH variant when __QBVH__ is compiled in and kernel_data.bvh.use_qbvh is
 * set, and to the BVH variant defined above otherwise. The result of the
 * chosen function is returned unchanged. */
423 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
424                                          const Ray *ray,
425                                          Intersection *isect,
426                                          const uint visibility
427 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
428                                          , uint *lcg_state,
429                                          float difl,
430                                          float extmax
431 #endif
432                                          )
433 {
434 #ifdef __QBVH__
435         if(kernel_data.bvh.use_qbvh) {
436                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
437                                                     ray,
438                                                     isect,
439                                                     visibility
440 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
441                                                     , lcg_state,
442                                                     difl,
443                                                     extmax
444 #endif
445                                                     );
446         }
447         else
448 #endif
449         {  /* else-branch when __QBVH__ is defined, a plain block otherwise */
450                 kernel_assert(kernel_data.bvh.use_qbvh == false);
451                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
452                                                    ray,
453                                                    isect,
454                                                    visibility
455 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
456                                                    , lcg_state,
457                                                    difl,
458                                                    extmax
459 #endif
460                                                    );
461         }
462 }
463
/* Drop the macros used by this instantiation. BVH_FUNCTION_NAME and
 * BVH_FUNCTION_FEATURES are not defined in this file, so presumably the
 * including file sets them before each inclusion. */
464 #undef BVH_FUNCTION_NAME
465 #undef BVH_FUNCTION_FEATURES
466 #undef NODE_INTERSECT
467 #undef NODE_INTERSECT_ROBUST