Cycles: Move BVH kernel files to own directory
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "qbvh_traversal.h"
22 #endif
23
/* Select the node intersection routines used by the traversal function below:
 * the bvh_node_intersect variants when BVH_HAIR is enabled, and the
 * bvh_aligned_node_intersect variants otherwise. Both macros are #undef'd
 * again at the end of this file.
 */
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
27 #else
28 #  define NODE_INTERSECT bvh_aligned_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
30 #endif
31
32 /* This is a template BVH traversal function, where various features can be
33  * enabled/disabled. This way we can compile optimized versions for each case
34  * without new features slowing things down.
35  *
36  * BVH_INSTANCING: object instancing
37  * BVH_HAIR: hair curve rendering
38  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
39  * BVH_MOTION: motion blur rendering
40  *
41  */
42
/* Binary BVH traversal for a single ray, specialised by the enabled
 * BVH_FEATURE() flags.
 *
 * Starts at kernel_data.bvh.root and walks the tree with an explicit stack.
 * isect->t is initialised to ray->t and is passed to every node test as the
 * current distance limit; isect itself is handed to the primitive
 * intersection routines (which presumably update it on a hit -- they are not
 * visible in this file).
 *
 * kg:         kernel globals, forwarded to the node and primitive tests.
 * ray:        ray to trace; P, D, t and time are read here.
 * isect:      output record; reset to "no hit" (prim = PRIM_NONE,
 *             object = OBJECT_NONE, u = v = 0) on entry.
 * visibility: forwarded to the node and primitive tests; when it equals
 *             PATH_RAY_SHADOW_OPAQUE the function returns true right after
 *             the first primitive hit.
 * lcg_state, difl, extmax:
 *             only part of the signature with BVH_HAIR_MINIMUM_WIDTH;
 *             forwarded to the curve intersection routines, and a non-zero
 *             difl selects NODE_INTERSECT_ROBUST for the node tests.
 *
 * Returns true when a primitive was hit, i.e. isect->prim != PRIM_NONE
 * (or early, see visibility above).
 */
43 ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
44                                             const Ray *ray,
45                                             Intersection *isect,
46                                             const uint visibility
47 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
48                                             , uint *lcg_state,
49                                             float difl,
50                                             float extmax
51 #endif
52                                             )
53 {
54         /* todo:
55          * - test if pushing distance on the stack helps (for non shadow rays)
56          * - separate version for shadow rays
57          * - likely and unlikely for if() statements
58          * - test restrict attribute for pointers
59          */
60
61         /* traversal stack in CUDA thread-local memory */
62         int traversalStack[BVH_STACK_SIZE];
           /* Bottom entry is a sentinel: popping it ends the outer loop. */
63         traversalStack[0] = ENTRYPOINT_SENTINEL;
64
65         /* traversal variables in registers */
66         int stackPtr = 0;
67         int nodeAddr = kernel_data.bvh.root;
68
69         /* ray parameters in registers */
70         float3 P = ray->P;
71         float3 dir = bvh_clamp_direction(ray->D);
72         float3 idir = bvh_inverse_direction(dir);
           /* OBJECT_NONE while in the top-level tree; set to the instanced
            * object between an instance push and the matching pop. */
73         int object = OBJECT_NONE;
74
75 #if BVH_FEATURE(BVH_MOTION)
76         Transform ob_itfm;
77 #endif
78
           /* Start from "no hit", with the ray length as the distance limit. */
79         isect->t = ray->t;
80         isect->u = 0.0f;
81         isect->v = 0.0f;
82         isect->prim = PRIM_NONE;
83         isect->object = OBJECT_NONE;
84
85         BVH_DEBUG_INIT();
86
87 #if defined(__KERNEL_SSE2__)
           /* Precomputed SIMD inputs for the SSE2 node tests. They are rebuilt
            * below whenever P, idir or isect->t change. */
88         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
89         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
90
91         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
92         ssef Psplat[3], idirsplat[3];
93 #  if BVH_FEATURE(BVH_HAIR)
94         ssef tnear(0.0f), tfar(isect->t);
95 #  endif
96         shuffle_swap_t shufflexyz[3];
97
98         Psplat[0] = ssef(P.x);
99         Psplat[1] = ssef(P.y);
100         Psplat[2] = ssef(P.z);
101
102         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
103
104         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
105 #endif
106
107         IsectPrecalc isect_precalc;
108         triangle_intersect_precalc(dir, &isect_precalc);
109
110         /* traversal loop */
111         do {
112                 do {
113                         /* traverse internal nodes */
                            /* Non-negative addresses are inner nodes; negative
                             * ones are leaves and are handled after this loop. */
114                         while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
115                                 int nodeAddrChild1, traverse_mask;
116                                 float dist[2];
117                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr+0);
118
119 #if !defined(__KERNEL_SSE2__)
120 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
121                                 if(difl != 0.0f) {
122                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
123                                                                               P,
124 #    if BVH_FEATURE(BVH_HAIR)
125                                                                               dir,
126 #    endif
127                                                                               idir,
128                                                                               isect->t,
129                                                                               difl,
130                                                                               extmax,
131                                                                               nodeAddr,
132                                                                               visibility,
133                                                                               dist);
134                                 }
135                                 else
136 #  endif
137                                 {
138                                         traverse_mask = NODE_INTERSECT(kg,
139                                                                        P,
140 #    if BVH_FEATURE(BVH_HAIR)
141                                                                        dir,
142 #    endif
143                                                                        idir,
144                                                                        isect->t,
145                                                                        nodeAddr,
146                                                                        visibility,
147                                                                        dist);
148                                 }
149 #else // __KERNEL_SSE2__
150 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
151                                 if(difl != 0.0f) {
152                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
153                                                                               P,
154                                                                               dir,
155 #    if BVH_FEATURE(BVH_HAIR)
156                                                                               tnear,
157                                                                               tfar,
158 #    endif
159                                                                               tsplat,
160                                                                               Psplat,
161                                                                               idirsplat,
162                                                                               shufflexyz,
163                                                                               difl,
164                                                                               extmax,
165                                                                               nodeAddr,
166                                                                               visibility,
167                                                                               dist);
168                                 }
169                                 else
170 #  endif
171                                 {
172                                         traverse_mask = NODE_INTERSECT(kg,
173                                                                        P,
174                                                                        dir,
175 #    if BVH_FEATURE(BVH_HAIR)
176                                                                        tnear,
177                                                                        tfar,
178 #    endif
179                                                                        tsplat,
180                                                                        Psplat,
181                                                                        idirsplat,
182                                                                        shufflexyz,
183                                                                        nodeAddr,
184                                                                        visibility,
185                                                                        dist);
186                                 }
187 #endif // __KERNEL_SSE2__
188
                                    /* Child addresses are stored as int bit patterns in
                                     * cnodes.z / cnodes.w. Default to the first child. */
189                                 nodeAddr = __float_as_int(cnodes.z);
190                                 nodeAddrChild1 = __float_as_int(cnodes.w);
191
                                    /* traverse_mask as handled below: 3 = both children,
                                     * 2 = only the second child, 0 = neither; any other
                                     * value keeps the first child selected above. */
192                                 if(traverse_mask == 3) {
193                                         /* Both children were intersected, push the farther one. */
194                                         bool closestChild1 = (dist[1] < dist[0]);
195
196                                         if(closestChild1) {
197                                                 int tmp = nodeAddr;
198                                                 nodeAddr = nodeAddrChild1;
199                                                 nodeAddrChild1 = tmp;
200                                         }
201
202                                         ++stackPtr;
203                                         kernel_assert(stackPtr < BVH_STACK_SIZE);
204                                         traversalStack[stackPtr] = nodeAddrChild1;
205                                 }
206                                 else {
207                                         /* One child was intersected. */
208                                         if(traverse_mask == 2) {
209                                                 nodeAddr = nodeAddrChild1;
210                                         }
211                                         else if(traverse_mask == 0) {
212                                                 /* Neither child was intersected. */
213                                                 nodeAddr = traversalStack[stackPtr];
214                                                 --stackPtr;
215                                         }
216                                 }
217                                 BVH_DEBUG_NEXT_STEP();
218                         }
219
220                         /* if node is leaf, fetch triangle list */
221                         if(nodeAddr < 0) {
                                    /* Leaf index is (-nodeAddr-1). leaf.x / leaf.y hold the
                                     * primitive range [primAddr, primAddr2) and leaf.w the
                                     * primitive type. With BVH_INSTANCING a negative leaf.x
                                     * marks an instance instead (see the else branch). */
222                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1));
223                                 int primAddr = __float_as_int(leaf.x);
224
225 #if BVH_FEATURE(BVH_INSTANCING)
226                                 if(primAddr >= 0) {
227 #endif
228                                         const int primAddr2 = __float_as_int(leaf.y);
229                                         const uint type = __float_as_int(leaf.w);
230
231                                         /* pop */
232                                         nodeAddr = traversalStack[stackPtr];
233                                         --stackPtr;
234
235                                         /* primitive intersection */
236                                         switch(type & PRIMITIVE_ALL) {
237                                                 case PRIMITIVE_TRIANGLE: {
238                                                         for(; primAddr < primAddr2; primAddr++) {
239                                                                 BVH_DEBUG_NEXT_STEP();
240                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
241                                                                 if(triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr)) {
242                                                                         /* shadow ray early termination */
243 #if defined(__KERNEL_SSE2__)
244                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
245                                                                                 return true;
                                                                            /* Refresh the SIMD distance limit from isect->t. */
246                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
247 #  if BVH_FEATURE(BVH_HAIR)
248                                                                         tfar = ssef(isect->t);
249 #  endif
250 #else
251                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
252                                                                                 return true;
253 #endif
254                                                                 }
255                                                         }
256                                                         break;
257                                                 }
258 #if BVH_FEATURE(BVH_MOTION)
259                                                 case PRIMITIVE_MOTION_TRIANGLE: {
260                                                         for(; primAddr < primAddr2; primAddr++) {
261                                                                 BVH_DEBUG_NEXT_STEP();
262                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
263                                                                 if(motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr)) {
264                                                                         /* shadow ray early termination */
265 #  if defined(__KERNEL_SSE2__)
266                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
267                                                                                 return true;
268                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
269 #    if BVH_FEATURE(BVH_HAIR)
270                                                                         tfar = ssef(isect->t);
271 #    endif
272 #  else
273                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
274                                                                                 return true;
275 #  endif
276                                                                 }
277                                                         }
278                                                         break;
279                                                 }
280 #endif  /* BVH_FEATURE(BVH_MOTION) */
281 #if BVH_FEATURE(BVH_HAIR)
                                                    /* NOTE(review): lcg_state, difl and extmax are only
                                                     * declared when BVH_HAIR_MINIMUM_WIDTH is enabled, yet
                                                     * this BVH_HAIR-only branch uses them; presumably the
                                                     * two features are always enabled together -- confirm. */
282                                                 case PRIMITIVE_CURVE:
283                                                 case PRIMITIVE_MOTION_CURVE: {
284                                                         for(; primAddr < primAddr2; primAddr++) {
285                                                                 BVH_DEBUG_NEXT_STEP();
286                                                                 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
287                                                                 bool hit;
288                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
289                                                                         hit = bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
290                                                                 else
291                                                                         hit = bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, lcg_state, difl, extmax);
292                                                                 if(hit) {
293                                                                         /* shadow ray early termination */
294 #  if defined(__KERNEL_SSE2__)
295                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
296                                                                                 return true;
297                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
298 #    if BVH_FEATURE(BVH_HAIR)
299                                                                         tfar = ssef(isect->t);
300 #    endif
301 #  else
302                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
303                                                                                 return true;
304 #  endif
305                                                                 }
306                                                         }
307                                                         break;
308                                                 }
309 #endif  /* BVH_FEATURE(BVH_HAIR) */
310                                         }
311                                 }
312 #if BVH_FEATURE(BVH_INSTANCING)
313                                 else {
314                                         /* instance push */
                                            /* (-primAddr-1) indexes __prim_object to find the
                                             * instanced object. */
315                                         object = kernel_tex_fetch(__prim_object, -primAddr-1);
316
                                            /* The push helpers receive pointers to P, dir, idir and
                                             * isect->t; presumably they move the ray into the
                                             * instance's space (helpers not visible here). */
317 #  if BVH_FEATURE(BVH_MOTION)
318                                         bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
319 #  else
320                                         bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
321 #  endif
                                            /* dir may have changed, so redo the triangle precalc. */
322                                         triangle_intersect_precalc(dir, &isect_precalc);
323
324 #  if defined(__KERNEL_SSE2__)
325                                         Psplat[0] = ssef(P.x);
326                                         Psplat[1] = ssef(P.y);
327                                         Psplat[2] = ssef(P.z);
328
329                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
330 #    if BVH_FEATURE(BVH_HAIR)
331                                         tfar = ssef(isect->t);
332 #    endif
333
334                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
335 #  endif
336
                                            /* Push a sentinel so the inner do/while exits once the
                                             * instance's subtree is exhausted; the instance pop
                                             * below then runs. */
337                                         ++stackPtr;
338                                         kernel_assert(stackPtr < BVH_STACK_SIZE);
339                                         traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
340
341                                         nodeAddr = kernel_tex_fetch(__object_node, object);
342
343                                         BVH_DEBUG_NEXT_INSTANCE();
344                                 }
345                         }
346 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
347                 } while(nodeAddr != ENTRYPOINT_SENTINEL);
348
349 #if BVH_FEATURE(BVH_INSTANCING)
                    /* A sentinel was just popped. stackPtr >= 0 means it was one
                     * pushed by an instance push; popping the bottom-of-stack
                     * sentinel (index 0) leaves stackPtr at -1 instead. */
350                 if(stackPtr >= 0) {
351                         kernel_assert(object != OBJECT_NONE);
352
353                         /* instance pop */
354 #  if BVH_FEATURE(BVH_MOTION)
355                         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
356 #  else
357                         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
358 #  endif
359                         triangle_intersect_precalc(dir, &isect_precalc);
360
361 #  if defined(__KERNEL_SSE2__)
362                         Psplat[0] = ssef(P.x);
363                         Psplat[1] = ssef(P.y);
364                         Psplat[2] = ssef(P.z);
365
366                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
367 #    if BVH_FEATURE(BVH_HAIR)
368                         tfar = ssef(isect->t);
369 #    endif
370
371                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
372 #  endif
373
                            /* Back in the top-level tree: resume with the node that
                             * was on the stack below the instance's sentinel. */
374                         object = OBJECT_NONE;
375                         nodeAddr = traversalStack[stackPtr];
376                         --stackPtr;
377                 }
378 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
379         } while(nodeAddr != ENTRYPOINT_SENTINEL);
380
381         return (isect->prim != PRIM_NONE);
382 }
383
384 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
385                                          const Ray *ray,
386                                          Intersection *isect,
387                                          const uint visibility
388 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
389                                          , uint *lcg_state,
390                                          float difl,
391                                          float extmax
392 #endif
393                                          )
394 {
395 #ifdef __QBVH__
396         if(kernel_data.bvh.use_qbvh) {
397                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
398                                                     ray,
399                                                     isect,
400                                                     visibility
401 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
402                                                     , lcg_state,
403                                                     difl,
404                                                     extmax
405 #endif
406                                                     );
407         }
408         else
409 #endif
410         {
411                 kernel_assert(kernel_data.bvh.use_qbvh == false);
412                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
413                                                    ray,
414                                                    isect,
415                                                    visibility
416 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
417                                                    , lcg_state,
418                                                    difl,
419                                                    extmax
420 #endif
421                                                    );
422         }
423 }
424
/* Drop the per-variant macros. NODE_INTERSECT and NODE_INTERSECT_ROBUST are
 * defined near the top of this file; BVH_FUNCTION_NAME and
 * BVH_FUNCTION_FEATURES are not defined here -- presumably the including file
 * sets them before each inclusion of this template (TODO confirm).
 */
425 #undef BVH_FUNCTION_NAME
426 #undef BVH_FUNCTION_FEATURES
427 #undef NODE_INTERSECT
428 #undef NODE_INTERSECT_ROBUST