Cleanup: remove redundant, invalid info from headers
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "kernel/bvh/qbvh_traversal.h"
22 #endif
23 #ifdef __KERNEL_AVX2__
24 #  include "kernel/bvh/obvh_traversal.h"
25 #endif
26
27 #if BVH_FEATURE(BVH_HAIR)
28 #  define NODE_INTERSECT bvh_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
30 #else
31 #  define NODE_INTERSECT bvh_aligned_node_intersect
32 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
33 #endif
34
35 /* Template for traversing the two-child BVH; features are enabled/disabled
36  * through BVH_FEATURE() so optimized versions can be compiled for each case
37  * without new features slowing things down. Returns true once the traversal
38  * ends with isect->prim != PRIM_NONE, or immediately on the first reported
39  * hit when visibility contains PATH_RAY_SHADOW_OPAQUE. Feature flags:
40  * BVH_INSTANCING: object instancing; BVH_HAIR: hair curve rendering
41  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
42  * BVH_MOTION: motion blur rendering
43  */
44
45 ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
46                                                      const Ray *ray,  /* ray to trace; P, D and t are read here */
47                                                      Intersection *isect,  /* in/out hit record, reset below before traversal */
48                                                      const uint visibility  /* mask handed to the node and primitive tests */
49 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
50                                                      , uint *lcg_state,  /* forwarded to the curve intersectors */
51                                                      float difl,  /* non-zero selects NODE_INTERSECT_ROBUST */
52                                                      float extmax  /* forwarded together with difl */
53 #endif
54                                                      )
55 {
56         /* todo:
57          * - test if pushing distance on the stack helps (for non shadow rays)
58          * - separate version for shadow rays
59          * - likely and unlikely for if() statements
60          * - test restrict attribute for pointers
61          */
62
63         /* traversal stack in CUDA thread-local memory; slot 0 holds the sentinel that ends traversal */
64         int traversal_stack[BVH_STACK_SIZE];
65         traversal_stack[0] = ENTRYPOINT_SENTINEL;
66
67         /* traversal variables in registers */
68         int stack_ptr = 0;  /* index of the top stack entry */
69         int node_addr = kernel_data.bvh.root;
70
71         /* ray parameters in registers */
72         float3 P = ray->P;
73         float3 dir = bvh_clamp_direction(ray->D);
74         float3 idir = bvh_inverse_direction(dir);
75         int object = OBJECT_NONE;  /* set on instance push, reset to OBJECT_NONE on instance pop */
76
77 #if BVH_FEATURE(BVH_MOTION)
78         Transform ob_itfm;  /* passed by pointer to bvh_instance_motion_push()/_pop() */
79 #endif
80
81         isect->t = ray->t;  /* isect->t is the distance passed to every node test below */
82         isect->u = 0.0f;
83         isect->v = 0.0f;
84         isect->prim = PRIM_NONE;
85         isect->object = OBJECT_NONE;
86
87         BVH_DEBUG_INIT();
88
89 #if defined(__KERNEL_SSE2__)
90         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
91         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
92
93         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
94         ssef Psplat[3], idirsplat[3];
95 #  if BVH_FEATURE(BVH_HAIR)
96         ssef tnear(0.0f), tfar(isect->t);
97 #  endif
98         shuffle_swap_t shufflexyz[3];
99
100         Psplat[0] = ssef(P.x);
101         Psplat[1] = ssef(P.y);
102         Psplat[2] = ssef(P.z);
103
104         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);  /* carries -isect->t for the SSE node test */
105
106         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
107 #endif
108
109         /* traversal loop */
110         do {
111                 do {
112                         /* traverse internal nodes (non-negative addresses; negative ones are leaves) */
113                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
114                                 int node_addr_child1, traverse_mask;
115                                 float dist[2];
116                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
117
118 #if !defined(__KERNEL_SSE2__)
119 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
120                                 if(difl != 0.0f) {
121                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
122                                                                               P,
123 #    if BVH_FEATURE(BVH_HAIR)
124                                                                               dir,
125 #    endif
126                                                                               idir,
127                                                                               isect->t,
128                                                                               difl,
129                                                                               extmax,
130                                                                               node_addr,
131                                                                               visibility,
132                                                                               dist);
133                                 }
134                                 else
135 #  endif
136                                 {
137                                         traverse_mask = NODE_INTERSECT(kg,
138                                                                        P,
139 #    if BVH_FEATURE(BVH_HAIR)
140                                                                        dir,
141 #    endif
142                                                                        idir,
143                                                                        isect->t,
144                                                                        node_addr,
145                                                                        visibility,
146                                                                        dist);
147                                 }
148 #else  // __KERNEL_SSE2__
149 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
150                                 if(difl != 0.0f) {
151                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
152                                                                               P,
153                                                                               dir,
154 #    if BVH_FEATURE(BVH_HAIR)
155                                                                               tnear,
156                                                                               tfar,
157 #    endif
158                                                                               tsplat,
159                                                                               Psplat,
160                                                                               idirsplat,
161                                                                               shufflexyz,
162                                                                               difl,
163                                                                               extmax,
164                                                                               node_addr,
165                                                                               visibility,
166                                                                               dist);
167                                 }
168                                 else
169 #  endif
170                                 {
171                                         traverse_mask = NODE_INTERSECT(kg,
172                                                                        P,
173                                                                        dir,
174 #    if BVH_FEATURE(BVH_HAIR)
175                                                                        tnear,
176                                                                        tfar,
177 #    endif
178                                                                        tsplat,
179                                                                        Psplat,
180                                                                        idirsplat,
181                                                                        shufflexyz,
182                                                                        node_addr,
183                                                                        visibility,
184                                                                        dist);
185                                 }
186 #endif  // __KERNEL_SSE2__
187
188                                 node_addr = __float_as_int(cnodes.z);  /* child 0 address */
189                                 node_addr_child1 = __float_as_int(cnodes.w);  /* child 1 address */
190
191                                 if(traverse_mask == 3) {
192                                         /* Both children were intersected, push the farther one. */
193                                         bool is_closest_child1 = (dist[1] < dist[0]);
194                                         if(is_closest_child1) {
195                                                 int tmp = node_addr;
196                                                 node_addr = node_addr_child1;
197                                                 node_addr_child1 = tmp;
198                                         }
199
200                                         ++stack_ptr;
201                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
202                                         traversal_stack[stack_ptr] = node_addr_child1;
203                                 }
204                                 else {
205                                         /* At most one child was intersected; for any other mask node_addr stays on child 0. */
206                                         if(traverse_mask == 2) {
207                                                 node_addr = node_addr_child1;
208                                         }
209                                         else if(traverse_mask == 0) {
210                                                 /* Neither child was intersected, continue with the node popped from the stack. */
211                                                 node_addr = traversal_stack[stack_ptr];
212                                                 --stack_ptr;
213                                         }
214                                 }
215                                 BVH_DEBUG_NEXT_NODE();
216                         }
217
218                         /* if node is leaf, fetch triangle list */
219                         if(node_addr < 0) {
220                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
221                                 int prim_addr = __float_as_int(leaf.x);  /* negative values take the instance branch below */
222
223 #if BVH_FEATURE(BVH_INSTANCING)
224                                 if(prim_addr >= 0) {
225 #endif
226                                         const int prim_addr2 = __float_as_int(leaf.y);  /* one past the last primitive of the leaf */
227                                         const uint type = __float_as_int(leaf.w);
228
229                                         /* pop the next node now; the primitive loops below do not modify node_addr */
230                                         node_addr = traversal_stack[stack_ptr];
231                                         --stack_ptr;
232
233                                         /* primitive intersection */
234                                         switch(type & PRIMITIVE_ALL) {
235                                                 case PRIMITIVE_TRIANGLE: {
236                                                         for(; prim_addr < prim_addr2; prim_addr++) {
237                                                                 BVH_DEBUG_NEXT_INTERSECTION();
238                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
239                                                                 if(triangle_intersect(kg,
240                                                                                       isect,
241                                                                                       P,
242                                                                                       dir,
243                                                                                       visibility,
244                                                                                       object,
245                                                                                       prim_addr))
246                                                                 {
247                                                                         /* shadow ray early termination */
248 #if defined(__KERNEL_SSE2__)
249                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
250                                                                                 return true;
251                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);  /* refresh SSE limits from isect->t after the hit */
252 #  if BVH_FEATURE(BVH_HAIR)
253                                                                         tfar = ssef(isect->t);
254 #  endif
255 #else
256                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
257                                                                                 return true;
258 #endif
259                                                                 }
260                                                         }
261                                                         break;
262                                                 }
263 #if BVH_FEATURE(BVH_MOTION)
264                                                 case PRIMITIVE_MOTION_TRIANGLE: {
265                                                         for(; prim_addr < prim_addr2; prim_addr++) {
266                                                                 BVH_DEBUG_NEXT_INTERSECTION();
267                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
268                                                                 if(motion_triangle_intersect(kg,
269                                                                                              isect,
270                                                                                              P,
271                                                                                              dir,
272                                                                                              ray->time,
273                                                                                              visibility,
274                                                                                              object,
275                                                                                              prim_addr))
276                                                                 {
277                                                                         /* shadow ray early termination */
278 #  if defined(__KERNEL_SSE2__)
279                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
280                                                                                 return true;
281                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);  /* refresh SSE limits from isect->t after the hit */
282 #    if BVH_FEATURE(BVH_HAIR)
283                                                                         tfar = ssef(isect->t);
284 #    endif
285 #  else
286                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
287                                                                                 return true;
288 #  endif
289                                                                 }
290                                                         }
291                                                         break;
292                                                 }
293 #endif  /* BVH_FEATURE(BVH_MOTION) */
294 #if BVH_FEATURE(BVH_HAIR)
295                                                 case PRIMITIVE_CURVE:
296                                                 case PRIMITIVE_MOTION_CURVE: {
297                                                         for(; prim_addr < prim_addr2; prim_addr++) {
298                                                                 BVH_DEBUG_NEXT_INTERSECTION();
299                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
300                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
301                                                                 bool hit;
302                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {  /* scene flag picks the curve intersector */
303                                                                         hit = cardinal_curve_intersect(kg,
304                                                                                                        isect,
305                                                                                                        P,
306                                                                                                        dir,
307                                                                                                        visibility,
308                                                                                                        object,
309                                                                                                        prim_addr,
310                                                                                                        ray->time,
311                                                                                                        curve_type,
312                                                                                                        lcg_state,
313                                                                                                        difl,
314                                                                                                        extmax);
315                                                                 }
316                                                                 else {
317                                                                         hit = curve_intersect(kg,
318                                                                                               isect,
319                                                                                               P,
320                                                                                               dir,
321                                                                                               visibility,
322                                                                                               object,
323                                                                                               prim_addr,
324                                                                                               ray->time,
325                                                                                               curve_type,
326                                                                                               lcg_state,
327                                                                                               difl,
328                                                                                               extmax);
329                                                                 }
330                                                                 if(hit) {
331                                                                         /* shadow ray early termination */
332 #  if defined(__KERNEL_SSE2__)
333                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
334                                                                                 return true;
335                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);  /* refresh SSE limits from isect->t after the hit */
336 #    if BVH_FEATURE(BVH_HAIR)
337                                                                         tfar = ssef(isect->t);
338 #    endif
339 #  else
340                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE)
341                                                                                 return true;
342 #  endif
343                                                                 }
344                                                         }
345                                                         break;
346                                                 }
347 #endif  /* BVH_FEATURE(BVH_HAIR) */
348                                         }
349                                 }  /* closes if(prim_addr >= 0) with instancing, otherwise if(node_addr < 0) */
350 #if BVH_FEATURE(BVH_INSTANCING)
351                                 else {
352                                         /* instance push: P, dir and idir are passed by pointer and isect->t is replaced by the returned value */
353                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
354
355 #  if BVH_FEATURE(BVH_MOTION)
356                                         isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
357 #  else
358                                         isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
359 #  endif
360
361 #  if defined(__KERNEL_SSE2__)
362                                         Psplat[0] = ssef(P.x);
363                                         Psplat[1] = ssef(P.y);
364                                         Psplat[2] = ssef(P.z);
365
366                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
367 #    if BVH_FEATURE(BVH_HAIR)
368                                         tfar = ssef(isect->t);
369 #    endif
370
371                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
372 #  endif
373
374                                         ++stack_ptr;
375                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
376                                         traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;  /* popping this ends the inner loop and triggers the instance pop */
377
378                                         node_addr = kernel_tex_fetch(__object_node, object);  /* continue at the node stored for this object */
379
380                                         BVH_DEBUG_NEXT_INSTANCE();
381                                 }
382                         }
383 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
384                 } while(node_addr != ENTRYPOINT_SENTINEL);
385
386 #if BVH_FEATURE(BVH_INSTANCING)
387                 if(stack_ptr >= 0) {  /* the sentinel came from an instance push, not from stack slot 0 */
388                         kernel_assert(object != OBJECT_NONE);
389
390                         /* instance pop */
391 #  if BVH_FEATURE(BVH_MOTION)
392                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
393 #  else
394                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
395 #  endif
396
397 #  if defined(__KERNEL_SSE2__)
398                         Psplat[0] = ssef(P.x);
399                         Psplat[1] = ssef(P.y);
400                         Psplat[2] = ssef(P.z);
401
402                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
403 #    if BVH_FEATURE(BVH_HAIR)
404                         tfar = ssef(isect->t);
405 #    endif
406
407                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
408 #  endif
409
410                         object = OBJECT_NONE;
411                         node_addr = traversal_stack[stack_ptr];  /* resume with the node that was pending before the instance push */
412                         --stack_ptr;
413                 }
414 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
415         } while(node_addr != ENTRYPOINT_SENTINEL);
416
417         return (isect->prim != PRIM_NONE);
418 }
419
/* Entry point for this traversal variant: selects the implementation that
 * matches kernel_data.bvh.bvh_layout and forwards every argument unchanged.
 * The BVH8 case is compiled only with __KERNEL_AVX2__ and the BVH4 case only
 * with __QBVH__; the BVH2 case is always compiled. A layout with no compiled
 * case falls out of the switch, reaches kernel_assert() and returns false. */
420 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
421                                          const Ray *ray,
422                                          Intersection *isect,
423                                          const uint visibility
424 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
425                                          , uint *lcg_state,
426                                          float difl,
427                                          float extmax
428 #endif
429                                          )
430 {
431         switch(kernel_data.bvh.bvh_layout) {  /* every compiled case returns directly */
432 #ifdef __KERNEL_AVX2__
433                 case BVH_LAYOUT_BVH8:
434                         return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
435                                                             ray,
436                                                             isect,
437                                                             visibility
438 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
439                                                             , lcg_state,
440                                                             difl,
441                                                             extmax
442 #  endif
443                                                             );
444 #endif
445 #ifdef __QBVH__
446                 case BVH_LAYOUT_BVH4:
447                         return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
448                                                             ray,
449                                                             isect,
450                                                             visibility
451 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
452                                                             , lcg_state,
453                                                             difl,
454                                                             extmax
455 #  endif
456                                                             );
457 #endif  /* __QBVH__ */
458                 case BVH_LAYOUT_BVH2:
459                         return BVH_FUNCTION_FULL_NAME(BVH)(kg,
460                                                            ray,
461                                                            isect,
462                                                            visibility
463 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
464                                                            , lcg_state,
465                                                            difl,
466                                                            extmax
467 #endif
468                                                            );
469         }
470         kernel_assert(!"Should not happen");  /* reached only when no case above matched the layout */
471         return false;
472 }
473
474 #undef BVH_FUNCTION_NAME
475 #undef BVH_FUNCTION_FEATURES
476 #undef NODE_INTERSECT
477 #undef NODE_INTERSECT_ROBUST