Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / kernel / bvh / bvh_traversal.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2013, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "qbvh_traversal.h"
22 #endif
23
/* Select the node intersection routines used by the traversal function
 * below: the bvh_node_intersect variants when the BVH_HAIR feature is
 * enabled, the bvh_aligned_node_intersect variants otherwise.
 * Both macros are #undef'd again at the end of this file.
 */
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #  define NODE_INTERSECT_ROBUST bvh_node_intersect_robust
27 #else
28 #  define NODE_INTERSECT bvh_aligned_node_intersect
29 #  define NODE_INTERSECT_ROBUST bvh_aligned_node_intersect_robust
30 #endif
31
32 /* This is a template BVH traversal function, where various features can be
33  * enabled/disabled. This way we can compile optimized versions for each case
34  * without new features slowing things down.
35  *
36  * BVH_INSTANCING: object instancing
37  * BVH_HAIR: hair curve rendering
38  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
39  * BVH_MOTION: motion blur rendering
40  *
41  */
42
/* Stack-based traversal of the two-child BVH for a single ray.
 *
 * Starts at kernel_data.bvh.root and walks inner nodes, leaves and (with
 * BVH_INSTANCING) instanced sub-trees until the ENTRYPOINT_SENTINEL placed at
 * the bottom of the traversal stack is popped.
 *
 * kg         - kernel globals, forwarded to every fetch/intersection helper.
 * ray        - input ray; P, D, t and (for motion/curves) time are read.
 * isect      - output; reset to "no hit" on entry (t = ray->t, prim = PRIM_NONE,
 *              object = OBJECT_NONE) and handed to the primitive intersection
 *              routines, which are expected to fill it in (their definitions
 *              are not part of this file).
 * visibility - forwarded to the node and primitive tests; when it equals
 *              PATH_RAY_SHADOW_OPAQUE the function returns on the first hit.
 * lcg_state, difl, extmax
 *            - only present with BVH_HAIR_MINIMUM_WIDTH; forwarded to the
 *              curve intersection routines, and a non-zero difl selects
 *              NODE_INTERSECT_ROBUST for the node tests.
 *
 * Returns true when a hit was recorded (isect->prim != PRIM_NONE), or
 * immediately on the first hit for PATH_RAY_SHADOW_OPAQUE rays.
 *
 * Declared ccl_device for non-GPU builds and ccl_device_inline when
 * __KERNEL_GPU__ is defined.
 */
43 #ifndef __KERNEL_GPU__
44 ccl_device
45 #else
46 ccl_device_inline
47 #endif
48 bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
49                                  const Ray *ray,
50                                  Intersection *isect,
51                                  const uint visibility
52 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
53                                  , uint *lcg_state,
54                                  float difl,
55                                  float extmax
56 #endif
57                                  )
58 {
59         /* todo:
60          * - test if pushing distance on the stack helps (for non shadow rays)
61          * - separate version for shadow rays
62          * - likely and unlikely for if() statements
63          * - test restrict attribute for pointers
64          */
65
66         /* traversal stack in CUDA thread-local memory */
67         int traversal_stack[BVH_STACK_SIZE];
           /* Bottom-of-stack marker: popping it terminates the loops below. */
68         traversal_stack[0] = ENTRYPOINT_SENTINEL;
69
70         /* traversal variables in registers */
71         int stack_ptr = 0;
72         int node_addr = kernel_data.bvh.root;
73
74         /* ray parameters in registers */
75         float3 P = ray->P;
76         float3 dir = bvh_clamp_direction(ray->D);
77         float3 idir = bvh_inverse_direction(dir);
           /* OBJECT_NONE while traversing the top level; set to the instanced
            * object between an instance push and the matching pop. */
78         int object = OBJECT_NONE;
79
80 #if BVH_FEATURE(BVH_MOTION)
           /* Filled by bvh_instance_motion_push() and handed back to
            * bvh_instance_motion_pop(). */
81         Transform ob_itfm;
82 #endif
83
           /* Start with "no hit". isect->t is seeded with the ray length and is
            * the distance value passed to the node tests below. */
84         isect->t = ray->t;
85         isect->u = 0.0f;
86         isect->v = 0.0f;
87         isect->prim = PRIM_NONE;
88         isect->object = OBJECT_NONE;
89
90         BVH_DEBUG_INIT();
91
92 #if defined(__KERNEL_SSE2__)
93         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
94         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
95
96         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
97         ssef Psplat[3], idirsplat[3];
98 #  if BVH_FEATURE(BVH_HAIR)
99         ssef tnear(0.0f), tfar(isect->t);
100 #  endif
101         shuffle_swap_t shufflexyz[3];
102
103         Psplat[0] = ssef(P.x);
104         Psplat[1] = ssef(P.y);
105         Psplat[2] = ssef(P.z);
106
            /* tsplat caches -isect->t in two of its lanes; it is rebuilt below
             * every time isect->t may have changed (hit, instance push/pop). */
107         ssef tsplat(0.0f, 0.0f, -isect->t, -isect->t);
108
109         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
110 #endif
111
112         IsectPrecalc isect_precalc;
113         triangle_intersect_precalc(dir, &isect_precalc);
114
115         /* traversal loop */
116         do {
117                 do {
118                         /* traverse internal nodes */
119                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
120                                 int node_addr_child1, traverse_mask;
121                                 float dist[2];
122                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
123
                                    /* Test the ray against both children of this node.
                                     * As handled further down, traverse_mask is 3 when both
                                     * children are hit, 2 when only the second one is, 0 when
                                     * neither is, and any other value continues with the
                                     * first child. dist[] is filled by the callee and used to
                                     * order the children. The robust variant is used when
                                     * difl is non-zero (BVH_HAIR_MINIMUM_WIDTH only). */
124 #if !defined(__KERNEL_SSE2__)
125 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
126                                 if(difl != 0.0f) {
127                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
128                                                                               P,
129 #    if BVH_FEATURE(BVH_HAIR)
130                                                                               dir,
131 #    endif
132                                                                               idir,
133                                                                               isect->t,
134                                                                               difl,
135                                                                               extmax,
136                                                                               node_addr,
137                                                                               visibility,
138                                                                               dist);
139                                 }
140                                 else
141 #  endif
142                                 {
143                                         traverse_mask = NODE_INTERSECT(kg,
144                                                                        P,
145 #    if BVH_FEATURE(BVH_HAIR)
146                                                                        dir,
147 #    endif
148                                                                        idir,
149                                                                        isect->t,
150                                                                        node_addr,
151                                                                        visibility,
152                                                                        dist);
153                                 }
154 #else // __KERNEL_SSE2__
155 #  if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
156                                 if(difl != 0.0f) {
157                                         traverse_mask = NODE_INTERSECT_ROBUST(kg,
158                                                                               P,
159                                                                               dir,
160 #    if BVH_FEATURE(BVH_HAIR)
161                                                                               tnear,
162                                                                               tfar,
163 #    endif
164                                                                               tsplat,
165                                                                               Psplat,
166                                                                               idirsplat,
167                                                                               shufflexyz,
168                                                                               difl,
169                                                                               extmax,
170                                                                               node_addr,
171                                                                               visibility,
172                                                                               dist);
173                                 }
174                                 else
175 #  endif
176                                 {
177                                         traverse_mask = NODE_INTERSECT(kg,
178                                                                        P,
179                                                                        dir,
180 #    if BVH_FEATURE(BVH_HAIR)
181                                                                        tnear,
182                                                                        tfar,
183 #    endif
184                                                                        tsplat,
185                                                                        Psplat,
186                                                                        idirsplat,
187                                                                        shufflexyz,
188                                                                        node_addr,
189                                                                        visibility,
190                                                                        dist);
191                                 }
192 #endif // __KERNEL_SSE2__
193
                                    /* The child addresses are stored as integer bit patterns
                                     * in the z/w components of the fetched node data. Default
                                     * to continuing with the first child. */
194                                 node_addr = __float_as_int(cnodes.z);
195                                 node_addr_child1 = __float_as_int(cnodes.w);
196
197                                 if(traverse_mask == 3) {
198                                         /* Both children were intersected, push the farther one. */
199                                         bool is_closest_child1 = (dist[1] < dist[0]);
200                                         if(is_closest_child1) {
201                                                 int tmp = node_addr;
202                                                 node_addr = node_addr_child1;
203                                                 node_addr_child1 = tmp;
204                                         }
205
206                                         ++stack_ptr;
207                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
208                                         traversal_stack[stack_ptr] = node_addr_child1;
209                                 }
210                                 else {
211                                         /* One child was intersected. */
212                                         if(traverse_mask == 2) {
213                                                 node_addr = node_addr_child1;
214                                         }
215                                         else if(traverse_mask == 0) {
216                                                 /* Neither child was intersected. */
217                                                 node_addr = traversal_stack[stack_ptr];
218                                                 --stack_ptr;
219                                         }
                                            /* Otherwise only the first child was hit and
                                             * node_addr already refers to it. */
220                                 }
221                                 BVH_DEBUG_NEXT_STEP();
222                         }
223
224                         /* if node is leaf, fetch triangle list */
225                         if(node_addr < 0) {
                                    /* Leaves are encoded as negative addresses; -node_addr-1
                                     * is the index into the leaf node table. */
226                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
227                                 int prim_addr = __float_as_int(leaf.x);
228
                                    /* With instancing, a negative prim_addr marks an instance
                                     * leaf, which is handled in the else branch further down. */
229 #if BVH_FEATURE(BVH_INSTANCING)
230                                 if(prim_addr >= 0) {
231 #endif
                                            /* Primitive leaf: the loops below visit the range
                                             * [prim_addr, prim_addr2); leaf.w holds the
                                             * primitive type bits. */
232                                         const int prim_addr2 = __float_as_int(leaf.y);
233                                         const uint type = __float_as_int(leaf.w);
234
235                                         /* pop */
236                                         node_addr = traversal_stack[stack_ptr];
237                                         --stack_ptr;
238
239                                         /* primitive intersection */
240                                         switch(type & PRIMITIVE_ALL) {
241                                                 case PRIMITIVE_TRIANGLE: {
242                                                         for(; prim_addr < prim_addr2; prim_addr++) {
243                                                                 BVH_DEBUG_NEXT_STEP();
244                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
245                                                                 if(triangle_intersect(kg,
246                                                                                       &isect_precalc,
247                                                                                       isect,
248                                                                                       P,
249                                                                                       visibility,
250                                                                                       object,
251                                                                                       prim_addr))
252                                                                 {
253                                                                         /* shadow ray early termination */
254 #if defined(__KERNEL_SSE2__)
255                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
256                                                                                 return true;
                                                                            /* isect->t may have been updated by the
                                                                             * hit: refresh the cached SSE copies. */
257                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
258 #  if BVH_FEATURE(BVH_HAIR)
259                                                                         tfar = ssef(isect->t);
260 #  endif
261 #else
262                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
263                                                                                 return true;
264 #endif
265                                                                 }
266                                                         }
267                                                         break;
268                                                 }
269 #if BVH_FEATURE(BVH_MOTION)
270                                                 case PRIMITIVE_MOTION_TRIANGLE: {
271                                                         for(; prim_addr < prim_addr2; prim_addr++) {
272                                                                 BVH_DEBUG_NEXT_STEP();
273                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
274                                                                 if(motion_triangle_intersect(kg,
275                                                                                              isect,
276                                                                                              P,
277                                                                                              dir,
278                                                                                              ray->time,
279                                                                                              visibility,
280                                                                                              object,
281                                                                                              prim_addr))
282                                                                 {
283                                                                         /* shadow ray early termination */
284 #  if defined(__KERNEL_SSE2__)
285                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
286                                                                                 return true;
287                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
288 #    if BVH_FEATURE(BVH_HAIR)
289                                                                         tfar = ssef(isect->t);
290 #    endif
291 #  else
292                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
293                                                                                 return true;
294 #  endif
295                                                                 }
296                                                         }
297                                                         break;
298                                                 }
299 #endif  /* BVH_FEATURE(BVH_MOTION) */
300 #if BVH_FEATURE(BVH_HAIR)
301                                                 case PRIMITIVE_CURVE:
302                                                 case PRIMITIVE_MOTION_CURVE: {
303                                                         for(; prim_addr < prim_addr2; prim_addr++) {
304                                                                 BVH_DEBUG_NEXT_STEP();
305                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
306                                                                 bool hit;
                                                                    /* The curve intersection routine is picked from
                                                                     * the CURVE_KN_INTERPOLATE bit of the curve
                                                                     * flags; both take the same arguments.
                                                                     * NOTE(review): lcg_state, difl and extmax are
                                                                     * only declared with BVH_HAIR_MINIMUM_WIDTH, so
                                                                     * this case appears to rely on BVH_HAIR never
                                                                     * being enabled without it - confirm. */
307                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
308                                                                         hit = bvh_cardinal_curve_intersect(kg,
309                                                                                                            isect,
310                                                                                                            P,
311                                                                                                            dir,
312                                                                                                            visibility,
313                                                                                                            object,
314                                                                                                            prim_addr,
315                                                                                                            ray->time,
316                                                                                                            type,
317                                                                                                            lcg_state,
318                                                                                                            difl,
319                                                                                                            extmax);
320                                                                 }
321                                                                 else {
322                                                                         hit = bvh_curve_intersect(kg,
323                                                                                                   isect,
324                                                                                                   P,
325                                                                                                   dir,
326                                                                                                   visibility,
327                                                                                                   object,
328                                                                                                   prim_addr,
329                                                                                                   ray->time,
330                                                                                                   type,
331                                                                                                   lcg_state,
332                                                                                                   difl,
333                                                                                                   extmax);
334                                                                 }
335                                                                 if(hit) {
336                                                                         /* shadow ray early termination */
337 #  if defined(__KERNEL_SSE2__)
338                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
339                                                                                 return true;
340                                                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
341 #    if BVH_FEATURE(BVH_HAIR)
342                                                                         tfar = ssef(isect->t);
343 #    endif
344 #  else
345                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE)
346                                                                                 return true;
347 #  endif
348                                                                 }
349                                                         }
350                                                         break;
351                                                 }
352 #endif  /* BVH_FEATURE(BVH_HAIR) */
353                                         }
                                    /* The brace below closes if(prim_addr >= 0) when
                                     * BVH_INSTANCING is enabled and if(node_addr < 0) when it
                                     * is not; in the latter case everything up to the matching
                                     * #endif is compiled out. */
354                                 }
355 #if BVH_FEATURE(BVH_INSTANCING)
356                                 else {
357                                         /* instance push */
358                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
359
                                            /* P, dir, idir and isect->t are passed by pointer so
                                             * the helper can rewrite them for traversal inside the
                                             * instance (presumably into the object's local space -
                                             * the helper is not defined in this file). */
360 #  if BVH_FEATURE(BVH_MOTION)
361                                         bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
362 #  else
363                                         bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
364 #  endif
                                            /* Everything derived from P/dir/idir/isect->t has to
                                             * be recomputed for the updated ray. */
365                                         triangle_intersect_precalc(dir, &isect_precalc);
366
367 #  if defined(__KERNEL_SSE2__)
368                                         Psplat[0] = ssef(P.x);
369                                         Psplat[1] = ssef(P.y);
370                                         Psplat[2] = ssef(P.z);
371
372                                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
373 #    if BVH_FEATURE(BVH_HAIR)
374                                         tfar = ssef(isect->t);
375 #    endif
376
377                                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
378 #  endif
379
                                            /* Push a sentinel so the inner do/while stops once the
                                             * instance's sub-tree has been fully traversed. */
380                                         ++stack_ptr;
381                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
382                                         traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
383
384                                         node_addr = kernel_tex_fetch(__object_node, object);
385
386                                         BVH_DEBUG_NEXT_INSTANCE();
387                                 }
388                         }
389 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
390                 } while(node_addr != ENTRYPOINT_SENTINEL);
391
392 #if BVH_FEATURE(BVH_INSTANCING)
                    /* The inner loop ended on a sentinel. If the stack is not empty
                     * afterwards, it was the sentinel pushed for an instance rather
                     * than the one at the bottom of the stack: undo the instance push
                     * and resume with the node stored below it. */
393                 if(stack_ptr >= 0) {
394                         kernel_assert(object != OBJECT_NONE);
395
396                         /* instance pop */
397 #  if BVH_FEATURE(BVH_MOTION)
398                         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
399 #  else
400                         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
401 #  endif
402                         triangle_intersect_precalc(dir, &isect_precalc);
403
404 #  if defined(__KERNEL_SSE2__)
405                         Psplat[0] = ssef(P.x);
406                         Psplat[1] = ssef(P.y);
407                         Psplat[2] = ssef(P.z);
408
409                         tsplat = ssef(0.0f, 0.0f, -isect->t, -isect->t);
410 #    if BVH_FEATURE(BVH_HAIR)
411                         tfar = ssef(isect->t);
412 #    endif
413
414                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
415 #  endif
416
417                         object = OBJECT_NONE;
418                         node_addr = traversal_stack[stack_ptr];
419                         --stack_ptr;
420                 }
421 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
422         } while(node_addr != ENTRYPOINT_SENTINEL);
423
            /* isect->prim was reset to PRIM_NONE on entry, so any other value
             * means an intersection routine recorded a hit. */
424         return (isect->prim != PRIM_NONE);
425 }
426
427 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
428                                          const Ray *ray,
429                                          Intersection *isect,
430                                          const uint visibility
431 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
432                                          , uint *lcg_state,
433                                          float difl,
434                                          float extmax
435 #endif
436                                          )
437 {
438 #ifdef __QBVH__
439         if(kernel_data.bvh.use_qbvh) {
440                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
441                                                     ray,
442                                                     isect,
443                                                     visibility
444 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
445                                                     , lcg_state,
446                                                     difl,
447                                                     extmax
448 #endif
449                                                     );
450         }
451         else
452 #endif
453         {
454                 kernel_assert(kernel_data.bvh.use_qbvh == false);
455                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
456                                                    ray,
457                                                    isect,
458                                                    visibility
459 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
460                                                    , lcg_state,
461                                                    difl,
462                                                    extmax
463 #endif
464                                                    );
465         }
466 }
467
/* Remove the macros this file works with. NODE_INTERSECT and
 * NODE_INTERSECT_ROBUST are defined near the top of this file;
 * BVH_FUNCTION_NAME and BVH_FUNCTION_FEATURES are not defined in the visible
 * part of it - presumably the including file sets them before each inclusion
 * so that a differently configured variant can be generated (TODO confirm).
 */
468 #undef BVH_FUNCTION_NAME
469 #undef BVH_FUNCTION_FEATURES
470 #undef NODE_INTERSECT
471 #undef NODE_INTERSECT_ROBUST