335a4afd47a3957ef2c02486f029b8b4ab11377f
[blender.git] / intern / cycles / kernel / bvh / qbvh_traversal.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function, where various features can be
18  * enabled/disabled. This way we can compile optimized versions for each case
19  * without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
22  * BVH_HAIR: hair curve rendering
23  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
24  * BVH_MOTION: motion blur rendering
25  *
26  */
27
28 #if BVH_FEATURE(BVH_HAIR)
29 #  define NODE_INTERSECT qbvh_node_intersect
30 #  define NODE_INTERSECT_ROBUST qbvh_node_intersect_robust
31 #else
32 #  define NODE_INTERSECT qbvh_aligned_node_intersect
33 #  define NODE_INTERSECT_ROBUST qbvh_aligned_node_intersect_robust
34 #endif
35
36 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
37                                              const Ray *ray,
38                                              Intersection *isect,
39                                              const uint visibility
40 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
41                                              ,uint *lcg_state,
42                                              float difl,
43                                              float extmax
44 #endif
45                                              )
46 {
47         /* TODO(sergey):
48          * - Test if pushing distance on the stack helps (for non shadow rays).
49          * - Separate version for shadow rays.
50          * - Likely and unlikely for if() statements.
51          * - Test restrict attribute for pointers.
52          */
53
54         /* Traversal stack in CUDA thread-local memory. */
55         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
56         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
57         traversal_stack[0].dist = -FLT_MAX;
58
59         /* Traversal variables in registers. */
60         int stack_ptr = 0;
61         int node_addr = kernel_data.bvh.root;
62         float node_dist = -FLT_MAX;
63
64         /* Ray parameters in registers. */
65         float3 P = ray->P;
66         float3 dir = bvh_clamp_direction(ray->D);
67         float3 idir = bvh_inverse_direction(dir);
68         int object = OBJECT_NONE;
69
70 #if BVH_FEATURE(BVH_MOTION)
71         Transform ob_itfm;
72 #endif
73
74 #ifndef __KERNEL_SSE41__
75         if(!isfinite(P.x)) {
76                 return false;
77         }
78 #endif
79
80         isect->t = ray->t;
81         isect->u = 0.0f;
82         isect->v = 0.0f;
83         isect->prim = PRIM_NONE;
84         isect->object = OBJECT_NONE;
85
86         BVH_DEBUG_INIT();
87
88         ssef tnear(0.0f), tfar(ray->t);
89 #if BVH_FEATURE(BVH_HAIR)
90         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
91 #endif
92         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
93
94 #ifdef __KERNEL_AVX2__
95         float3 P_idir = P*idir;
96         sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
97 #endif
98 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
99         sse3f org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
100 #endif
101
102         /* Offsets to select the side that becomes the lower or upper bound. */
103         int near_x, near_y, near_z;
104         int far_x, far_y, far_z;
105         qbvh_near_far_idx_calc(idir,
106                                &near_x, &near_y, &near_z,
107                                &far_x, &far_y, &far_z);
108
109         /* Traversal loop. */
110         do {
111                 do {
112                         /* Traverse internal nodes. */
113                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
114                                 float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
115                                 (void)inodes;
116
117                                 if(UNLIKELY(node_dist > isect->t)
118 #if BVH_FEATURE(BVH_MOTION)
119                                    || UNLIKELY(ray->time < inodes.y)
120                                    || UNLIKELY(ray->time > inodes.z)
121 #endif
122 #ifdef __VISIBILITY_FLAG__
123                                    || (__float_as_uint(inodes.x) & visibility) == 0
124 #endif
125                                  )
126                                 {
127                                         /* Pop. */
128                                         node_addr = traversal_stack[stack_ptr].addr;
129                                         node_dist = traversal_stack[stack_ptr].dist;
130                                         --stack_ptr;
131                                         continue;
132                                 }
133
134                                 int child_mask;
135                                 ssef dist;
136
137                                 BVH_DEBUG_NEXT_NODE();
138
139 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
140                                 if(difl != 0.0f) {
141                                         /* NOTE: We extend all the child BB instead of fetching
142                                          * and checking visibility flags for each of the,
143                                          *
144                                          * Need to test if doing opposite would be any faster.
145                                          */
146                                         child_mask = NODE_INTERSECT_ROBUST(kg,
147                                                                            tnear,
148                                                                            tfar,
149 #  ifdef __KERNEL_AVX2__
150                                                                            P_idir4,
151 #  endif
152 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
153                                                                            org4,
154 #  endif
155 #  if BVH_FEATURE(BVH_HAIR)
156                                                                            dir4,
157 #  endif
158                                                                            idir4,
159                                                                            near_x, near_y, near_z,
160                                                                            far_x, far_y, far_z,
161                                                                            node_addr,
162                                                                            difl,
163                                                                            &dist);
164                                 }
165                                 else
166 #endif  /* BVH_HAIR_MINIMUM_WIDTH */
167                                 {
168                                         child_mask = NODE_INTERSECT(kg,
169                                                                     tnear,
170                                                                     tfar,
171 #ifdef __KERNEL_AVX2__
172                                                                     P_idir4,
173 #endif
174 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
175                                                                     org4,
176 #endif
177 #if BVH_FEATURE(BVH_HAIR)
178                                                                     dir4,
179 #endif
180                                                                     idir4,
181                                                                     near_x, near_y, near_z,
182                                                                     far_x, far_y, far_z,
183                                                                     node_addr,
184                                                                     &dist);
185                                 }
186
187                                 if(child_mask != 0) {
188                                         float4 cnodes;
189                                         /* TODO(sergey): Investigate whether moving cnodes upwards
190                                          * gives a speedup (will be different cache pattern but will
191                                          * avoid extra check here),
192                                          */
193 #if BVH_FEATURE(BVH_HAIR)
194                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
195                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
196                                         }
197                                         else
198 #endif
199                                         {
200                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
201                                         }
202
203                                         /* One child is hit, continue with that child. */
204                                         int r = __bscf(child_mask);
205                                         float d0 = ((float*)&dist)[r];
206                                         if(child_mask == 0) {
207                                                 node_addr = __float_as_int(cnodes[r]);
208                                                 node_dist = d0;
209                                                 continue;
210                                         }
211
212                                         /* Two children are hit, push far child, and continue with
213                                          * closer child.
214                                          */
215                                         int c0 = __float_as_int(cnodes[r]);
216                                         r = __bscf(child_mask);
217                                         int c1 = __float_as_int(cnodes[r]);
218                                         float d1 = ((float*)&dist)[r];
219                                         if(child_mask == 0) {
220                                                 if(d1 < d0) {
221                                                         node_addr = c1;
222                                                         node_dist = d1;
223                                                         ++stack_ptr;
224                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
225                                                         traversal_stack[stack_ptr].addr = c0;
226                                                         traversal_stack[stack_ptr].dist = d0;
227                                                         continue;
228                                                 }
229                                                 else {
230                                                         node_addr = c0;
231                                                         node_dist = d0;
232                                                         ++stack_ptr;
233                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
234                                                         traversal_stack[stack_ptr].addr = c1;
235                                                         traversal_stack[stack_ptr].dist = d1;
236                                                         continue;
237                                                 }
238                                         }
239
240                                         /* Here starts the slow path for 3 or 4 hit children. We push
241                                          * all nodes onto the stack to sort them there.
242                                          */
243                                         ++stack_ptr;
244                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
245                                         traversal_stack[stack_ptr].addr = c1;
246                                         traversal_stack[stack_ptr].dist = d1;
247                                         ++stack_ptr;
248                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
249                                         traversal_stack[stack_ptr].addr = c0;
250                                         traversal_stack[stack_ptr].dist = d0;
251
252                                         /* Three children are hit, push all onto stack and sort 3
253                                          * stack items, continue with closest child.
254                                          */
255                                         r = __bscf(child_mask);
256                                         int c2 = __float_as_int(cnodes[r]);
257                                         float d2 = ((float*)&dist)[r];
258                                         if(child_mask == 0) {
259                                                 ++stack_ptr;
260                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
261                                                 traversal_stack[stack_ptr].addr = c2;
262                                                 traversal_stack[stack_ptr].dist = d2;
263                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
264                                                                 &traversal_stack[stack_ptr - 1],
265                                                                 &traversal_stack[stack_ptr - 2]);
266                                                 node_addr = traversal_stack[stack_ptr].addr;
267                                                 node_dist = traversal_stack[stack_ptr].dist;
268                                                 --stack_ptr;
269                                                 continue;
270                                         }
271
272                                         /* Four children are hit, push all onto stack and sort 4
273                                          * stack items, continue with closest child.
274                                          */
275                                         r = __bscf(child_mask);
276                                         int c3 = __float_as_int(cnodes[r]);
277                                         float d3 = ((float*)&dist)[r];
278                                         ++stack_ptr;
279                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
280                                         traversal_stack[stack_ptr].addr = c3;
281                                         traversal_stack[stack_ptr].dist = d3;
282                                         ++stack_ptr;
283                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
284                                         traversal_stack[stack_ptr].addr = c2;
285                                         traversal_stack[stack_ptr].dist = d2;
286                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
287                                                         &traversal_stack[stack_ptr - 1],
288                                                         &traversal_stack[stack_ptr - 2],
289                                                         &traversal_stack[stack_ptr - 3]);
290                                 }
291
292                                 node_addr = traversal_stack[stack_ptr].addr;
293                                 node_dist = traversal_stack[stack_ptr].dist;
294                                 --stack_ptr;
295                         }
296
297                         /* If node is leaf, fetch triangle list. */
298                         if(node_addr < 0) {
299                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
300
301 #ifdef __VISIBILITY_FLAG__
302                                 if(UNLIKELY((node_dist > isect->t) ||
303                                             ((__float_as_uint(leaf.z) & visibility) == 0)))
304 #else
305                                 if(UNLIKELY((node_dist > isect->t)))
306 #endif
307                                 {
308                                         /* Pop. */
309                                         node_addr = traversal_stack[stack_ptr].addr;
310                                         node_dist = traversal_stack[stack_ptr].dist;
311                                         --stack_ptr;
312                                         continue;
313                                 }
314
315                                 int prim_addr = __float_as_int(leaf.x);
316
317 #if BVH_FEATURE(BVH_INSTANCING)
318                                 if(prim_addr >= 0) {
319 #endif
320                                         int prim_addr2 = __float_as_int(leaf.y);
321                                         const uint type = __float_as_int(leaf.w);
322
323                                         /* Pop. */
324                                         node_addr = traversal_stack[stack_ptr].addr;
325                                         node_dist = traversal_stack[stack_ptr].dist;
326                                         --stack_ptr;
327
328                                         /* Primitive intersection. */
329                                         switch(type & PRIMITIVE_ALL) {
330                                                 case PRIMITIVE_TRIANGLE: {
331                                                         for(; prim_addr < prim_addr2; prim_addr++) {
332                                                                 BVH_DEBUG_NEXT_INTERSECTION();
333                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
334                                                                 if(triangle_intersect(kg,
335                                                                                       isect,
336                                                                                       P,
337                                                                                       dir,
338                                                                                       visibility,
339                                                                                       object,
340                                                                                       prim_addr)) {
341                                                                         tfar = ssef(isect->t);
342                                                                         /* Shadow ray early termination. */
343                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE) {
344                                                                                 return true;
345                                                                         }
346                                                                 }
347                                                         }
348                                                         break;
349                                                 }
350 #if BVH_FEATURE(BVH_MOTION)
351                                                 case PRIMITIVE_MOTION_TRIANGLE: {
352                                                         for(; prim_addr < prim_addr2; prim_addr++) {
353                                                                 BVH_DEBUG_NEXT_INTERSECTION();
354                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
355                                                                 if(motion_triangle_intersect(kg,
356                                                                                              isect,
357                                                                                              P,
358                                                                                              dir,
359                                                                                              ray->time,
360                                                                                              visibility,
361                                                                                              object,
362                                                                                              prim_addr)) {
363                                                                         tfar = ssef(isect->t);
364                                                                         /* Shadow ray early termination. */
365                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE) {
366                                                                                 return true;
367                                                                         }
368                                                                 }
369                                                         }
370                                                         break;
371                                                 }
372 #endif  /* BVH_FEATURE(BVH_MOTION) */
373 #if BVH_FEATURE(BVH_HAIR)
374                                                 case PRIMITIVE_CURVE:
375                                                 case PRIMITIVE_MOTION_CURVE: {
376                                                         for(; prim_addr < prim_addr2; prim_addr++) {
377                                                                 BVH_DEBUG_NEXT_INTERSECTION();
378                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
379                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
380                                                                 bool hit;
381                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
382                                                                         hit = cardinal_curve_intersect(kg,
383                                                                                                        isect,
384                                                                                                        P,
385                                                                                                        dir,
386                                                                                                        visibility,
387                                                                                                        object,
388                                                                                                        prim_addr,
389                                                                                                        ray->time,
390                                                                                                        curve_type,
391                                                                                                        lcg_state,
392                                                                                                        difl,
393                                                                                                        extmax);
394                                                                 }
395                                                                 else {
396                                                                         hit = curve_intersect(kg,
397                                                                                               isect,
398                                                                                               P,
399                                                                                               dir,
400                                                                                               visibility,
401                                                                                               object,
402                                                                                               prim_addr,
403                                                                                               ray->time,
404                                                                                               curve_type,
405                                                                                               lcg_state,
406                                                                                               difl,
407                                                                                               extmax);
408                                                                 }
409                                                                 if(hit) {
410                                                                         tfar = ssef(isect->t);
411                                                                         /* Shadow ray early termination. */
412                                                                         if(visibility & PATH_RAY_SHADOW_OPAQUE) {
413                                                                                 return true;
414                                                                         }
415                                                                 }
416                                                         }
417                                                         break;
418                                                 }
419 #endif  /* BVH_FEATURE(BVH_HAIR) */
420                                         }
421                                 }
422 #if BVH_FEATURE(BVH_INSTANCING)
423                                 else {
424                                         /* Instance push. */
425                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
426
427 #  if BVH_FEATURE(BVH_MOTION)
428                                         qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
429 #  else
430                                         qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
431 #  endif
432
433                                         qbvh_near_far_idx_calc(idir,
434                                                                &near_x, &near_y, &near_z,
435                                                                &far_x, &far_y, &far_z);
436                                         tfar = ssef(isect->t);
437 #  if BVH_FEATURE(BVH_HAIR)
438                                         dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
439 #  endif
440                                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
441 #  ifdef __KERNEL_AVX2__
442                                         P_idir = P*idir;
443                                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
444 #  endif
445 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
446                                         org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
447 #  endif
448
449                                         ++stack_ptr;
450                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
451                                         traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
452                                         traversal_stack[stack_ptr].dist = -FLT_MAX;
453
454                                         node_addr = kernel_tex_fetch(__object_node, object);
455
456                                         BVH_DEBUG_NEXT_INSTANCE();
457                                 }
458                         }
459 #endif  /* FEATURE(BVH_INSTANCING) */
460                 } while(node_addr != ENTRYPOINT_SENTINEL);
461
462 #if BVH_FEATURE(BVH_INSTANCING)
463                 if(stack_ptr >= 0) {
464                         kernel_assert(object != OBJECT_NONE);
465
466                         /* Instance pop. */
467 #  if BVH_FEATURE(BVH_MOTION)
468                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
469 #  else
470                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
471 #  endif
472
473                         qbvh_near_far_idx_calc(idir,
474                                                &near_x, &near_y, &near_z,
475                                                &far_x, &far_y, &far_z);
476                         tfar = ssef(isect->t);
477 #  if BVH_FEATURE(BVH_HAIR)
478                         dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
479 #  endif
480                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
481 #  ifdef __KERNEL_AVX2__
482                         P_idir = P*idir;
483                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
484 #  endif
485 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
486                         org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
487 #  endif
488
489                         object = OBJECT_NONE;
490                         node_addr = traversal_stack[stack_ptr].addr;
491                         node_dist = traversal_stack[stack_ptr].dist;
492                         --stack_ptr;
493                 }
494 #endif  /* FEATURE(BVH_INSTANCING) */
495         } while(node_addr != ENTRYPOINT_SENTINEL);
496
497         return (isect->prim != PRIM_NONE);
498 }
499
500 #undef NODE_INTERSECT
501 #undef NODE_INTERSECT_ROBUST