2021d8e1143b47c8a08b98d5261c5c36301b9073
[blender.git] / intern / cycles / kernel / bvh / obvh_traversal.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function, where various features can be
18  * enabled/disabled. This way we can compile optimized versions for each case
19  * without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
22  * BVH_HAIR: hair curve rendering
23  * BVH_HAIR_MINIMUM_WIDTH: hair curve rendering with minimum width
24  * BVH_MOTION: motion blur rendering
25  *
26  */
27
28 #if BVH_FEATURE(BVH_HAIR)  /* Hair enabled: traversal calls the obvh_node_intersect variants. */
29 #  define NODE_INTERSECT obvh_node_intersect
30 #  define NODE_INTERSECT_ROBUST obvh_node_intersect_robust
31 #else  /* Hair disabled: traversal calls the obvh_aligned_node_intersect variants. */
32 #  define NODE_INTERSECT obvh_aligned_node_intersect
33 #  define NODE_INTERSECT_ROBUST obvh_aligned_node_intersect_robust
34 #endif  /* BVH_FEATURE(BVH_HAIR) */
35
36 ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
37                                              const Ray *ray,
38                                              Intersection *isect,
39                                              const uint visibility
40 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
41                                              ,uint *lcg_state,
42                                              float difl,
43                                              float extmax
44 #endif
45                                              )
46 {
47         /* Traversal stack, kept in a function-local array. */
48         OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
49         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
50         traversal_stack[0].dist = -FLT_MAX;
51
52         /* Traversal variables in registers. */
53         int stack_ptr = 0;
54         int node_addr = kernel_data.bvh.root;
55         float node_dist = -FLT_MAX;
56
57         /* Ray parameters in registers. */
58         float3 P = ray->P;
59         float3 dir = bvh_clamp_direction(ray->D);
60         float3 idir = bvh_inverse_direction(dir);
61         int object = OBJECT_NONE;
62
63 #if BVH_FEATURE(BVH_MOTION)
64         Transform ob_itfm;
65 #endif
66
67 #ifndef __KERNEL_SSE41__
68         if(!isfinite(P.x)) {
69                 return false;
70         }
71 #endif
72
73         isect->t = ray->t;
74         isect->u = 0.0f;
75         isect->v = 0.0f;
76         isect->prim = PRIM_NONE;
77         isect->object = OBJECT_NONE;
78
79         BVH_DEBUG_INIT();
80         avxf tnear(0.0f), tfar(ray->t);
81 #if BVH_FEATURE(BVH_HAIR)
82         avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
83 #endif
84         avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
85
86 #ifdef __KERNEL_AVX2__
87         float3 P_idir = P*idir;
88         avx3f P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
89 #endif
90 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
91         avx3f org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
92 #endif
93
94         /* Offsets to select the side that becomes the lower or upper bound. */
95         int near_x, near_y, near_z;
96         int far_x, far_y, far_z;
97         obvh_near_far_idx_calc(idir,
98                                &near_x, &near_y, &near_z,
99                                &far_x, &far_y, &far_z);
100         /* Traversal loop. */
101         do {
102                 do {
103                         /* Traverse internal nodes. */
104                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
105                                 float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
106                                 (void)inodes;
107
108                                 if(UNLIKELY(node_dist > isect->t)
109 #if BVH_FEATURE(BVH_MOTION)
110                                    || UNLIKELY(ray->time < inodes.y)
111                                    || UNLIKELY(ray->time > inodes.z)
112 #endif
113 #ifdef __VISIBILITY_FLAG__
114                                    || (__float_as_uint(inodes.x) & visibility) == 0
115 #endif
116                                  )
117                                 {
118                                         /* Pop. */
119                                         node_addr = traversal_stack[stack_ptr].addr;
120                                         node_dist = traversal_stack[stack_ptr].dist;
121                                         --stack_ptr;
122                                         continue;
123                                 }
124
125                                 int child_mask;
126                                 avxf dist;
127
128                                 BVH_DEBUG_NEXT_NODE();
129
130 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
131                                 if(difl != 0.0f) {
132                                         /* NOTE: We extend all the child BB instead of fetching
133                                          * and checking visibility flags for each of them.
134                                          *
135                                          * Need to test if doing the opposite would be any faster.
136                                          */
137                                         child_mask = NODE_INTERSECT_ROBUST(kg,
138                                                                            tnear,
139                                                                            tfar,
140 #  ifdef __KERNEL_AVX2__
141                                                                            P_idir4,
142 #  endif
143 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
144                                                                            org4,
145 #  endif
146 #  if BVH_FEATURE(BVH_HAIR)
147                                                                            dir4,
148 #  endif
149                                                                            idir4,
150                                                                            near_x, near_y, near_z,
151                                                                            far_x, far_y, far_z,
152                                                                            node_addr,
153                                                                            difl,
154                                                                            &dist);
155                                 }
156                                 else
157 #endif  /* BVH_HAIR_MINIMUM_WIDTH */
158                                 {
159                                         child_mask = NODE_INTERSECT(kg,
160                                                                     tnear,
161                                                                     tfar,
162 #ifdef __KERNEL_AVX2__
163                                                                     P_idir4,
164 #endif
165 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
166                                                                     org4,
167 #endif
168 #if BVH_FEATURE(BVH_HAIR)
169                                                                     dir4,
170 #endif
171                                                                     idir4,
172                                                                     near_x, near_y, near_z,
173                                                                     far_x, far_y, far_z,
174                                                                     node_addr,
175                                                                     &dist);
176                                 }
177
178                                 if(child_mask != 0) {
179                                         avxf cnodes;
180                                         /* TODO(sergey): Investigate whether moving cnodes upwards
181                                          * gives a speedup (will be a different cache pattern but will
182                                          * avoid the extra check here).
183                                          */
184 #if BVH_FEATURE(BVH_HAIR)
185                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
186                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
187                                         }
188                                         else
189 #endif
190                                         {
191                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
192                                         }
193
194                                         /* One child is hit, continue with that child. */
195                                         int r = __bscf(child_mask);
196                                         float d0 = ((float*)&dist)[r];
197                                         if(child_mask == 0) {
198                                                 node_addr = __float_as_int(cnodes[r]);
199                                                 node_dist = d0;
200                                                 continue;
201                                         }
202
203                                         /* Two children are hit, push far child, and continue with
204                                          * closer child.
205                                          */
206                                         int c0 = __float_as_int(cnodes[r]);
207                                         r = __bscf(child_mask);
208                                         int c1 = __float_as_int(cnodes[r]);
209                                         float d1 = ((float*)&dist)[r];
210                                         if(child_mask == 0) {
211                                                 if(d1 < d0) {
212                                                         node_addr = c1;
213                                                         node_dist = d1;
214                                                         ++stack_ptr;
215                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
216                                                         traversal_stack[stack_ptr].addr = c0;
217                                                         traversal_stack[stack_ptr].dist = d0;
218                                                         continue;
219                                                 }
220                                                 else {
221                                                         node_addr = c0;
222                                                         node_dist = d0;
223                                                         ++stack_ptr;
224                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
225                                                         traversal_stack[stack_ptr].addr = c1;
226                                                         traversal_stack[stack_ptr].dist = d1;
227                                                         continue;
228                                                 }
229                                         }
230
231                                         /* Here starts the slow path for 3 to 8 hit children. We push
232                                          * all nodes onto the stack to sort them there.
233                                          */
234                                         ++stack_ptr;
235                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
236                                         traversal_stack[stack_ptr].addr = c1;
237                                         traversal_stack[stack_ptr].dist = d1;
238                                         ++stack_ptr;
239                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
240                                         traversal_stack[stack_ptr].addr = c0;
241                                         traversal_stack[stack_ptr].dist = d0;
242
243                                         /* Three children are hit, push all onto stack and sort 3
244                                          * stack items, continue with closest child.
245                                          */
246                                         r = __bscf(child_mask);
247                                         int c2 = __float_as_int(cnodes[r]);
248                                         float d2 = ((float*)&dist)[r];
249                                         if(child_mask == 0) {
250                                                 ++stack_ptr;
251                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
252                                                 traversal_stack[stack_ptr].addr = c2;
253                                                 traversal_stack[stack_ptr].dist = d2;
254                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
255                                                                 &traversal_stack[stack_ptr - 1],
256                                                                 &traversal_stack[stack_ptr - 2]);
257                                                 node_addr = traversal_stack[stack_ptr].addr;
258                                                 node_dist = traversal_stack[stack_ptr].dist;
259                                                 --stack_ptr;
260                                                 continue;
261                                         }
262
263                                         /* Four children are hit, push all onto stack and sort 4
264                                          * stack items, continue with closest child.
265                                          */
266                                         r = __bscf(child_mask);
267                                         int c3 = __float_as_int(cnodes[r]);
268                                         float d3 = ((float*)&dist)[r];
269                                         if(child_mask == 0) {
270                                                 ++stack_ptr;
271                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
272                                                 traversal_stack[stack_ptr].addr = c3;
273                                                 traversal_stack[stack_ptr].dist = d3;
274                                                 ++stack_ptr;
275                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
276                                                 traversal_stack[stack_ptr].addr = c2;
277                                                 traversal_stack[stack_ptr].dist = d2;
278                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
279                                                                 &traversal_stack[stack_ptr - 1],
280                                                                 &traversal_stack[stack_ptr - 2],
281                                                                 &traversal_stack[stack_ptr - 3]);
282                                                 node_addr = traversal_stack[stack_ptr].addr;
283                                                 node_dist = traversal_stack[stack_ptr].dist;
284                                                 --stack_ptr;
285                                                 continue;
286                                         }
287
288                                         ++stack_ptr;
289                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
290                                         traversal_stack[stack_ptr].addr = c3;
291                                         traversal_stack[stack_ptr].dist = d3;
292                                         ++stack_ptr;
293                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
294                                         traversal_stack[stack_ptr].addr = c2;
295                                         traversal_stack[stack_ptr].dist = d2;
296
297                                         /* Five children are hit, push all onto stack and sort 5
298                                          * stack items, continue with closest child.
299                                          */
300                                         r = __bscf(child_mask);
301                                         int c4 = __float_as_int(cnodes[r]);
302                                         float d4 = ((float*)&dist)[r];
303                                         if(child_mask == 0) {
304                                                 ++stack_ptr;
305                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
306                                                 traversal_stack[stack_ptr].addr = c4;
307                                                 traversal_stack[stack_ptr].dist = d4;
308                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
309                                                                 &traversal_stack[stack_ptr - 1],
310                                                                 &traversal_stack[stack_ptr - 2],
311                                                                 &traversal_stack[stack_ptr - 3],
312                                                                 &traversal_stack[stack_ptr - 4]);
313                                                 node_addr = traversal_stack[stack_ptr].addr;
314                                                 node_dist = traversal_stack[stack_ptr].dist;
315                                                 --stack_ptr;
316                                                 continue;
317                                         }
318
319                                         /* Six children are hit, push all onto stack and sort 6
320                                          * stack items, continue with closest child.
321                                          */
322                                         r = __bscf(child_mask);
323                                         int c5 = __float_as_int(cnodes[r]);
324                                         float d5 = ((float*)&dist)[r];
325                                         if(child_mask == 0) {
326                                                 ++stack_ptr;
327                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
328                                                 traversal_stack[stack_ptr].addr = c5;
329                                                 traversal_stack[stack_ptr].dist = d5;
330                                                 ++stack_ptr;
331                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
332                                                 traversal_stack[stack_ptr].addr = c4;
333                                                 traversal_stack[stack_ptr].dist = d4;
334                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
335                                                                 &traversal_stack[stack_ptr - 1],
336                                                                 &traversal_stack[stack_ptr - 2],
337                                                                 &traversal_stack[stack_ptr - 3],
338                                                                 &traversal_stack[stack_ptr - 4],
339                                                                 &traversal_stack[stack_ptr - 5]);
340                                                 node_addr = traversal_stack[stack_ptr].addr;
341                                                 node_dist = traversal_stack[stack_ptr].dist;
342                                                 --stack_ptr;
343                                                 continue;
344                                         }
345
346                                         ++stack_ptr;
347                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
348                                         traversal_stack[stack_ptr].addr = c5;
349                                         traversal_stack[stack_ptr].dist = d5;
350                                         ++stack_ptr;
351                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
352                                         traversal_stack[stack_ptr].addr = c4;
353                                         traversal_stack[stack_ptr].dist = d4;
354
355                                         /* Seven children are hit, push all onto stack and sort 7
356                                          * stack items, continue with closest child.
357                                          */
358                                         r = __bscf(child_mask);
359                                         int c6 = __float_as_int(cnodes[r]);
360                                         float d6 = ((float*)&dist)[r];
361                                         if(child_mask == 0) {
362                                                 ++stack_ptr;
363                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
364                                                 traversal_stack[stack_ptr].addr = c6;
365                                                 traversal_stack[stack_ptr].dist = d6;
366                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
367                                                                 &traversal_stack[stack_ptr - 1],
368                                                                 &traversal_stack[stack_ptr - 2],
369                                                                 &traversal_stack[stack_ptr - 3],
370                                                                 &traversal_stack[stack_ptr - 4],
371                                                                 &traversal_stack[stack_ptr - 5],
372                                                                 &traversal_stack[stack_ptr - 6]);
373                                                 node_addr = traversal_stack[stack_ptr].addr;
374                                                 node_dist = traversal_stack[stack_ptr].dist;
375                                                 --stack_ptr;
376                                                 continue;
377                                         }
378
379                                         /* Eight children are hit, push all onto stack and sort 8
380                                         * stack items, continue with closest child.
381                                         */
382                                         r = __bscf(child_mask);
383                                         int c7 = __float_as_int(cnodes[r]);
384                                         float d7 = ((float*)&dist)[r];
385                                         ++stack_ptr;
386                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
387                                         traversal_stack[stack_ptr].addr = c7;
388                                         traversal_stack[stack_ptr].dist = d7;
389                                         ++stack_ptr;
390                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
391                                         traversal_stack[stack_ptr].addr = c6;
392                                         traversal_stack[stack_ptr].dist = d6;
393                                         obvh_stack_sort(&traversal_stack[stack_ptr],
394                                                         &traversal_stack[stack_ptr - 1],
395                                                         &traversal_stack[stack_ptr - 2],
396                                                         &traversal_stack[stack_ptr - 3],
397                                                         &traversal_stack[stack_ptr - 4],
398                                                         &traversal_stack[stack_ptr - 5],
399                                                         &traversal_stack[stack_ptr - 6],
400                                                         &traversal_stack[stack_ptr - 7]);
401                                         node_addr = traversal_stack[stack_ptr].addr;
402                                         node_dist = traversal_stack[stack_ptr].dist;
403                                         --stack_ptr;
404                                         continue;
405                                 }
406
407
408                                 node_addr = traversal_stack[stack_ptr].addr;
409                                 node_dist = traversal_stack[stack_ptr].dist;
410                                 --stack_ptr;
411                         }
412
413                         /* If node is leaf, fetch triangle list. */
414                         if(node_addr < 0) {
415                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
416
417 #ifdef __VISIBILITY_FLAG__
418                                 if(UNLIKELY((node_dist > isect->t) ||
419                                             ((__float_as_uint(leaf.z) & visibility) == 0)))
420 #else
421                                 if(UNLIKELY((node_dist > isect->t)))
422 #endif
423                                 {
424                                         /* Pop. */
425                                         node_addr = traversal_stack[stack_ptr].addr;
426                                         node_dist = traversal_stack[stack_ptr].dist;
427                                         --stack_ptr;
428                                         continue;
429                                 }
430                                 int prim_addr = __float_as_int(leaf.x);
431
432 #if BVH_FEATURE(BVH_INSTANCING)
433                                 if(prim_addr >= 0) {
434 #endif
435                                         int prim_addr2 = __float_as_int(leaf.y);
436                                         const uint type = __float_as_int(leaf.w);
437
438                                         /* Pop. */
439                                         node_addr = traversal_stack[stack_ptr].addr;
440                                         node_dist = traversal_stack[stack_ptr].dist;
441                                         --stack_ptr;
442
443                                         /* Primitive intersection. */
444                                         switch(type & PRIMITIVE_ALL) {
445                                         case PRIMITIVE_TRIANGLE: {
446                                                 int prim_count = prim_addr2 - prim_addr;
447                                                 if(prim_count < 3) {
448                                                         for(; prim_addr < prim_addr2; prim_addr++) {
449                                                                  BVH_DEBUG_NEXT_INTERSECTION();
450                                                                  kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
451                                                                  if(triangle_intersect(kg,
452                                                                                        isect,
453                                                                                        P,
454                                                                                        dir,
455                                                                                        visibility,
456                                                                                        object,
457                                                                                        prim_addr))
458                                                                  {
459                                                                          tfar = avxf(isect->t);
460                                                                          /* Shadow ray early termination. */
461                                                                          if(visibility == PATH_RAY_SHADOW_OPAQUE) {
462                                                                                  return true;
463                                                                          }
464                                                                  }
465                                                         }//for
466                                                 }
467                                                 else {
468                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
469                                                         if(triangle_intersect8(kg,
470                                                                                &isect,
471                                                                                P,
472                                                                                dir,
473                                                                                visibility,
474                                                                                object,
475                                                                                prim_addr,
476                                                                                prim_count,
477                                                                                0,
478                                                                                0,
479                                                                                NULL,
480                                                                                0.0f))
481                                                         {
482                                                                 tfar = avxf(isect->t);
483                                                                 if(visibility == PATH_RAY_SHADOW_OPAQUE) {
484                                                                         return true;
485                                                                 }
486                                                         }
487                                                 }//prim count
488                                                 break;
489                                         }
490 #if BVH_FEATURE(BVH_MOTION)
491                                                 case PRIMITIVE_MOTION_TRIANGLE: {
492                                                         for(; prim_addr < prim_addr2; prim_addr++) {
493                                                                 BVH_DEBUG_NEXT_INTERSECTION();
494                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
495                                                                 if(motion_triangle_intersect(kg,
496                                                                                              isect,
497                                                                                              P,
498                                                                                              dir,
499                                                                                              ray->time,
500                                                                                              visibility,
501                                                                                              object,
502                                                                                              prim_addr))
503                                                                 {
504                                                                         tfar = avxf(isect->t);
505                                                                         /* Shadow ray early termination. */
506                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE) {
507                                                                                 return true;
508                                                                         }
509                                                                 }
510                                                         }
511                                                         break;
512                                                 }
513 #endif  /* BVH_FEATURE(BVH_MOTION) */
514 #if BVH_FEATURE(BVH_HAIR)
515                                                 case PRIMITIVE_CURVE:
516                                                 case PRIMITIVE_MOTION_CURVE: {
517                                                         for(; prim_addr < prim_addr2; prim_addr++) {
518                                                                 BVH_DEBUG_NEXT_INTERSECTION();
519                                                                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
520                                                                 kernel_assert((curve_type & PRIMITIVE_ALL) == (type & PRIMITIVE_ALL));
521                                                                 bool hit;
522                                                                 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
523                                                                         hit = cardinal_curve_intersect(kg,
524                                                                                                        isect,
525                                                                                                        P,
526                                                                                                        dir,
527                                                                                                        visibility,
528                                                                                                        object,
529                                                                                                        prim_addr,
530                                                                                                        ray->time,
531                                                                                                        curve_type,
532                                                                                                        lcg_state,
533                                                                                                        difl,
534                                                                                                        extmax);
535                                                                 }
536                                                                 else {
537                                                                         hit = curve_intersect(kg,
538                                                                                               isect,
539                                                                                               P,
540                                                                                               dir,
541                                                                                               visibility,
542                                                                                               object,
543                                                                                               prim_addr,
544                                                                                               ray->time,
545                                                                                               curve_type,
546                                                                                               lcg_state,
547                                                                                               difl,
548                                                                                               extmax);
549                                                                 }
550                                                                 if(hit) {
551                                                                         tfar = avxf(isect->t);
552                                                                         /* Shadow ray early termination. */
553                                                                         if(visibility == PATH_RAY_SHADOW_OPAQUE) {
554                                                                                 return true;
555                                                                         }
556                                                                 }
557                                                         }
558                                                         break;
559                                                 }
560 #endif  /* BVH_FEATURE(BVH_HAIR) */
561                                         }
562                                 }
563 #if BVH_FEATURE(BVH_INSTANCING)
564                                 else {
565                                         /* Instance push. */
566                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
567
568 #  if BVH_FEATURE(BVH_MOTION)
569                                         qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist, &ob_itfm);
570 #  else
571                                         qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &node_dist);
572 #  endif
573
574                                         obvh_near_far_idx_calc(idir,
575                                                                &near_x, &near_y, &near_z,
576                                                                &far_x, &far_y, &far_z);
577                                         tfar = avxf(isect->t);
578 #  if BVH_FEATURE(BVH_HAIR)
579                                         dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
580 #  endif
581                                         idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
582 #  ifdef __KERNEL_AVX2__
583                                         P_idir = P*idir;
584                                         P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
585 #  endif
586 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
587                                         org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
588 #  endif
589
590                                         ++stack_ptr;
591                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
592                                         traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
593                                         traversal_stack[stack_ptr].dist = -FLT_MAX;
594
595                                         node_addr = kernel_tex_fetch(__object_node, object);
596
597                                         BVH_DEBUG_NEXT_INSTANCE();
598                                 }
599                         }
600 #endif  /* FEATURE(BVH_INSTANCING) */
601                 } while(node_addr != ENTRYPOINT_SENTINEL);
602
603 #if BVH_FEATURE(BVH_INSTANCING)
604                 if(stack_ptr >= 0) {
605                         kernel_assert(object != OBJECT_NONE);
606
607                         /* Instance pop. */
608 #  if BVH_FEATURE(BVH_MOTION)
609                         isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
610 #  else
611                         isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
612 #  endif
613
614                         obvh_near_far_idx_calc(idir,
615                                                &near_x, &near_y, &near_z,
616                                                &far_x, &far_y, &far_z);
617                         tfar = avxf(isect->t);
618 #  if BVH_FEATURE(BVH_HAIR)
619                         dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
620 #  endif
621                         idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
622 #  ifdef __KERNEL_AVX2__
623                         P_idir = P*idir;
624                         P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
625 #  endif
626 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
627                         org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
628 #  endif
629
630                         object = OBJECT_NONE;
631                         node_addr = traversal_stack[stack_ptr].addr;
632                         node_dist = traversal_stack[stack_ptr].dist;
633                         --stack_ptr;
634                 }
635 #endif  /* FEATURE(BVH_INSTANCING) */
636         } while(node_addr != ENTRYPOINT_SENTINEL);
637
638         return (isect->prim != PRIM_NONE);
639 }
640
/* Drop the node-intersection aliases selected at the top of this file
 * (the obvh_node_intersect* variants when BVH_HAIR is enabled, the
 * obvh_aligned_node_intersect* variants otherwise), so the names do not
 * leak past this header.
 *
 * NOTE(review): presumably this is also what allows the traversal template
 * to be included again with a different BVH feature set -- confirm against
 * the including file. */
641 #undef NODE_INTERSECT
642 #undef NODE_INTERSECT_ROBUST