Fix T48824: Crash when having too many ray-to-volume intersections
[blender.git] / intern / cycles / kernel / bvh / qbvh_volume_all.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2014, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 /* This is a template BVH traversal function for volumes, where
21  * various features can be enabled/disabled. This way we can compile optimized
22  * versions for each case without new features slowing things down.
23  *
24  * BVH_INSTANCING: object instancing
25  * BVH_MOTION: motion blur rendering
26  *
27  */
28
29 #if BVH_FEATURE(BVH_HAIR)
30 #  define NODE_INTERSECT qbvh_node_intersect
31 #else
32 #  define NODE_INTERSECT qbvh_aligned_node_intersect
33 #endif
34
35 ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
36                                              const Ray *ray,
37                                              Intersection *isect_array,
38                                              const uint max_hits,
39                                              const uint visibility)
40 {
41         /* TODO(sergey):
42          * - Test if pushing distance on the stack helps.
43          * - Likely and unlikely for if() statements.
44          * - Test restrict attribute for pointers.
45          */
46
47         /* Traversal stack in CUDA thread-local memory. */
48         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
49         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
50
51         /* Traversal variables in registers. */
52         int stack_ptr = 0;
53         int node_addr = kernel_data.bvh.root;
54
55         /* Ray parameters in registers. */
56         const float tmax = ray->t;
57         float3 P = ray->P;
58         float3 dir = bvh_clamp_direction(ray->D);
59         float3 idir = bvh_inverse_direction(dir);
60         int object = OBJECT_NONE;
61         float isect_t = tmax;
62
63 #if BVH_FEATURE(BVH_MOTION)
64         Transform ob_itfm;
65 #endif
66
67         uint num_hits = 0;
68         isect_array->t = tmax;
69
70 #ifndef __KERNEL_SSE41__
71         if(!isfinite(P.x)) {
72                 return false;
73         }
74 #endif
75
76 #if BVH_FEATURE(BVH_INSTANCING)
77         int num_hits_in_instance = 0;
78 #endif
79
80         ssef tnear(0.0f), tfar(isect_t);
81 #if BVH_FEATURE(BVH_HAIR)
82         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
83 #endif
84         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
85
86 #ifdef __KERNEL_AVX2__
87         float3 P_idir = P*idir;
88         sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
89 #endif
90 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
91         sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
92 #endif
93
94         /* Offsets to select the side that becomes the lower or upper bound. */
95         int near_x, near_y, near_z;
96         int far_x, far_y, far_z;
97
98         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
99         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
100         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
101
102         IsectPrecalc isect_precalc;
103         triangle_intersect_precalc(dir, &isect_precalc);
104
105         /* Traversal loop. */
106         do {
107                 do {
108                         /* Traverse internal nodes. */
109                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
110 #ifdef __VISIBILITY_FLAG__
111                                 float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
112                                 if((__float_as_uint(inodes.x) & visibility) == 0) {
113                                         /* Pop. */
114                                         node_addr = traversal_stack[stack_ptr].addr;
115                                         --stack_ptr;
116                                         continue;
117                                 }
118 #endif
119
120                                 ssef dist;
121                                 int child_mask = NODE_INTERSECT(kg,
122                                                                 tnear,
123                                                                 tfar,
124 #ifdef __KERNEL_AVX2__
125                                                                 P_idir4,
126 #endif
127 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
128                                                                 org4,
129 #endif
130 #if BVH_FEATURE(BVH_HAIR)
131                                                                 dir4,
132 #endif
133                                                                 idir4,
134                                                                 near_x, near_y, near_z,
135                                                                 far_x, far_y, far_z,
136                                                                 node_addr,
137                                                                 &dist);
138
139                                 if(child_mask != 0) {
140                                         float4 cnodes;
141 #if BVH_FEATURE(BVH_HAIR)
142                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
143                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
144                                         }
145                                         else
146 #endif
147                                         {
148                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
149                                         }
150
151                                         /* One child is hit, continue with that child. */
152                                         int r = __bscf(child_mask);
153                                         if(child_mask == 0) {
154                                                 node_addr = __float_as_int(cnodes[r]);
155                                                 continue;
156                                         }
157
158                                         /* Two children are hit, push far child, and continue with
159                                          * closer child.
160                                          */
161                                         int c0 = __float_as_int(cnodes[r]);
162                                         float d0 = ((float*)&dist)[r];
163                                         r = __bscf(child_mask);
164                                         int c1 = __float_as_int(cnodes[r]);
165                                         float d1 = ((float*)&dist)[r];
166                                         if(child_mask == 0) {
167                                                 if(d1 < d0) {
168                                                         node_addr = c1;
169                                                         ++stack_ptr;
170                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
171                                                         traversal_stack[stack_ptr].addr = c0;
172                                                         traversal_stack[stack_ptr].dist = d0;
173                                                         continue;
174                                                 }
175                                                 else {
176                                                         node_addr = c0;
177                                                         ++stack_ptr;
178                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
179                                                         traversal_stack[stack_ptr].addr = c1;
180                                                         traversal_stack[stack_ptr].dist = d1;
181                                                         continue;
182                                                 }
183                                         }
184
185                                         /* Here starts the slow path for 3 or 4 hit children. We push
186                                          * all nodes onto the stack to sort them there.
187                                          */
188                                         ++stack_ptr;
189                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
190                                         traversal_stack[stack_ptr].addr = c1;
191                                         traversal_stack[stack_ptr].dist = d1;
192                                         ++stack_ptr;
193                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
194                                         traversal_stack[stack_ptr].addr = c0;
195                                         traversal_stack[stack_ptr].dist = d0;
196
197                                         /* Three children are hit, push all onto stack and sort 3
198                                          * stack items, continue with closest child.
199                                          */
200                                         r = __bscf(child_mask);
201                                         int c2 = __float_as_int(cnodes[r]);
202                                         float d2 = ((float*)&dist)[r];
203                                         if(child_mask == 0) {
204                                                 ++stack_ptr;
205                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
206                                                 traversal_stack[stack_ptr].addr = c2;
207                                                 traversal_stack[stack_ptr].dist = d2;
208                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
209                                                                 &traversal_stack[stack_ptr - 1],
210                                                                 &traversal_stack[stack_ptr - 2]);
211                                                 node_addr = traversal_stack[stack_ptr].addr;
212                                                 --stack_ptr;
213                                                 continue;
214                                         }
215
216                                         /* Four children are hit, push all onto stack and sort 4
217                                          * stack items, continue with closest child.
218                                          */
219                                         r = __bscf(child_mask);
220                                         int c3 = __float_as_int(cnodes[r]);
221                                         float d3 = ((float*)&dist)[r];
222                                         ++stack_ptr;
223                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
224                                         traversal_stack[stack_ptr].addr = c3;
225                                         traversal_stack[stack_ptr].dist = d3;
226                                         ++stack_ptr;
227                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
228                                         traversal_stack[stack_ptr].addr = c2;
229                                         traversal_stack[stack_ptr].dist = d2;
230                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
231                                                         &traversal_stack[stack_ptr - 1],
232                                                         &traversal_stack[stack_ptr - 2],
233                                                         &traversal_stack[stack_ptr - 3]);
234                                 }
235
236                                 node_addr = traversal_stack[stack_ptr].addr;
237                                 --stack_ptr;
238                         }
239
240                         /* If node is leaf, fetch triangle list. */
241                         if(node_addr < 0) {
242                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
243                                 int prim_addr = __float_as_int(leaf.x);
244
245 #if BVH_FEATURE(BVH_INSTANCING)
246                                 if(prim_addr >= 0) {
247 #endif
248                                         int prim_addr2 = __float_as_int(leaf.y);
249                                         const uint type = __float_as_int(leaf.w);
250                                         const uint p_type = type & PRIMITIVE_ALL;
251                                         bool hit;
252
253                                         /* Pop. */
254                                         node_addr = traversal_stack[stack_ptr].addr;
255                                         --stack_ptr;
256
257                                         /* Primitive intersection. */
258                                         switch(p_type) {
259                                                 case PRIMITIVE_TRIANGLE: {
260                                                         for(; prim_addr < prim_addr2; prim_addr++) {
261                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
262                                                                 /* Only primitives from volume object. */
263                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
264                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
265                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
266                                                                         continue;
267                                                                 }
268                                                                 /* Intersect ray against primitive. */
269                                                                 hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr);
270                                                                 if(hit) {
271                                                                         /* Update number of hits now, so we do proper check on max bounces. */
272                                                                         num_hits++;
273 #if BVH_FEATURE(BVH_INSTANCING)
274                                                                         num_hits_in_instance++;
275 #endif
276                                                                         if(num_hits == max_hits) {
277 #if BVH_FEATURE(BVH_INSTANCING)
278 #  if BVH_FEATURE(BVH_MOTION)
279                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
280 #  else
281                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
282                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
283 #  endif
284                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
285                                                                                         (isect_array-i-1)->t *= t_fac;
286                                                                                 }
287 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
288                                                                                 return num_hits;
289                                                                         }
290                                                                         /* Move on to next entry in intersections array */
291                                                                         isect_array++;
292                                                                         isect_array->t = isect_t;
293                                                                 }
294                                                         }
295                                                         break;
296                                                 }
297 #if BVH_FEATURE(BVH_MOTION)
298                                                 case PRIMITIVE_MOTION_TRIANGLE: {
299                                                         for(; prim_addr < prim_addr2; prim_addr++) {
300                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
301                                                                 /* Only primitives from volume object. */
302                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
303                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
304                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
305                                                                         continue;
306                                                                 }
307                                                                 /* Intersect ray against primitive. */
308                                                                 hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
309                                                                 if(hit) {
310                                                                         /* Update number of hits now, so we do proper check on max bounces. */
311                                                                         num_hits++;
312 #  if BVH_FEATURE(BVH_INSTANCING)
313                                                                         num_hits_in_instance++;
314 #  endif
315                                                                         if(num_hits == max_hits) {
316 #  if BVH_FEATURE(BVH_INSTANCING)
317 #    if BVH_FEATURE(BVH_MOTION)
318                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
319 #    else
320                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
321                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
322 #    endif
323                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
324                                                                                         (isect_array-i-1)->t *= t_fac;
325                                                                                 }
326 #  endif  /* BVH_FEATURE(BVH_INSTANCING) */
327                                                                                 return num_hits;
328                                                                         }
329                                                                         /* Move on to next entry in intersections array */
330                                                                         isect_array++;
331                                                                         isect_array->t = isect_t;
332                                                                 }
333                                                         }
334                                                         break;
335                                                 }
336 #endif
337                                         }
338                                 }
339 #if BVH_FEATURE(BVH_INSTANCING)
340                                 else {
341                                         /* Instance push. */
342                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
343                                         int object_flag = kernel_tex_fetch(__object_flag, object);
344
345                                         if(object_flag & SD_OBJECT_HAS_VOLUME) {
346
347 #  if BVH_FEATURE(BVH_MOTION)
348                                                 bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
349 #  else
350                                                 bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
351 #  endif
352
353                                                 if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
354                                                 if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
355                                                 if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
356                                                 tfar = ssef(isect_t);
357                                                 idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
358 #  if BVH_FEATURE(BVH_HAIR)
359                                                 dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
360 #  endif
361 #  ifdef __KERNEL_AVX2__
362                                                 P_idir = P*idir;
363                                                 P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
364 #  endif
365 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
366                                                 org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
367 #  endif
368
369                                                 triangle_intersect_precalc(dir, &isect_precalc);
370                                                 num_hits_in_instance = 0;
371                                                 isect_array->t = isect_t;
372
373                                                 ++stack_ptr;
374                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
375                                                 traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
376
377                                                 node_addr = kernel_tex_fetch(__object_node, object);
378                                         }
379                                         else {
380                                                 /* Pop. */
381                                                 object = OBJECT_NONE;
382                                                 node_addr = traversal_stack[stack_ptr].addr;
383                                                 --stack_ptr;
384                                         }
385                                 }
386                         }
387 #endif  /* FEATURE(BVH_INSTANCING) */
388                 } while(node_addr != ENTRYPOINT_SENTINEL);
389
390 #if BVH_FEATURE(BVH_INSTANCING)
391                 if(stack_ptr >= 0) {
392                         kernel_assert(object != OBJECT_NONE);
393
394                         /* Instance pop. */
395                         if(num_hits_in_instance) {
396                                 float t_fac;
397 #  if BVH_FEATURE(BVH_MOTION)
398                                 bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
399 #  else
400                                 bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
401 #  endif
402                                 triangle_intersect_precalc(dir, &isect_precalc);
403                                 /* Scale isect->t to adjust for instancing. */
404                                 for(int i = 0; i < num_hits_in_instance; i++) {
405                                         (isect_array-i-1)->t *= t_fac;
406                                 }
407                         }
408                         else {
409                                 float ignore_t = FLT_MAX;
410 #  if BVH_FEATURE(BVH_MOTION)
411                                 bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
412 #  else
413                                 bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
414 #  endif
415                                 triangle_intersect_precalc(dir, &isect_precalc);
416                         }
417
418                         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
419                         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
420                         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
421                         tfar = ssef(isect_t);
422 #  if BVH_FEATURE(BVH_HAIR)
423                         dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
424 #  endif
425                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
426 #  ifdef __KERNEL_AVX2__
427                         P_idir = P*idir;
428                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
429 #  endif
430 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
431                         org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
432 #  endif
433
434                         triangle_intersect_precalc(dir, &isect_precalc);
435                         isect_t = tmax;
436                         isect_array->t = isect_t;
437
438                         object = OBJECT_NONE;
439                         node_addr = traversal_stack[stack_ptr].addr;
440                         --stack_ptr;
441                 }
442 #endif  /* FEATURE(BVH_INSTANCING) */
443         } while(node_addr != ENTRYPOINT_SENTINEL);
444
445         return num_hits;
446 }
447
448 #undef NODE_INTERSECT