Fix T48824: Crash when having too many ray-to-volume intersections
[blender.git] / intern / cycles / kernel / bvh / bvh_volume_all.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2014, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "qbvh_volume_all.h"
22 #endif
23
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #else
27 #  define NODE_INTERSECT bvh_aligned_node_intersect
28 #endif
29
30 /* This is a template BVH traversal function for volumes, where
31  * various features can be enabled/disabled. This way we can compile optimized
32  * versions for each case without new features slowing things down.
33  *
34  * BVH_INSTANCING: object instancing
35  * BVH_MOTION: motion blur rendering
36  *
37  */
38
39 ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
40                                             const Ray *ray,
41                                             Intersection *isect_array,
42                                             const uint max_hits,
43                                             const uint visibility)
44 {
45         /* todo:
46          * - test if pushing distance on the stack helps (for non shadow rays)
47          * - separate version for shadow rays
48          * - likely and unlikely for if() statements
49          * - test restrict attribute for pointers
50          */
51
52         /* traversal stack in CUDA thread-local memory */
53         int traversal_stack[BVH_STACK_SIZE];
54         traversal_stack[0] = ENTRYPOINT_SENTINEL;
55
56         /* traversal variables in registers */
57         int stack_ptr = 0;
58         int node_addr = kernel_data.bvh.root;
59
60         /* ray parameters in registers */
61         const float tmax = ray->t;
62         float3 P = ray->P;
63         float3 dir = bvh_clamp_direction(ray->D);
64         float3 idir = bvh_inverse_direction(dir);
65         int object = OBJECT_NONE;
66         float isect_t = tmax;
67
68 #if BVH_FEATURE(BVH_MOTION)
69         Transform ob_itfm;
70 #endif
71
72 #if BVH_FEATURE(BVH_INSTANCING)
73         int num_hits_in_instance = 0;
74 #endif
75
76         uint num_hits = 0;
77         isect_array->t = tmax;
78
79 #if defined(__KERNEL_SSE2__)
80         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
81         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
82
83         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
84         ssef Psplat[3], idirsplat[3];
85 #  if BVH_FEATURE(BVH_HAIR)
86         ssef tnear(0.0f), tfar(isect_t);
87 #  endif
88         shuffle_swap_t shufflexyz[3];
89
90         Psplat[0] = ssef(P.x);
91         Psplat[1] = ssef(P.y);
92         Psplat[2] = ssef(P.z);
93
94         ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
95
96         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
97 #endif
98
99         IsectPrecalc isect_precalc;
100         triangle_intersect_precalc(dir, &isect_precalc);
101
102         /* traversal loop */
103         do {
104                 do {
105                         /* traverse internal nodes */
106                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
107                                 int node_addr_child1, traverse_mask;
108                                 float dist[2];
109                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
110
111 #if !defined(__KERNEL_SSE2__)
112                                 traverse_mask = NODE_INTERSECT(kg,
113                                                                P,
114 #  if BVH_FEATURE(BVH_HAIR)
115                                                                dir,
116 #  endif
117                                                                idir,
118                                                                isect_t,
119                                                                node_addr,
120                                                                visibility,
121                                                                dist);
122 #else // __KERNEL_SSE2__
123                                 traverse_mask = NODE_INTERSECT(kg,
124                                                                P,
125                                                                dir,
126 #  if BVH_FEATURE(BVH_HAIR)
127                                                                tnear,
128                                                                tfar,
129 #  endif
130                                                                tsplat,
131                                                                Psplat,
132                                                                idirsplat,
133                                                                shufflexyz,
134                                                                node_addr,
135                                                                visibility,
136                                                                dist);
137 #endif // __KERNEL_SSE2__
138
139                                 node_addr = __float_as_int(cnodes.z);
140                                 node_addr_child1 = __float_as_int(cnodes.w);
141
142                                 if(traverse_mask == 3) {
143                                         /* Both children were intersected, push the farther one. */
144                                         bool is_closest_child1 = (dist[1] < dist[0]);
145                                         if(is_closest_child1) {
146                                                 int tmp = node_addr;
147                                                 node_addr = node_addr_child1;
148                                                 node_addr_child1 = tmp;
149                                         }
150
151                                         ++stack_ptr;
152                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
153                                         traversal_stack[stack_ptr] = node_addr_child1;
154                                 }
155                                 else {
156                                         /* One child was intersected. */
157                                         if(traverse_mask == 2) {
158                                                 node_addr = node_addr_child1;
159                                         }
160                                         else if(traverse_mask == 0) {
161                                                 /* Neither child was intersected. */
162                                                 node_addr = traversal_stack[stack_ptr];
163                                                 --stack_ptr;
164                                         }
165                                 }
166                         }
167
168                         /* if node is leaf, fetch triangle list */
169                         if(node_addr < 0) {
170                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
171                                 int prim_addr = __float_as_int(leaf.x);
172
173 #if BVH_FEATURE(BVH_INSTANCING)
174                                 if(prim_addr >= 0) {
175 #endif
176                                         const int prim_addr2 = __float_as_int(leaf.y);
177                                         const uint type = __float_as_int(leaf.w);
178                                         bool hit;
179
180                                         /* pop */
181                                         node_addr = traversal_stack[stack_ptr];
182                                         --stack_ptr;
183
184                                         /* primitive intersection */
185                                         switch(type & PRIMITIVE_ALL) {
186                                                 case PRIMITIVE_TRIANGLE: {
187                                                         /* intersect ray against primitive */
188                                                         for(; prim_addr < prim_addr2; prim_addr++) {
189                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
190                                                                 /* only primitives from volume object */
191                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
192                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
193                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
194                                                                         continue;
195                                                                 }
196                                                                 hit = triangle_intersect(kg,
197                                                                                          &isect_precalc,
198                                                                                          isect_array,
199                                                                                          P,
200                                                                                          visibility,
201                                                                                          object,
202                                                                                          prim_addr);
203                                                                 if(hit) {
204                                                                         /* Update number of hits now, so we do proper check on max bounces. */
205                                                                         num_hits++;
206 #if BVH_FEATURE(BVH_INSTANCING)
207                                                                         num_hits_in_instance++;
208 #endif
209                                                                         if(num_hits == max_hits) {
210 #if BVH_FEATURE(BVH_INSTANCING)
211 #  if BVH_FEATURE(BVH_MOTION)
212                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
213 #  else
214                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
215                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
216 #  endif
217                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
218                                                                                         (isect_array-i-1)->t *= t_fac;
219                                                                                 }
220 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
221                                                                                 return num_hits;
222                                                                         }
223                                                                         /* Move on to next entry in intersections array */
224                                                                         isect_array++;
225                                                                         isect_array->t = isect_t;
226                                                                 }
227                                                         }
228                                                         break;
229                                                 }
230 #if BVH_FEATURE(BVH_MOTION)
231                                                 case PRIMITIVE_MOTION_TRIANGLE: {
232                                                         /* intersect ray against primitive */
233                                                         for(; prim_addr < prim_addr2; prim_addr++) {
234                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
235                                                                 /* only primitives from volume object */
236                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
237                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
238                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
239                                                                         continue;
240                                                                 }
241                                                                 hit = motion_triangle_intersect(kg,
242                                                                                                 isect_array,
243                                                                                                 P,
244                                                                                                 dir,
245                                                                                                 ray->time,
246                                                                                                 visibility,
247                                                                                                 object,
248                                                                                                 prim_addr);
249                                                                 if(hit) {
250                                                                         /* Update number of hits now, so we do proper check on max bounces. */
251                                                                         num_hits++;
252 #  if BVH_FEATURE(BVH_INSTANCING)
253                                                                         num_hits_in_instance++;
254 #  endif
255                                                                         if(num_hits == max_hits) {
256 #  if BVH_FEATURE(BVH_INSTANCING)
257 #    if BVH_FEATURE(BVH_MOTION)
258                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
259 #    else
260                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
261                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
262 #    endif
263                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
264                                                                                         (isect_array-i-1)->t *= t_fac;
265                                                                                 }
266 #  endif  /* BVH_FEATURE(BVH_INSTANCING) */
267                                                                                 return num_hits;
268                                                                         }
269                                                                         /* Move on to next entry in intersections array */
270                                                                         isect_array++;
271                                                                         isect_array->t = isect_t;
272                                                                 }
273                                                         }
274                                                         break;
275                                                 }
276 #endif  /* BVH_MOTION */
277                                                 default: {
278                                                         break;
279                                                 }
280                                         }
281                                 }
282 #if BVH_FEATURE(BVH_INSTANCING)
283                                 else {
284                                         /* instance push */
285                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
286                                         int object_flag = kernel_tex_fetch(__object_flag, object);
287
288                                         if(object_flag & SD_OBJECT_HAS_VOLUME) {
289
290 #  if BVH_FEATURE(BVH_MOTION)
291                                                 bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
292 #  else
293                                                 bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
294 #  endif
295
296                                                 triangle_intersect_precalc(dir, &isect_precalc);
297                                                 num_hits_in_instance = 0;
298                                                 isect_array->t = isect_t;
299
300 #  if defined(__KERNEL_SSE2__)
301                                                 Psplat[0] = ssef(P.x);
302                                                 Psplat[1] = ssef(P.y);
303                                                 Psplat[2] = ssef(P.z);
304
305                                                 tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
306 #    if BVH_FEATURE(BVH_HAIR)
307                                                 tfar = ssef(isect_t);
308 #    endif
309
310                                                 gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
311 #  endif
312
313                                                 ++stack_ptr;
314                                                 kernel_assert(stack_ptr < BVH_STACK_SIZE);
315                                                 traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
316
317                                                 node_addr = kernel_tex_fetch(__object_node, object);
318                                         }
319                                         else {
320                                                 /* pop */
321                                                 object = OBJECT_NONE;
322                                                 node_addr = traversal_stack[stack_ptr];
323                                                 --stack_ptr;
324                                         }
325                                 }
326                         }
327 #endif  /* FEATURE(BVH_INSTANCING) */
328                 } while(node_addr != ENTRYPOINT_SENTINEL);
329
330 #if BVH_FEATURE(BVH_INSTANCING)
331                 if(stack_ptr >= 0) {
332                         kernel_assert(object != OBJECT_NONE);
333
334                         if(num_hits_in_instance) {
335                                 float t_fac;
336 #  if BVH_FEATURE(BVH_MOTION)
337                                 bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
338 #  else
339                                 bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
340 #  endif
341                                 triangle_intersect_precalc(dir, &isect_precalc);
342                                 /* Scale isect->t to adjust for instancing. */
343                                 for(int i = 0; i < num_hits_in_instance; i++) {
344                                         (isect_array-i-1)->t *= t_fac;
345                                 }
346                         }
347                         else {
348                                 float ignore_t = FLT_MAX;
349 #  if BVH_FEATURE(BVH_MOTION)
350                                 bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
351 #  else
352                                 bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
353 #  endif
354                                 triangle_intersect_precalc(dir, &isect_precalc);
355                         }
356
357                         isect_t = tmax;
358                         isect_array->t = isect_t;
359
360 #  if defined(__KERNEL_SSE2__)
361                         Psplat[0] = ssef(P.x);
362                         Psplat[1] = ssef(P.y);
363                         Psplat[2] = ssef(P.z);
364
365                         tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
366 #    if BVH_FEATURE(BVH_HAIR)
367                         tfar = ssef(isect_t);
368 #    endif
369
370                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
371 #  endif
372
373                         object = OBJECT_NONE;
374                         node_addr = traversal_stack[stack_ptr];
375                         --stack_ptr;
376                 }
377 #endif  /* FEATURE(BVH_MOTION) */
378         } while(node_addr != ENTRYPOINT_SENTINEL);
379
380         return num_hits;
381 }
382
383 ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
384                                          const Ray *ray,
385                                          Intersection *isect_array,
386                                          const uint max_hits,
387                                          const uint visibility)
388 {
389 #ifdef __QBVH__
390         if(kernel_data.bvh.use_qbvh) {
391                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
392                                                     ray,
393                                                     isect_array,
394                                                     max_hits,
395                                                     visibility);
396         }
397         else
398 #endif
399         {
400                 kernel_assert(kernel_data.bvh.use_qbvh == false);
401                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
402                                                    ray,
403                                                    isect_array,
404                                                    max_hits,
405                                                    visibility);
406         }
407 }
408
409 #undef BVH_FUNCTION_NAME
410 #undef BVH_FUNCTION_FEATURES
411 #undef NODE_INTERSECT