Fix T69044: OpenCL fail due to bad fract function.
[blender.git] / intern / cycles / kernel / bvh / qbvh_shadow_all.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function, where various features can be
18  * enabled/disabled. This way we can compile optimized versions for each case
19  * without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
22  * BVH_HAIR: hair curve rendering
23  * BVH_MOTION: motion blur rendering
24  */
25
/* Pick the node intersection routine used by the traversal function in this
 * header. With BVH_HAIR enabled the traversal passes an extra direction
 * argument (dir4) and checks nodes for PATH_RAY_NODE_UNALIGNED, so the
 * general qbvh_node_intersect is selected; otherwise the aligned-only
 * variant is used.
 */
26 #if BVH_FEATURE(BVH_HAIR)
27 #  define NODE_INTERSECT qbvh_node_intersect
28 #else
29 #  define NODE_INTERSECT qbvh_aligned_node_intersect
30 #endif
31
/* QBVH traversal for shadow rays that records hits instead of stopping at
 * the first one.
 *
 * Every primitive hit is looked up in __shaders. Traversal returns true
 * (light fully blocked) as soon as a hit's shader does not have
 * SD_HAS_TRANSPARENT_SHADOW set, or when a hit is found while *num_hits
 * already equals max_hits. Otherwise the hit is kept, *num_hits is
 * incremented and traversal continues. Returns false once the whole
 * hierarchy has been traversed without a blocking hit.
 *
 * kg          - kernel globals, used for all kernel_tex_fetch / kernel_data
 *               accesses.
 * ray         - ray to trace; P, D and t are read, and time is read when
 *               BVH_MOTION or BVH_HAIR is enabled.
 * isect_array - output array of intersections. The pointer is advanced by
 *               one after every recorded hit and the `t` of the new slot is
 *               then written.
 *               NOTE(review): since the slot at index *num_hits is written
 *               while *num_hits can reach max_hits, the caller appears to
 *               need room for max_hits + 1 entries - confirm with callers.
 * visibility  - mask tested against node and leaf flags when
 *               __VISIBILITY_FLAG__ is defined, and forwarded to the
 *               primitive intersection functions.
 * max_hits    - number of hits that may be recorded before a further hit
 *               is treated as blocking.
 * num_hits    - output; reset to 0 on entry and incremented per recorded
 *               hit.
 */
32 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
33                                              const Ray *ray,
34                                              Intersection *isect_array,
35                                              const uint visibility,
36                                              const uint max_hits,
37                                              uint *num_hits)
38 {
39   /* TODO(sergey):
40    *  - Test if pushing distance on the stack helps.
41    * - Likely and unlikely for if() statements.
42    * - Test restrict attribute for pointers.
43    */
44
45   /* Traversal stack: local array; slot 0 holds the sentinel that ends traversal. */
46   QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
47   traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
48
49   /* Traversal variables in registers. */
50   int stack_ptr = 0;
51   int node_addr = kernel_data.bvh.root;
52
53   /* Ray parameters in registers. */
54   const float tmax = ray->t;
55   float3 P = ray->P;
56   float3 dir = bvh_clamp_direction(ray->D);
57   float3 idir = bvh_inverse_direction(dir);
58   int object = OBJECT_NONE;
59   float isect_t = tmax;
60
61 #if BVH_FEATURE(BVH_MOTION)
62   Transform ob_itfm;
63 #endif
64
     /* Start with no recorded hits; the current output slot's t is seeded
      * with the ray length (presumably read as the distance limit by the
      * primitive intersection functions - confirm). */
65   *num_hits = 0;
66   isect_array->t = tmax;
67
68 #if BVH_FEATURE(BVH_INSTANCING)
     /* Hits recorded since the last instance push; their t is rescaled on
      * instance pop. */
69   int num_hits_in_instance = 0;
70 #endif
71
     /* SIMD copies of the ray interval and ray data handed to NODE_INTERSECT.
      * Which of org4 / P_idir4 / dir4 exist depends on __KERNEL_AVX2__ and
      * BVH_HAIR, mirroring the argument list of the NODE_INTERSECT call. */
72   ssef tnear(0.0f), tfar(isect_t);
73 #if BVH_FEATURE(BVH_HAIR)
74   sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
75 #endif
76   sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
77
78 #ifdef __KERNEL_AVX2__
79   float3 P_idir = P * idir;
80   sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
81 #endif
82 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
83   sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
84 #endif
85
86   /* Offsets to select the side that becomes the lower or upper bound. */
87   int near_x, near_y, near_z;
88   int far_x, far_y, far_z;
89   qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
90
91   /* Traversal loop. */
     /* The inner do/while runs until a sentinel is popped; the outer loop
      * then either handles an instance pop (when instancing is enabled) or
      * terminates. */
92   do {
93     do {
94       /* Traverse internal nodes. */
         /* Non-negative addresses are inner nodes, negative ones are leaves. */
95       while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
96         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
           /* inodes is only read inside feature-dependent code; presumably
            * the cast silences an unused-variable warning when none of
            * those features is enabled. */
97         (void)inodes;
98
           /* `false || ...` lets each optional rejection test be appended
            * by the preprocessor: skip the node when its visibility bits
            * (inodes.x) do not match, or when ray->time lies outside
            * [inodes.y, inodes.z]. */
99         if (false
100 #ifdef __VISIBILITY_FLAG__
101             || ((__float_as_uint(inodes.x) & visibility) == 0)
102 #endif
103 #if BVH_FEATURE(BVH_MOTION)
104             || UNLIKELY(ray->time < inodes.y) || UNLIKELY(ray->time > inodes.z)
105 #endif
106         ) {
107           /* Pop. */
108           node_addr = traversal_stack[stack_ptr].addr;
109           --stack_ptr;
110           continue;
111         }
112
            /* Intersect the node's children: child_mask has one bit per
             * child that was hit, dist receives the per-child distances
             * that are read below by child index. */
113         ssef dist;
114         int child_mask = NODE_INTERSECT(kg,
115                                         tnear,
116                                         tfar,
117 #ifdef __KERNEL_AVX2__
118                                         P_idir4,
119 #endif
120 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
121                                         org4,
122 #endif
123 #if BVH_FEATURE(BVH_HAIR)
124                                         dir4,
125 #endif
126                                         idir4,
127                                         near_x,
128                                         near_y,
129                                         near_z,
130                                         far_x,
131                                         far_y,
132                                         far_z,
133                                         node_addr,
134                                         &dist);
135
136         if (child_mask != 0) {
              /* Child addresses are stored at a different offset for nodes
               * flagged PATH_RAY_NODE_UNALIGNED (+13) than for the others
               * (+7). */
137           float4 cnodes;
138 #if BVH_FEATURE(BVH_HAIR)
139           if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
140             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
141           }
142           else
143 #endif
144           {
145             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
146           }
147
148           /* One child is hit, continue with that child. */
              /* __bscf presumably returns the index of a set bit and clears
               * it in child_mask; the `child_mask == 0` tests below rely on
               * the mask shrinking with every call - confirm. */
149           int r = __bscf(child_mask);
150           if (child_mask == 0) {
151             node_addr = __float_as_int(cnodes[r]);
152             continue;
153           }
154
155           /* Two children are hit, push far child, and continue with
156            * closer child.
157            */
158           int c0 = __float_as_int(cnodes[r]);
159           float d0 = ((float *)&dist)[r];
160           r = __bscf(child_mask);
161           int c1 = __float_as_int(cnodes[r]);
162           float d1 = ((float *)&dist)[r];
163           if (child_mask == 0) {
164             if (d1 < d0) {
165               node_addr = c1;
166               ++stack_ptr;
167               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
168               traversal_stack[stack_ptr].addr = c0;
169               traversal_stack[stack_ptr].dist = d0;
170               continue;
171             }
172             else {
173               node_addr = c0;
174               ++stack_ptr;
175               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
176               traversal_stack[stack_ptr].addr = c1;
177               traversal_stack[stack_ptr].dist = d1;
178               continue;
179             }
180           }
181
182           /* Here starts the slow path for 3 or 4 hit children. We push
183            * all nodes onto the stack to sort them there.
184            */
185           ++stack_ptr;
186           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
187           traversal_stack[stack_ptr].addr = c1;
188           traversal_stack[stack_ptr].dist = d1;
189           ++stack_ptr;
190           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
191           traversal_stack[stack_ptr].addr = c0;
192           traversal_stack[stack_ptr].dist = d0;
193
194           /* Three children are hit, push all onto stack and sort 3
195            * stack items, continue with closest child.
196            */
197           r = __bscf(child_mask);
198           int c2 = __float_as_int(cnodes[r]);
199           float d2 = ((float *)&dist)[r];
200           if (child_mask == 0) {
201             ++stack_ptr;
202             kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
203             traversal_stack[stack_ptr].addr = c2;
204             traversal_stack[stack_ptr].dist = d2;
205             qbvh_stack_sort(&traversal_stack[stack_ptr],
206                             &traversal_stack[stack_ptr - 1],
207                             &traversal_stack[stack_ptr - 2]);
208             node_addr = traversal_stack[stack_ptr].addr;
209             --stack_ptr;
210             continue;
211           }
212
213           /* Four children are hit, push all onto stack and sort 4
214            * stack items, continue with closest child.
215            */
216           r = __bscf(child_mask);
217           int c3 = __float_as_int(cnodes[r]);
218           float d3 = ((float *)&dist)[r];
219           ++stack_ptr;
220           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
221           traversal_stack[stack_ptr].addr = c3;
222           traversal_stack[stack_ptr].dist = d3;
223           ++stack_ptr;
224           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
225           traversal_stack[stack_ptr].addr = c2;
226           traversal_stack[stack_ptr].dist = d2;
227           qbvh_stack_sort(&traversal_stack[stack_ptr],
228                           &traversal_stack[stack_ptr - 1],
229                           &traversal_stack[stack_ptr - 2],
230                           &traversal_stack[stack_ptr - 3]);
231         }
232
            /* Pop the next node. Reached when no child was hit, or after
             * the four-children case has pushed and sorted its entries. */
233         node_addr = traversal_stack[stack_ptr].addr;
234         --stack_ptr;
235       }
236
237       /* If node is leaf, fetch triangle list. */
          /* Leaf layout as used below: x = first primitive address (negative
           * for an instance when instancing is enabled), y = end primitive
           * address (exclusive), z = visibility bits, w = primitive type. */
238       if (node_addr < 0) {
239         float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
240 #ifdef __VISIBILITY_FLAG__
241         if ((__float_as_uint(leaf.z) & visibility) == 0) {
242           /* Pop. */
243           node_addr = traversal_stack[stack_ptr].addr;
244           --stack_ptr;
245           continue;
246         }
247 #endif
248
249         int prim_addr = __float_as_int(leaf.x);
250
251 #if BVH_FEATURE(BVH_INSTANCING)
252         if (prim_addr >= 0) {
253 #endif
254           int prim_addr2 = __float_as_int(leaf.y);
255           const uint type = __float_as_int(leaf.w);
256           const uint p_type = type & PRIMITIVE_ALL;
257
258           /* Pop. */
              /* The next node is popped before the primitives are tested, so
               * the loop below only has to advance prim_addr. */
259           node_addr = traversal_stack[stack_ptr].addr;
260           --stack_ptr;
261
262           /* Primitive intersection. */
263           while (prim_addr < prim_addr2) {
264             kernel_assert((kernel_tex_fetch(__prim_type, prim_addr) & PRIMITIVE_ALL) == p_type);
265             bool hit;
266
267             /* todo: specialized intersect functions which don't fill in
268              * isect unless needed and check SD_HAS_TRANSPARENT_SHADOW?
269              * might give a few % performance improvement */
270
                /* All primitives of a leaf share p_type (asserted above), so
                 * the dispatch uses the leaf's type. Unknown or disabled
                 * types count as a miss. */
271             switch (p_type) {
272               case PRIMITIVE_TRIANGLE: {
273                 hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
274                 break;
275               }
276 #if BVH_FEATURE(BVH_MOTION)
277               case PRIMITIVE_MOTION_TRIANGLE: {
278                 hit = motion_triangle_intersect(
279                     kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
280                 break;
281               }
282 #endif
283 #if BVH_FEATURE(BVH_HAIR)
284               case PRIMITIVE_CURVE:
285               case PRIMITIVE_MOTION_CURVE: {
286                 const uint curve_type = kernel_tex_fetch(__prim_type, prim_addr);
287                 if (kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) {
288                   hit = cardinal_curve_intersect(kg,
289                                                  isect_array,
290                                                  P,
291                                                  dir,
292                                                  visibility,
293                                                  object,
294                                                  prim_addr,
295                                                  ray->time,
296                                                  curve_type);
297                 }
298                 else {
299                   hit = curve_intersect(kg,
300                                         isect_array,
301                                         P,
302                                         dir,
303                                         visibility,
304                                         object,
305                                         prim_addr,
306                                         ray->time,
307                                         curve_type);
308                 }
309                 break;
310               }
311 #endif
312               default: {
313                 hit = false;
314                 break;
315               }
316             }
317
318             /* Shadow ray early termination. */
319             if (hit) {
320               /* detect if this surface has a shader with transparent shadows */
321
322               /* todo: optimize so primitive visibility flag indicates if
323                * the primitive has a transparent shadow shader? */
324               int prim = kernel_tex_fetch(__prim_index, isect_array->prim);
325               int shader = 0;
326
                  /* Triangles take their shader from __tri_shader; with
                   * __HAIR__ anything else takes it from the z component of
                   * its __curves entry. */
327 #ifdef __HAIR__
328               if (kernel_tex_fetch(__prim_type, isect_array->prim) & PRIMITIVE_ALL_TRIANGLE)
329 #endif
330               {
331                 shader = kernel_tex_fetch(__tri_shader, prim);
332               }
333 #ifdef __HAIR__
334               else {
335                 float4 str = kernel_tex_fetch(__curves, prim);
336                 shader = __float_as_int(str.z);
337               }
338 #endif
339               int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
340
341               /* if no transparent shadows, all light is blocked */
342               if (!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
343                 return true;
344               }
345               /* if maximum number of hits reached, block all light */
346               else if (*num_hits == max_hits) {
347                 return true;
348               }
349
350               /* move on to next entry in intersections array */
351               isect_array++;
352               (*num_hits)++;
353 #if BVH_FEATURE(BVH_INSTANCING)
354               num_hits_in_instance++;
355 #endif
356
                  /* Seed the fresh slot's t with the current distance limit. */
357               isect_array->t = isect_t;
358             }
359
360             prim_addr++;
361           }
              /* The brace below closes `if (prim_addr >= 0)` when instancing
               * is enabled, and `if (node_addr < 0)` otherwise; in the
               * instancing build the latter is closed inside the #if region
               * that follows. */
362         }
363 #if BVH_FEATURE(BVH_INSTANCING)
364         else {
365           /* Instance push. */
              /* A negative prim_addr encodes an index into __prim_object.
               * The push call updates P, dir, idir and returns the new
               * isect_t; everything derived from them is then refreshed. */
366           object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
367
368 #  if BVH_FEATURE(BVH_MOTION)
369           isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
370 #  else
371           isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
372 #  endif
373
374           num_hits_in_instance = 0;
375           isect_array->t = isect_t;
376
377           qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
378           tfar = ssef(isect_t);
379 #  if BVH_FEATURE(BVH_HAIR)
380           dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
381 #  endif
382           idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
383 #  ifdef __KERNEL_AVX2__
384           P_idir = P * idir;
385           P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
386 #  endif
387 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
388           org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
389 #  endif
390
              /* Push a sentinel so the inner loop stops once the instance's
               * nodes are exhausted, then continue at the object's root
               * node. */
391           ++stack_ptr;
392           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
393           traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
394
395           node_addr = kernel_tex_fetch(__object_node, object);
396         }
397       }
398 #endif /* FEATURE(BVH_INSTANCING) */
399     } while (node_addr != ENTRYPOINT_SENTINEL);
400
401 #if BVH_FEATURE(BVH_INSTANCING)
        /* Popping the initial sentinel (slot 0) leaves stack_ptr at -1, so a
         * non-negative stack_ptr here means the sentinel came from an
         * instance push and traversal continues after the pop. */
402     if (stack_ptr >= 0) {
403       kernel_assert(object != OBJECT_NONE);
404
405       /* Instance pop. */
406       if (num_hits_in_instance) {
407         float t_fac;
408 #  if BVH_FEATURE(BVH_MOTION)
409         bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
410 #  else
411         bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
412 #  endif
413         /* Scale isect->t to adjust for instancing. */
            /* Only the num_hits_in_instance entries written since the push
             * are touched, i.e. the ones directly before isect_array. */
414         for (int i = 0; i < num_hits_in_instance; i++) {
415           (isect_array - i - 1)->t *= t_fac;
416         }
417       }
418       else {
419 #  if BVH_FEATURE(BVH_MOTION)
420         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
421 #  else
422         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
423 #  endif
424       }
425
          /* Reset the distance limit to the original ray length and refresh
           * everything derived from P, dir and idir. */
426       isect_t = tmax;
427       isect_array->t = isect_t;
428
429       qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
430       tfar = ssef(isect_t);
431 #  if BVH_FEATURE(BVH_HAIR)
432       dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
433 #  endif
434       idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
435 #  ifdef __KERNEL_AVX2__
436       P_idir = P * idir;
437       P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
438 #  endif
439 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
440       org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
441 #  endif
442
443       object = OBJECT_NONE;
444       node_addr = traversal_stack[stack_ptr].addr;
445       --stack_ptr;
446     }
447 #endif /* FEATURE(BVH_INSTANCING) */
448   } while (node_addr != ENTRYPOINT_SENTINEL);
449
      /* Traversal finished without a blocking hit; *num_hits holds the number
       * of recorded hits. */
450   return false;
451 }
452
/* NODE_INTERSECT is defined at the top of this header; remove it again,
 * presumably so the header can be included repeatedly with different
 * feature sets. */
453 #undef NODE_INTERSECT