ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / bvh / qbvh_volume_all.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for volumes, where
18  * various features can be enabled/disabled. This way we can compile optimized
19  * versions for each case without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
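22  * BVH_MOTION: motion blur rendering
    * BVH_HAIR: selects the node intersection routine and the extra ray data
    *           passed to it (see NODE_INTERSECT below)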
23  */
24
/* NODE_INTERSECT names the inner-node intersection routine called by the
 * traversal function in this file: qbvh_node_intersect when BVH_HAIR is
 * enabled, qbvh_aligned_node_intersect otherwise.
 */
25 #if BVH_FEATURE(BVH_HAIR)
26 #  define NODE_INTERSECT qbvh_node_intersect
27 #else
28 #  define NODE_INTERSECT qbvh_aligned_node_intersect
29 #endif
30
/* Walk the BVH starting at kernel_data.bvh.root and record every triangle hit
 * (plus motion triangle hits when BVH_MOTION is enabled) whose object carries
 * the SD_OBJECT_HAS_VOLUME flag.
 *
 * kg:          kernel globals, forwarded to the node, primitive and instance
 *              helpers.
 * ray:         ray to trace. P, D and t are read here (and time for motion
 *              triangles); the ray is also handed to the instance helpers.
 * isect_array: output array. The pointer is advanced after each recorded hit
 *              and the `t` of the slot following the last hit is written as
 *              well, including right before returning at max_hits, so the
 *              array needs room for max_hits + 1 entries.
 * max_hits:    the function returns as soon as num_hits == max_hits. The test
 *              is an equality made after incrementing num_hits, so a value of
 *              0 never triggers it.
 * visibility:  bit mask tested against node and leaf visibility bits and passed
 *              on to the primitive intersection routines.
 *
 * Hits recorded while inside an instance get their `t` multiplied by a factor
 * when the instance is left, or when returning early at max_hits.
 *
 * Returns the number of hits recorded.
 */
31 ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              Intersection *isect_array,
34                                              const uint max_hits,
35                                              const uint visibility)
36 {
37   /* TODO(sergey):
38    * - Test if pushing distance on the stack helps.
39    * - Likely and unlikely for if() statements.
40    * - Test restrict attribute for pointers.
41    */
42
43   /* Traversal stack in CUDA thread-local memory. */
44   QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
     /* Slot 0 holds the sentinel: popping it (stack_ptr becomes -1) ends the
      * whole traversal.
      */
45   traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
46
47   /* Traversal variables in registers. */
48   int stack_ptr = 0;
49   int node_addr = kernel_data.bvh.root;
50
51   /* Ray parameters in registers. */
52   const float tmax = ray->t;
53   float3 P = ray->P;
54   float3 dir = bvh_clamp_direction(ray->D);
55   float3 idir = bvh_inverse_direction(dir);
     /* OBJECT_NONE while traversing the top level; holds the instanced object
      * between an instance push and the matching pop.
      */
56   int object = OBJECT_NONE;
     /* Current far limit of the traversal: replaced by the result of the
      * instance push and reset to tmax on instance pop.
      */
57   float isect_t = tmax;
58
59 #if BVH_FEATURE(BVH_MOTION)
60   Transform ob_itfm;
61 #endif
62
63   uint num_hits = 0;
     /* Seed the t of the first output slot. Presumably the primitive
      * intersection routines use it as their distance limit - TODO confirm.
      */
64   isect_array->t = tmax;
65
66 #if BVH_FEATURE(BVH_INSTANCING)
     /* Hits recorded since the last instance push; these are the entries whose
      * t is rescaled on instance pop or early return.
      */
67   int num_hits_in_instance = 0;
68 #endif
69
     /* Ray interval and direction data in the form passed to NODE_INTERSECT. */
70   ssef tnear(0.0f), tfar(isect_t);
71 #if BVH_FEATURE(BVH_HAIR)
72   sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
73 #endif
74   sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
75
     /* AVX2 builds pass the origin pre-multiplied by the inverse direction;
      * the plain origin is only needed for BVH_HAIR or non-AVX2 builds.
      */
76 #ifdef __KERNEL_AVX2__
77   float3 P_idir = P * idir;
78   sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
79 #endif
80 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
81   sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
82 #endif
83
84   /* Offsets to select the side that becomes the lower or upper bound. */
85   int near_x, near_y, near_z;
86   int far_x, far_y, far_z;
87   qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
88
89   /* Traversal loop. */
     /* The outer loop runs once per top-level pass or instance visit; the inner
      * do-loop alternates between descending inner nodes and handling a leaf
      * until a sentinel is popped.
      */
90   do {
91     do {
92       /* Traverse internal nodes. */
93       while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
94         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
95
           /* Skip the node when none of the requested visibility bits are set
            * in inodes.x.
            */
96 #ifdef __VISIBILITY_FLAG__
97         if ((__float_as_uint(inodes.x) & visibility) == 0) {
98           /* Pop. */
99           node_addr = traversal_stack[stack_ptr].addr;
100           --stack_ptr;
101           continue;
102         }
103 #endif
104
            /* dist is filled through the pointer passed to NODE_INTERSECT; lane r
             * is read below as the distance of child r.
             */
105         ssef dist;
106         int child_mask = NODE_INTERSECT(kg,
107                                         tnear,
108                                         tfar,
109 #ifdef __KERNEL_AVX2__
110                                         P_idir4,
111 #endif
112 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
113                                         org4,
114 #endif
115 #if BVH_FEATURE(BVH_HAIR)
116                                         dir4,
117 #endif
118                                         idir4,
119                                         near_x,
120                                         near_y,
121                                         near_z,
122                                         far_x,
123                                         far_y,
124                                         far_z,
125                                         node_addr,
126                                         &dist);
127
128         if (child_mask != 0) {
              /* Child addresses sit at a different offset for unaligned nodes
               * (node_addr + 13) than for aligned ones (node_addr + 7).
               */
129           float4 cnodes;
130 #if BVH_FEATURE(BVH_HAIR)
131           if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
132             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 13);
133           }
134           else
135 #endif
136           {
137             cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 7);
138           }
139
140           /* One child is hit, continue with that child. */
              /* __bscf is used both for its result (child index r) and for its
               * effect on child_mask: the mask is compared with 0 right after
               * each call to tell whether more hit children remain. Presumably
               * it returns and clears the lowest set bit - TODO confirm.
               */
141           int r = __bscf(child_mask);
142           if (child_mask == 0) {
143             node_addr = __float_as_int(cnodes[r]);
144             continue;
145           }
146
147           /* Two children are hit, push far child, and continue with
148            * closer child.
149            */
150           int c0 = __float_as_int(cnodes[r]);
151           float d0 = ((float *)&dist)[r];
152           r = __bscf(child_mask);
153           int c1 = __float_as_int(cnodes[r]);
154           float d1 = ((float *)&dist)[r];
155           if (child_mask == 0) {
156             if (d1 < d0) {
157               node_addr = c1;
158               ++stack_ptr;
159               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
160               traversal_stack[stack_ptr].addr = c0;
161               traversal_stack[stack_ptr].dist = d0;
162               continue;
163             }
164             else {
165               node_addr = c0;
166               ++stack_ptr;
167               kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
168               traversal_stack[stack_ptr].addr = c1;
169               traversal_stack[stack_ptr].dist = d1;
170               continue;
171             }
172           }
173
174           /* Here starts the slow path for 3 or 4 hit children. We push
175            * all nodes onto the stack to sort them there.
176            */
177           ++stack_ptr;
178           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
179           traversal_stack[stack_ptr].addr = c1;
180           traversal_stack[stack_ptr].dist = d1;
181           ++stack_ptr;
182           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
183           traversal_stack[stack_ptr].addr = c0;
184           traversal_stack[stack_ptr].dist = d0;
185
186           /* Three children are hit, push all onto stack and sort 3
187            * stack items, continue with closest child.
188            */
189           r = __bscf(child_mask);
190           int c2 = __float_as_int(cnodes[r]);
191           float d2 = ((float *)&dist)[r];
192           if (child_mask == 0) {
193             ++stack_ptr;
194             kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
195             traversal_stack[stack_ptr].addr = c2;
196             traversal_stack[stack_ptr].dist = d2;
197             qbvh_stack_sort(&traversal_stack[stack_ptr],
198                             &traversal_stack[stack_ptr - 1],
199                             &traversal_stack[stack_ptr - 2]);
200             node_addr = traversal_stack[stack_ptr].addr;
201             --stack_ptr;
202             continue;
203           }
204
205           /* Four children are hit, push all onto stack and sort 4
206            * stack items, continue with closest child.
207            */
208           r = __bscf(child_mask);
209           int c3 = __float_as_int(cnodes[r]);
210           float d3 = ((float *)&dist)[r];
211           ++stack_ptr;
212           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
213           traversal_stack[stack_ptr].addr = c3;
214           traversal_stack[stack_ptr].dist = d3;
215           ++stack_ptr;
216           kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
217           traversal_stack[stack_ptr].addr = c2;
218           traversal_stack[stack_ptr].dist = d2;
219           qbvh_stack_sort(&traversal_stack[stack_ptr],
220                           &traversal_stack[stack_ptr - 1],
221                           &traversal_stack[stack_ptr - 2],
222                           &traversal_stack[stack_ptr - 3]);
223         }
224
            /* Reached when no child was hit and also after the four-children
             * case above: continue with whatever is on top of the stack.
             */
225         node_addr = traversal_stack[stack_ptr].addr;
226         --stack_ptr;
227       }
228
229       /* If node is leaf, fetch triangle list. */
230       if (node_addr < 0) {
231         float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
232
            /* Unlike the inner node test, this visibility test is not guarded by
             * __VISIBILITY_FLAG__.
             */
233         if ((__float_as_uint(leaf.z) & visibility) == 0) {
234           /* Pop. */
235           node_addr = traversal_stack[stack_ptr].addr;
236           --stack_ptr;
237           continue;
238         }
239
240         int prim_addr = __float_as_int(leaf.x);
241
            /* With instancing, a non-negative prim_addr means the leaf references
             * the primitives [prim_addr, prim_addr2); a negative value encodes an
             * instance and is handled in the else branch further down.
             */
242 #if BVH_FEATURE(BVH_INSTANCING)
243         if (prim_addr >= 0) {
244 #endif
245           int prim_addr2 = __float_as_int(leaf.y);
246           const uint type = __float_as_int(leaf.w);
247           const uint p_type = type & PRIMITIVE_ALL;
248           bool hit;
249
250           /* Pop. */
              /* The next node is chosen before the primitives are tested; the
               * loops below only touch prim_addr and the hit bookkeeping.
               */
251           node_addr = traversal_stack[stack_ptr].addr;
252           --stack_ptr;
253
254           /* Primitive intersection. */
              /* Primitive types without a case here are skipped. */
255           switch (p_type) {
256             case PRIMITIVE_TRIANGLE: {
257               for (; prim_addr < prim_addr2; prim_addr++) {
258                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
259                 /* Only primitives from volume object. */
                    /* At the top level the object is looked up per primitive;
                     * inside an instance it is the instanced object.
                     */
260                 uint tri_object = (object == OBJECT_NONE) ?
261                                       kernel_tex_fetch(__prim_object, prim_addr) :
262                                       object;
263                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
264                 if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
265                   continue;
266                 }
267                 /* Intersect ray against primitive. */
268                 hit = triangle_intersect(kg, isect_array, P, dir, visibility, object, prim_addr);
269                 if (hit) {
270                   /* Move on to next entry in intersections array. */
271                   isect_array++;
272                   num_hits++;
273 #if BVH_FEATURE(BVH_INSTANCING)
274                   num_hits_in_instance++;
275 #endif
                      /* The new slot's t is seeded before the max_hits test, so
                       * slot [max_hits] of the caller's array is written too.
                       */
276                   isect_array->t = isect_t;
277                   if (num_hits == max_hits) {
                        /* Returning from inside an instance: rescale the t of the
                         * hits recorded since the push.
                         * NOTE(review): the factor is computed from the current
                         * `dir`, which was handed to the instance push by pointer,
                         * while the regular pop obtains its factor from
                         * bvh_instance_*pop_factor with `ray` - confirm both give
                         * the same scale.
                         */
278 #if BVH_FEATURE(BVH_INSTANCING)
279                     if (object != OBJECT_NONE) {
280 #  if BVH_FEATURE(BVH_MOTION)
281                       float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
282 #  else
283                       Transform itfm = object_fetch_transform(
284                           kg, object, OBJECT_INVERSE_TRANSFORM);
285                       float t_fac = 1.0f / len(transform_direction(&itfm, dir));
286 #  endif
287                       for (int i = 0; i < num_hits_in_instance; i++) {
288                         (isect_array - i - 1)->t *= t_fac;
289                       }
290                     }
291 #endif /* BVH_FEATURE(BVH_INSTANCING) */
292                     return num_hits;
293                   }
294                 }
295               }
296               break;
297             }
              /* Same procedure as PRIMITIVE_TRIANGLE, using
               * motion_triangle_intersect with ray->time.
               */
298 #if BVH_FEATURE(BVH_MOTION)
299             case PRIMITIVE_MOTION_TRIANGLE: {
300               for (; prim_addr < prim_addr2; prim_addr++) {
301                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
302                 /* Only primitives from volume object. */
303                 uint tri_object = (object == OBJECT_NONE) ?
304                                       kernel_tex_fetch(__prim_object, prim_addr) :
305                                       object;
306                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
307                 if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
308                   continue;
309                 }
310                 /* Intersect ray against primitive. */
311                 hit = motion_triangle_intersect(
312                     kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
313                 if (hit) {
314                   /* Move on to next entry in intersections array. */
315                   isect_array++;
316                   num_hits++;
317 #  if BVH_FEATURE(BVH_INSTANCING)
318                   num_hits_in_instance++;
319 #  endif
320                   isect_array->t = isect_t;
321                   if (num_hits == max_hits) {
                        /* This case is only compiled with BVH_MOTION, so the
                         * nested BVH_MOTION test below always takes its first
                         * branch here and the #else part is never compiled.
                         */
322 #  if BVH_FEATURE(BVH_INSTANCING)
323                     if (object != OBJECT_NONE) {
324 #    if BVH_FEATURE(BVH_MOTION)
325                       float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
326 #    else
327                       Transform itfm = object_fetch_transform(
328                           kg, object, OBJECT_INVERSE_TRANSFORM);
329                       float t_fac = 1.0f / len(transform_direction(&itfm, dir));
330 #    endif
331                       for (int i = 0; i < num_hits_in_instance; i++) {
332                         (isect_array - i - 1)->t *= t_fac;
333                       }
334                     }
335 #  endif /* BVH_FEATURE(BVH_INSTANCING) */
336                     return num_hits;
337                   }
338                 }
339               }
340               break;
341             }
342 #endif
343           }
            /* The brace below closes `if (prim_addr >= 0)` when BVH_INSTANCING is
             * enabled and `if (node_addr < 0)` when it is not; in the former case
             * the leaf `if` is closed inside the #if block that follows.
             */
344         }
345 #if BVH_FEATURE(BVH_INSTANCING)
346         else {
347           /* Instance push. */
              /* -prim_addr - 1 indexes __prim_object to get the instanced object. */
348           object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
349           int object_flag = kernel_tex_fetch(__object_flag, object);
              /* Only instances of objects flagged as having a volume are entered. */
350           if (object_flag & SD_OBJECT_HAS_VOLUME) {
                /* The push helper receives P, dir and idir by pointer and returns
                 * the new distance limit.
                 */
351 #  if BVH_FEATURE(BVH_MOTION)
352             isect_t = bvh_instance_motion_push(
353                 kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
354 #  else
355             isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
356 #  endif
357
                /* Rebuild everything that was derived from P, dir, idir and
                 * isect_t.
                 */
358             qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
359             tfar = ssef(isect_t);
360             idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
361 #  if BVH_FEATURE(BVH_HAIR)
362             dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
363 #  endif
364 #  ifdef __KERNEL_AVX2__
365             P_idir = P * idir;
366             P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
367 #  endif
368 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
369             org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
370 #  endif
371
372             num_hits_in_instance = 0;
373             isect_array->t = isect_t;
374
                /* Push a sentinel so that finishing the instance's nodes leaves
                 * the inner loops and reaches the instance pop below.
                 */
375             ++stack_ptr;
376             kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
377             traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
378
379             node_addr = kernel_tex_fetch(__object_node, object);
380           }
381           else {
382             /* Pop. */
                /* Object without volume: the instance is skipped without a push. */
383             object = OBJECT_NONE;
384             node_addr = traversal_stack[stack_ptr].addr;
385             --stack_ptr;
386           }
387         }
388       }
389 #endif /* FEATURE(BVH_INSTANCING) */
390     } while (node_addr != ENTRYPOINT_SENTINEL);
391
392 #if BVH_FEATURE(BVH_INSTANCING)
        /* A sentinel was popped. stack_ptr >= 0 means it was one pushed by an
         * instance push rather than the one stored in slot 0.
         */
393     if (stack_ptr >= 0) {
394       kernel_assert(object != OBJECT_NONE);
395
396       /* Instance pop. */
          /* With hits recorded in this instance the pop variant that also outputs
           * t_fac is used; otherwise the plain pop is called with FLT_MAX.
           */
397       if (num_hits_in_instance) {
398         float t_fac;
399 #  if BVH_FEATURE(BVH_MOTION)
400         bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
401 #  else
402         bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
403 #  endif
404         /* Scale isect->t to adjust for instancing. */
405         for (int i = 0; i < num_hits_in_instance; i++) {
406           (isect_array - i - 1)->t *= t_fac;
407         }
408       }
409       else {
410 #  if BVH_FEATURE(BVH_MOTION)
411         bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
412 #  else
413         bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
414 #  endif
415       }
416
          /* Back at the top level: the distance limit returns to the ray's
           * original t and the derived SIMD values are rebuilt.
           */
417       isect_t = tmax;
418       isect_array->t = isect_t;
419
420       qbvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
421       tfar = ssef(isect_t);
422 #  if BVH_FEATURE(BVH_HAIR)
423       dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
424 #  endif
425       idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
426 #  ifdef __KERNEL_AVX2__
427       P_idir = P * idir;
428       P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
429 #  endif
430 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
431       org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
432 #  endif
433
          /* Resume with the node that was below the instance's sentinel. */
434       object = OBJECT_NONE;
435       node_addr = traversal_stack[stack_ptr].addr;
436       --stack_ptr;
437     }
438 #endif /* FEATURE(BVH_INSTANCING) */
439   } while (node_addr != ENTRYPOINT_SENTINEL);
440
441   return num_hits;
442 }
443
/* Remove the NODE_INTERSECT definition made at the top of this file. */
444 #undef NODE_INTERSECT