ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / bvh / obvh_volume.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for volumes, where
18  * various features can be enabled/disabled. This way we can compile optimized
19  * versions for each case without new features slowing things down.
20  *
21  * BVH_INSTANCING: object instancing
22  * BVH_MOTION: motion blur rendering
23  */
24
/* Select the node intersection routine that the traversal function below
 * calls through NODE_INTERSECT: obvh_node_intersect when BVH_HAIR is enabled,
 * obvh_aligned_node_intersect otherwise.
 * NOTE(review): the BVH_HAIR variant presumably also handles unaligned nodes
 * (the traversal checks PATH_RAY_NODE_UNALIGNED only in that configuration) -
 * confirm against the helper definitions. */
25 #if BVH_FEATURE(BVH_HAIR)
26 #  define NODE_INTERSECT obvh_node_intersect
27 #else
28 #  define NODE_INTERSECT obvh_aligned_node_intersect
29 #endif
30
/* Traverse the BVH with a single ray and intersect it against triangles that
 * belong to volume objects.
 *
 * kg:         kernel globals, forwarded to the node, primitive and instance helpers.
 * ray:        supplies the origin (P), direction (D) and maximum distance (t);
 *             with BVH_MOTION its time is forwarded to motion_triangle_intersect.
 * isect:      output record. It is reset to "no hit" before traversal
 *             (t = ray->t, u = v = 0, prim = PRIM_NONE, object = OBJECT_NONE) and
 *             handed to the triangle intersection helpers, which presumably fill
 *             it in when a hit is found.
 * visibility: bit mask. Inner nodes (only with __VISIBILITY_FLAG__) and leaves
 *             whose stored visibility bits do not overlap it are skipped.
 *
 * Only primitives whose object has SD_OBJECT_HAS_VOLUME set are tested, and only
 * the PRIMITIVE_TRIANGLE and (with BVH_MOTION) PRIMITIVE_MOTION_TRIANGLE leaf
 * types are handled.
 *
 * Returns true when isect->prim differs from PRIM_NONE after traversal.
 */
31 ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              Intersection *isect,
34                                              const uint visibility)
35 {
36   /* Traversal stack: a function-local array. Slot 0 holds the sentinel,
      * so popping it ends the loops below. */
37   OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
38   traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
39
40   /* Traversal state: index of the top stack item and the node to visit next. */
41   int stack_ptr = 0;
42   int node_addr = kernel_data.bvh.root;
43
44   /* Ray parameters. With BVH_INSTANCING, P/dir/idir are handed by pointer to
      * the instance push/pop helpers. `object` is the instance currently being
      * traversed and stays OBJECT_NONE while in the top-level tree. */
45   float3 P = ray->P;
46   float3 dir = bvh_clamp_direction(ray->D);
47   float3 idir = bvh_inverse_direction(dir);
48   int object = OBJECT_NONE;
49
50 #if BVH_FEATURE(BVH_MOTION)
     /* Filled/consumed by bvh_instance_motion_push() / bvh_instance_motion_pop(). */
51   Transform ob_itfm;
52 #endif
53
     /* Start with "no hit" recorded in the output. */
54   isect->t = ray->t;
55   isect->u = 0.0f;
56   isect->v = 0.0f;
57   isect->prim = PRIM_NONE;
58   isect->object = OBJECT_NONE;
59
     /* SIMD copies of the ray interval and ray parameters for NODE_INTERSECT.
      * Which of dir4 / org4 / P_idir4 exist depends on BVH_HAIR and
      * __KERNEL_AVX2__, matching the conditional argument list at the call. */
60   avxf tnear(0.0f), tfar(ray->t);
61 #if BVH_FEATURE(BVH_HAIR)
62   avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
63 #endif
64   avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
65
66 #ifdef __KERNEL_AVX2__
     /* Precomputed P * idir, passed to NODE_INTERSECT in AVX2 builds. */
67   float3 P_idir = P * idir;
68   avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
69 #endif
70 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
71   avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
72 #endif
73
74   /* Offsets to select the side that becomes the lower or upper bound. */
75   int near_x, near_y, near_z;
76   int far_x, far_y, far_z;
77   obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
78
79   /* Traversal loop. The inner do-while walks one tree until a sentinel is
      * popped; the outer do-while resumes in the parent tree after an instance
      * has been left (BVH_INSTANCING only). */
80   do {
81     do {
82       /* Traverse internal nodes. Non-negative addresses are inner nodes;
          * negative addresses are leaves and are handled after this loop. */
83       while (node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
84         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
85
86 #ifdef __VISIBILITY_FLAG__
           /* The bit pattern of inodes.x is tested against the visibility mask. */
87         if ((__float_as_uint(inodes.x) & visibility) == 0) {
88           /* Pop. */
89           node_addr = traversal_stack[stack_ptr].addr;
90           --stack_ptr;
91           continue;
92         }
93 #endif
94
           /* child_mask has one bit per child that was hit; dist receives the
            * per-child distances used below to order the children. */
95         avxf dist;
96         int child_mask = NODE_INTERSECT(kg,
97                                         tnear,
98                                         tfar,
99 #ifdef __KERNEL_AVX2__
100                                         P_idir4,
101 #endif
102 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
103                                         org4,
104 #endif
105 #if BVH_FEATURE(BVH_HAIR)
106                                         dir4,
107 #endif
108                                         idir4,
109                                         near_x,
110                                         near_y,
111                                         near_z,
112                                         far_x,
113                                         far_y,
114                                         far_z,
115                                         node_addr,
116                                         &dist);
117
118         if (child_mask != 0) {
              /* Child addresses are read from a different offset within the
               * node when it is flagged PATH_RAY_NODE_UNALIGNED. */
119           avxf cnodes;
120 #if BVH_FEATURE(BVH_HAIR)
121           if (__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
122             cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 26);
123           }
124           else
125 #endif
126           {
127             cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr + 14);
128           }
129
130           /* One child is hit, continue with that child.
               * NOTE(review): __bscf presumably returns the index of a set bit
               * and clears it in child_mask (the mask is re-tested for zero
               * after every call) - confirm against its definition. */
131           int r = __bscf(child_mask);
132           if (child_mask == 0) {
133             node_addr = __float_as_int(cnodes[r]);
134             continue;
135           }
136
137           /* Two children are hit, push far child, and continue with
138            * closer child.
139            */
140           int c0 = __float_as_int(cnodes[r]);
141           float d0 = ((float *)&dist)[r];
142           r = __bscf(child_mask);
143           int c1 = __float_as_int(cnodes[r]);
144           float d1 = ((float *)&dist)[r];
145           if (child_mask == 0) {
146             if (d1 < d0) {
147               node_addr = c1;
148               ++stack_ptr;
149               kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
150               traversal_stack[stack_ptr].addr = c0;
151               traversal_stack[stack_ptr].dist = d0;
152               continue;
153             }
154             else {
155               node_addr = c0;
156               ++stack_ptr;
157               kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
158               traversal_stack[stack_ptr].addr = c1;
159               traversal_stack[stack_ptr].dist = d1;
160               continue;
161             }
162           }
163
164           /* Here starts the slow path for 3 or more hit children (up to 8
165            * are handled below). We push all nodes onto the stack to sort them there.
166            */
167           ++stack_ptr;
168           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
169           traversal_stack[stack_ptr].addr = c1;
170           traversal_stack[stack_ptr].dist = d1;
171           ++stack_ptr;
172           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
173           traversal_stack[stack_ptr].addr = c0;
174           traversal_stack[stack_ptr].dist = d0;
175
176           /* Three children are hit, push all onto stack and sort 3
177            * stack items, continue with closest child.
178            */
179           r = __bscf(child_mask);
180           int c2 = __float_as_int(cnodes[r]);
181           float d2 = ((float *)&dist)[r];
182           if (child_mask == 0) {
183             ++stack_ptr;
184             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
185             traversal_stack[stack_ptr].addr = c2;
186             traversal_stack[stack_ptr].dist = d2;
187             obvh_stack_sort(&traversal_stack[stack_ptr],
188                             &traversal_stack[stack_ptr - 1],
189                             &traversal_stack[stack_ptr - 2]);
190             node_addr = traversal_stack[stack_ptr].addr;
191             --stack_ptr;
192             continue;
193           }
194
195           /* Four children are hit, push all onto stack and sort 4
196            * stack items, continue with closest child.
197            */
198           r = __bscf(child_mask);
199           int c3 = __float_as_int(cnodes[r]);
200           float d3 = ((float *)&dist)[r];
201           if (child_mask == 0) {
202             ++stack_ptr;
203             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
204             traversal_stack[stack_ptr].addr = c3;
205             traversal_stack[stack_ptr].dist = d3;
206             ++stack_ptr;
207             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
208             traversal_stack[stack_ptr].addr = c2;
209             traversal_stack[stack_ptr].dist = d2;
210             obvh_stack_sort(&traversal_stack[stack_ptr],
211                             &traversal_stack[stack_ptr - 1],
212                             &traversal_stack[stack_ptr - 2],
213                             &traversal_stack[stack_ptr - 3]);
214             node_addr = traversal_stack[stack_ptr].addr;
215             --stack_ptr;
216             continue;
217           }
218
              /* More than four children are hit: push c3 and c2 now, the
               * remaining children follow below. */
219           ++stack_ptr;
220           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
221           traversal_stack[stack_ptr].addr = c3;
222           traversal_stack[stack_ptr].dist = d3;
223           ++stack_ptr;
224           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
225           traversal_stack[stack_ptr].addr = c2;
226           traversal_stack[stack_ptr].dist = d2;
227
228           /* Five children are hit, push all onto stack and sort 5
229            * stack items, continue with closest child
230            */
231           r = __bscf(child_mask);
232           int c4 = __float_as_int(cnodes[r]);
233           float d4 = ((float *)&dist)[r];
234           if (child_mask == 0) {
235             ++stack_ptr;
236             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
237             traversal_stack[stack_ptr].addr = c4;
238             traversal_stack[stack_ptr].dist = d4;
239             obvh_stack_sort(&traversal_stack[stack_ptr],
240                             &traversal_stack[stack_ptr - 1],
241                             &traversal_stack[stack_ptr - 2],
242                             &traversal_stack[stack_ptr - 3],
243                             &traversal_stack[stack_ptr - 4]);
244             node_addr = traversal_stack[stack_ptr].addr;
245             --stack_ptr;
246             continue;
247           }
248
249           /* Six children are hit, push all onto stack and sort 6
250            * stack items, continue with closest child.
251            */
252           r = __bscf(child_mask);
253           int c5 = __float_as_int(cnodes[r]);
254           float d5 = ((float *)&dist)[r];
255           if (child_mask == 0) {
256             ++stack_ptr;
257             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
258             traversal_stack[stack_ptr].addr = c5;
259             traversal_stack[stack_ptr].dist = d5;
260             ++stack_ptr;
261             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
262             traversal_stack[stack_ptr].addr = c4;
263             traversal_stack[stack_ptr].dist = d4;
264             obvh_stack_sort(&traversal_stack[stack_ptr],
265                             &traversal_stack[stack_ptr - 1],
266                             &traversal_stack[stack_ptr - 2],
267                             &traversal_stack[stack_ptr - 3],
268                             &traversal_stack[stack_ptr - 4],
269                             &traversal_stack[stack_ptr - 5]);
270             node_addr = traversal_stack[stack_ptr].addr;
271             --stack_ptr;
272             continue;
273           }
274
              /* More than six children are hit: push c5 and c4 now, the
               * remaining children follow below. */
275           ++stack_ptr;
276           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
277           traversal_stack[stack_ptr].addr = c5;
278           traversal_stack[stack_ptr].dist = d5;
279           ++stack_ptr;
280           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
281           traversal_stack[stack_ptr].addr = c4;
282           traversal_stack[stack_ptr].dist = d4;
283
284           /* Seven children are hit, push all onto stack and sort 7
285            * stack items, continue with closest child.
286            */
287           r = __bscf(child_mask);
288           int c6 = __float_as_int(cnodes[r]);
289           float d6 = ((float *)&dist)[r];
290           if (child_mask == 0) {
291             ++stack_ptr;
292             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
293             traversal_stack[stack_ptr].addr = c6;
294             traversal_stack[stack_ptr].dist = d6;
295             obvh_stack_sort(&traversal_stack[stack_ptr],
296                             &traversal_stack[stack_ptr - 1],
297                             &traversal_stack[stack_ptr - 2],
298                             &traversal_stack[stack_ptr - 3],
299                             &traversal_stack[stack_ptr - 4],
300                             &traversal_stack[stack_ptr - 5],
301                             &traversal_stack[stack_ptr - 6]);
302             node_addr = traversal_stack[stack_ptr].addr;
303             --stack_ptr;
304             continue;
305           }
306
307           /* Eight children are hit, push all onto stack and sort 8
308            * stack items, continue with closest child.
309            */
310           r = __bscf(child_mask);
311           int c7 = __float_as_int(cnodes[r]);
312           float d7 = ((float *)&dist)[r];
313           ++stack_ptr;
314           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
315           traversal_stack[stack_ptr].addr = c7;
316           traversal_stack[stack_ptr].dist = d7;
317           ++stack_ptr;
318           kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
319           traversal_stack[stack_ptr].addr = c6;
320           traversal_stack[stack_ptr].dist = d6;
321           obvh_stack_sort(&traversal_stack[stack_ptr],
322                           &traversal_stack[stack_ptr - 1],
323                           &traversal_stack[stack_ptr - 2],
324                           &traversal_stack[stack_ptr - 3],
325                           &traversal_stack[stack_ptr - 4],
326                           &traversal_stack[stack_ptr - 5],
327                           &traversal_stack[stack_ptr - 6],
328                           &traversal_stack[stack_ptr - 7]);
329           node_addr = traversal_stack[stack_ptr].addr;
330           --stack_ptr;
331           continue;
332         }
333
            /* No child was hit: pop the next node from the stack. */
334         node_addr = traversal_stack[stack_ptr].addr;
335         --stack_ptr;
336       }
337
338       /* If node is leaf, fetch triangle list. The leaf record is read from
           * __bvh_leaf_nodes at index (-node_addr - 1); x and y give the
           * primitive range, z the visibility bits and w the primitive type. */
339       if (node_addr < 0) {
340         float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr - 1));
341
            /* Unlike the inner-node test, this check is not guarded by
             * __VISIBILITY_FLAG__. */
342         if ((__float_as_uint(leaf.z) & visibility) == 0) {
343           /* Pop. */
344           node_addr = traversal_stack[stack_ptr].addr;
345           --stack_ptr;
346           continue;
347         }
348
349         int prim_addr = __float_as_int(leaf.x);
350
351 #if BVH_FEATURE(BVH_INSTANCING)
            /* A non-negative prim_addr is a primitive range; a negative one
             * encodes an instance and is handled in the else branch. */
352         if (prim_addr >= 0) {
353 #endif
              /* prim_addr2 is the exclusive end of the primitive range. */
354           int prim_addr2 = __float_as_int(leaf.y);
355           const uint type = __float_as_int(leaf.w);
356           const uint p_type = type & PRIMITIVE_ALL;
357
358           /* Pop. The next node is selected before the primitives are tested. */
359           node_addr = traversal_stack[stack_ptr].addr;
360           --stack_ptr;
361
362           /* Primitive intersection. Leaf types without a case below are
               * skipped (there is no default branch). */
363           switch (p_type) {
364             case PRIMITIVE_TRIANGLE: {
365               for (; prim_addr < prim_addr2; prim_addr++) {
366                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
367                 /* Only primitives from volume object. Inside an instance
                     * the instance object is used, otherwise the object is
                     * looked up per primitive. */
368                 uint tri_object = (object == OBJECT_NONE) ?
369                                       kernel_tex_fetch(__prim_object, prim_addr) :
370                                       object;
371                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
372                 if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
373                   continue;
374                 }
375                 /* Intersect ray against primitive. */
376                 triangle_intersect(kg, isect, P, dir, visibility, object, prim_addr);
377               }
378               break;
379             }
380 #if BVH_FEATURE(BVH_MOTION)
381             case PRIMITIVE_MOTION_TRIANGLE: {
382               for (; prim_addr < prim_addr2; prim_addr++) {
383                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
384                 /* Only primitives from volume object. */
385                 uint tri_object = (object == OBJECT_NONE) ?
386                                       kernel_tex_fetch(__prim_object, prim_addr) :
387                                       object;
388                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
389                 if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
390                   continue;
391                 }
392                 /* Intersect ray against primitive. */
393                 motion_triangle_intersect(
394                     kg, isect, P, dir, ray->time, visibility, object, prim_addr);
395               }
396               break;
397             }
398 #endif
399           }
400         }
401 #if BVH_FEATURE(BVH_INSTANCING)
402         else {
403           /* Instance push. The object index is read from __prim_object at
               * (-prim_addr - 1); only instances flagged as having a volume
               * are entered. */
404           object = kernel_tex_fetch(__prim_object, -prim_addr - 1);
405           int object_flag = kernel_tex_fetch(__object_flag, object);
406           if (object_flag & SD_OBJECT_HAS_VOLUME) {
                /* The push helper receives P/dir/idir by pointer and its
                 * return value replaces isect->t.
                 * NOTE(review): presumably it moves the ray into the
                 * instance's object space - confirm. */
407 #  if BVH_FEATURE(BVH_MOTION)
408             isect->t = bvh_instance_motion_push(
409                 kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
410 #  else
411             isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
412 #  endif
413
                /* Refresh everything derived from P/dir/idir/isect->t. */
414             obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
415             tfar = avxf(isect->t);
416 #  if BVH_FEATURE(BVH_HAIR)
417             dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
418 #  endif
419             idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
420 #  ifdef __KERNEL_AVX2__
421             P_idir = P * idir;
422             P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
423 #  endif
424 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
425             org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
426 #  endif
427
                /* Push a sentinel so the inner loop stops when the instance's
                 * tree is exhausted, then start at the instance's root node. */
428             ++stack_ptr;
429             kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
430             traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
431
432             node_addr = kernel_tex_fetch(__object_node, object);
433           }
434           else {
435             /* Pop. The instance has no volume, so it is not entered. */
436             object = OBJECT_NONE;
437             node_addr = traversal_stack[stack_ptr].addr;
438             --stack_ptr;
439           }
440         }
441       }
442 #endif /* FEATURE(BVH_INSTANCING) */
443     } while (node_addr != ENTRYPOINT_SENTINEL);
444
445 #if BVH_FEATURE(BVH_INSTANCING)
        /* A sentinel was popped while the stack is not empty: it is the one
         * pushed on instance entry, so leave the instance and resume with the
         * node stored below it. */
446     if (stack_ptr >= 0) {
447       kernel_assert(object != OBJECT_NONE);
448
449       /* Instance pop. */
450 #  if BVH_FEATURE(BVH_MOTION)
451       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
452 #  else
453       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
454 #  endif
455
          /* Refresh everything derived from P/dir/idir/isect->t. */
456       obvh_near_far_idx_calc(idir, &near_x, &near_y, &near_z, &far_x, &far_y, &far_z);
457       tfar = avxf(isect->t);
458 #  if BVH_FEATURE(BVH_HAIR)
459       dir4 = avx3f(avxf(dir.x), avxf(dir.y), avxf(dir.z));
460 #  endif
461       idir4 = avx3f(avxf(idir.x), avxf(idir.y), avxf(idir.z));
462 #  ifdef __KERNEL_AVX2__
463       P_idir = P * idir;
464       P_idir4 = avx3f(P_idir.x, P_idir.y, P_idir.z);
465 #  endif
466 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
467       org4 = avx3f(avxf(P.x), avxf(P.y), avxf(P.z));
468 #  endif
469
470       object = OBJECT_NONE;
471       node_addr = traversal_stack[stack_ptr].addr;
472       --stack_ptr;
473     }
474 #endif /* FEATURE(BVH_INSTANCING) */
475   } while (node_addr != ENTRYPOINT_SENTINEL);
476
      /* A hit was recorded if the primitive index is no longer PRIM_NONE. */
477   return (isect->prim != PRIM_NONE);
478 }
479
/* Remove the NODE_INTERSECT macro defined at the top of this file so it does
 * not leak past the traversal function.
 * NOTE(review): presumably this lets the header be included again with a
 * different feature set - confirm against the including file. */
480 #undef NODE_INTERSECT