Cycles: Support visibility check for inner nodes of QBVH
[blender.git] / intern / cycles / kernel / geom / geom_qbvh.h
1 /*
2  * Copyright 2011-2014, Blender Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
/* One entry handled by the qbvh_stack_sort() helpers in this file.
 *
 * Those helpers compare and reorder entries purely by `dist`; `addr` is
 * carried along as payload and is never inspected here.
 */
struct QBVHStackItem {
	/* Payload moved together with `dist` (presumably a BVH node address;
	 * confirm against the traversal code that fills the stack). */
	int addr;
	/* Sort key: the sort helpers arrange entries by ascending `dist`. */
	float dist;
};
21
/* TODO(sergey): Investigate if using intrinsics helps for both
 * stack item swap and float comparison.
 */
25 ccl_device_inline void qbvh_item_swap(QBVHStackItem *__restrict a,
26                                       QBVHStackItem *__restrict b)
27 {
28         QBVHStackItem tmp = *a;
29         *a = *b;
30         *b = tmp;
31 }
32
33 ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
34                                        QBVHStackItem *__restrict s2,
35                                        QBVHStackItem *__restrict s3)
36 {
37         if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
38         if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
39         if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
40 }
41
42 ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
43                                        QBVHStackItem *__restrict s2,
44                                        QBVHStackItem *__restrict s3,
45                                        QBVHStackItem *__restrict s4)
46 {
47         if(s2->dist < s1->dist) { qbvh_item_swap(s2, s1); }
48         if(s4->dist < s3->dist) { qbvh_item_swap(s4, s3); }
49         if(s3->dist < s1->dist) { qbvh_item_swap(s3, s1); }
50         if(s4->dist < s2->dist) { qbvh_item_swap(s4, s2); }
51         if(s3->dist < s2->dist) { qbvh_item_swap(s3, s2); }
52 }
53
54 ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,
55                                           const ssef& tnear,
56                                           const ssef& tfar,
57 #ifdef __KERNEL_AVX2__
58                                           const sse3f& org_idir,
59 #else
60                                           const sse3f& org,
61 #endif
62                                           const sse3f& idir,
63                                           const int near_x,
64                                           const int near_y,
65                                           const int near_z,
66                                           const int far_x,
67                                           const int far_y,
68                                           const int far_z,
69                                           const int nodeAddr,
70                                           ssef *__restrict dist)
71 {
72         const int offset = nodeAddr*BVH_QNODE_SIZE + 1;
73 #ifdef __KERNEL_AVX2__
74         const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, org_idir.x);
75         const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, org_idir.y);
76         const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, org_idir.z);
77         const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, org_idir.x);
78         const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, org_idir.y);
79         const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, org_idir.z);
80 #else
81         const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - org.x) * idir.x;
82         const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - org.y) * idir.y;
83         const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - org.z) * idir.z;
84         const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - org.x) * idir.x;
85         const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - org.y) * idir.y;
86         const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - org.z) * idir.z;
87 #endif
88
89 #ifdef __KERNEL_SSE41__
90         const ssef tNear = maxi(maxi(tnear_x, tnear_y), maxi(tnear_z, tnear));
91         const ssef tFar = mini(mini(tfar_x, tfar_y), mini(tfar_z, tfar));
92         const sseb vmask = cast(tNear) > cast(tFar);
93         int mask = (int)movemask(vmask)^0xf;
94 #else
95         const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
96         const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
97         const sseb vmask = tNear <= tFar;
98         int mask = (int)movemask(vmask);
99 #endif
100         *dist = tNear;
101         return mask;
102 }
103
104 ccl_device_inline int qbvh_node_intersect_robust(KernelGlobals *__restrict kg,
105                                                  const ssef& tnear,
106                                                  const ssef& tfar,
107 #ifdef __KERNEL_AVX2__
108                                                  const sse3f& P_idir,
109 #else
110                                                  const sse3f& P,
111 #endif
112                                                  const sse3f& idir,
113                                                  const int near_x,
114                                                  const int near_y,
115                                                  const int near_z,
116                                                  const int far_x,
117                                                  const int far_y,
118                                                  const int far_z,
119                                                  const int nodeAddr,
120                                                  const float difl,
121                                                  ssef *__restrict dist)
122 {
123         const int offset = nodeAddr*BVH_QNODE_SIZE + 1;
124 #ifdef __KERNEL_AVX2__
125         const ssef tnear_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x), idir.x, P_idir.x);
126         const ssef tnear_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y), idir.y, P_idir.y);
127         const ssef tnear_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z), idir.z, P_idir.z);
128         const ssef tfar_x = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x), idir.x, P_idir.x);
129         const ssef tfar_y = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y), idir.y, P_idir.y);
130         const ssef tfar_z = msub(kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z), idir.z, P_idir.z);
131 #else
132         const ssef tnear_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_x) - P.x) * idir.x;
133         const ssef tnear_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_y) - P.y) * idir.y;
134         const ssef tnear_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+near_z) - P.z) * idir.z;
135         const ssef tfar_x = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_x) - P.x) * idir.x;
136         const ssef tfar_y = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_y) - P.y) * idir.y;
137         const ssef tfar_z = (kernel_tex_fetch_ssef(__bvh_nodes, offset+far_z) - P.z) * idir.z;
138 #endif
139
140         const float round_down = 1.0f - difl;
141         const float round_up = 1.0f + difl;
142         const ssef tNear = max4(tnear_x, tnear_y, tnear_z, tnear);
143         const ssef tFar = min4(tfar_x, tfar_y, tfar_z, tfar);
144         const sseb vmask = round_down*tNear <= round_up*tFar;
145         *dist = tNear;
146         return (int)movemask(vmask);
147 }