Cycles: add transparent shadow support, i.e. shadows through Transparent BSDF
[blender.git] / intern / cycles / kernel / kernel_bvh.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 CCL_NAMESPACE_BEGIN
19
20 /*
21  * "Persistent while-while kernel" used in:
22  *
23  * "Understanding the Efficiency of Ray Traversal on GPUs",
24  * Timo Aila and Samuli Laine,
25  * Proc. High-Performance Graphics 2009
26  */
27
28 /* bottom-most stack entry, indicating the end of traversal */
29
30 #define ENTRYPOINT_SENTINEL 0x76543210
31 /* 64 object BVH + 64 mesh BVH + 64 object node splitting */
32 #define BVH_STACK_SIZE 192
33 #define BVH_NODE_SIZE 4
34 #define TRI_NODE_SIZE 3
35
36 __device_inline float3 bvh_inverse_direction(float3 dir)
37 {
38         /* avoid divide by zero (ooeps = exp2f(-80.0f)) */
39         float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
40         float3 idir;
41
42         idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x));
43         idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y));
44         idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
45
46         return idir;
47 }
48
49 __device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
50 {
51         Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
52
53         *P = transform(&tfm, ray->P);
54
55         float3 dir = transform_direction(&tfm, ray->D);
56
57         float len;
58         dir = normalize_len(dir, &len);
59
60         *idir = bvh_inverse_direction(dir);
61
62         if(*t != FLT_MAX)
63                 *t *= len;
64 }
65
66 __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
67 {
68         Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
69
70         if(*t != FLT_MAX)
71                 *t *= len(transform_direction(&tfm, 1.0f/(*idir)));
72
73         *P = ray->P;
74         *idir = bvh_inverse_direction(ray->D);
75 }
76
77 /* intersect two bounding boxes */
78 __device_inline void bvh_node_intersect(KernelGlobals *kg,
79         bool *traverseChild0, bool *traverseChild1,
80         bool *closestChild1, int *nodeAddr0, int *nodeAddr1,
81         float3 P, float3 idir, float t, uint visibility, int nodeAddr)
82 {
83         /* fetch node data */
84         float4 n0xy = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
85         float4 n1xy = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
86         float4 nz = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
87         float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
88
89         /* intersect ray against child nodes */
90         float3 ood = P * idir;
91         float c0lox = n0xy.x * idir.x - ood.x;
92         float c0hix = n0xy.y * idir.x - ood.x;
93         float c0loy = n0xy.z * idir.y - ood.y;
94         float c0hiy = n0xy.w * idir.y - ood.y;
95         float c0loz = nz.x * idir.z - ood.z;
96         float c0hiz = nz.y * idir.z - ood.z;
97         float c1loz = nz.z * idir.z - ood.z;
98         float c1hiz = nz.w * idir.z - ood.z;
99
100         float c0min_x = min(c0lox, c0hix);
101         float c0min_y = min(c0loy, c0hiy);
102         float c0min_z = min(c0loz, c0hiz);
103
104         float c0min = max4(c0min_x, c0min_y, c0min_z, 0.0f);
105         float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
106         float c1lox = n1xy.x * idir.x - ood.x;
107         float c1hix = n1xy.y * idir.x - ood.x;
108         float c1loy = n1xy.z * idir.y - ood.y;
109         float c1hiy = n1xy.w * idir.y - ood.y;
110         float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
111         float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
112
113         /* decide which nodes to traverse next */
114 #ifdef __VISIBILITY_FLAG__
115         /* this visibility test gives a 5% performance hit, how to solve? */
116         *traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility);
117         *traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility);
118 #else
119         *traverseChild0 = (c0max >= c0min);
120         *traverseChild1 = (c1max >= c1min);
121 #endif
122
123         *nodeAddr0 = __float_as_int(cnodes.x);
124         *nodeAddr1 = __float_as_int(cnodes.y);
125
126         *closestChild1 = (c1min < c0min);
127 }
128
129 /* Sven Woop's algorithm */
130 __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
131         float3 P, float3 idir, uint visibility, int object, int triAddr)
132 {
133         /* compute and check intersection t-value */
134         float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
135         float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
136         float3 dir = 1.0f/idir;
137
138         float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
139         float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
140         float t = Oz * invDz;
141
142         if(t > 0.0f && t < isect->t) {
143                 /* compute and check barycentric u */
144                 float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
145                 float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
146                 float u = Ox + t*Dx;
147
148                 if(u >= 0.0f) {
149                         /* compute and check barycentric v */
150                         float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
151                         float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
152                         float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
153                         float v = Oy + t*Dy;
154
155                         if(v >= 0.0f && u + v <= 1.0f) {
156 #ifdef __VISIBILITY_FLAG__
157                                 /* visibility flag test. we do it here under the assumption
158                                    that most triangles are culled by node flags */
159                                 if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
160 #endif
161                                 {
162                                         /* record intersection */
163                                         isect->prim = triAddr;
164                                         isect->object = object;
165                                         isect->u = u;
166                                         isect->v = v;
167                                         isect->t = t;
168                                 }
169                         }
170                 }
171         }
172 }
173
174 __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
175 {
176         /* traversal stack in CUDA thread-local memory */
177         int traversalStack[BVH_STACK_SIZE];
178         traversalStack[0] = ENTRYPOINT_SENTINEL;
179
180         /* traversal variables in registers */
181         int stackPtr = 0;
182         int nodeAddr = kernel_data.bvh.root;
183
184         /* ray parameters in registers */
185         const float tmax = ray->t;
186         float3 P = ray->P;
187         float3 idir = bvh_inverse_direction(ray->D);
188         int object = ~0;
189
190         isect->t = tmax;
191         isect->object = ~0;
192         isect->prim = ~0;
193         isect->u = 0.0f;
194         isect->v = 0.0f;
195
196         /* traversal loop */
197         do {
198                 do
199                 {
200                         /* traverse internal nodes */
201                         while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
202                         {
203                                 bool traverseChild0, traverseChild1, closestChild1;
204                                 int nodeAddrChild1;
205
206                                 bvh_node_intersect(kg, &traverseChild0, &traverseChild1,
207                                         &closestChild1, &nodeAddr, &nodeAddrChild1,
208                                         P, idir, isect->t, visibility, nodeAddr);
209
210                                 if(traverseChild0 != traverseChild1) {
211                                         /* one child was intersected */
212                                         if(traverseChild1) {
213                                                 nodeAddr = nodeAddrChild1;
214                                         }
215                                 }
216                                 else {
217                                         if(!traverseChild0) {
218                                                 /* neither child was intersected */
219                                                 nodeAddr = traversalStack[stackPtr];
220                                                 --stackPtr;
221                                         }
222                                         else {
223                                                 /* both children were intersected, push the farther one */
224                                                 if(closestChild1) {
225                                                         int tmp = nodeAddr;
226                                                         nodeAddr = nodeAddrChild1;
227                                                         nodeAddrChild1 = tmp;
228                                                 }
229
230                                                 ++stackPtr;
231                                                 traversalStack[stackPtr] = nodeAddrChild1;
232                                         }
233                                 }
234                         }
235
236                         /* if node is leaf, fetch triangle list */
237                         if(nodeAddr < 0) {
238                                 float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
239                                 int primAddr = __float_as_int(leaf.x);
240
241 #ifdef __INSTANCING__
242                                 if(primAddr >= 0) {
243 #endif
244                                         int primAddr2 = __float_as_int(leaf.y);
245
246                                         /* pop */
247                                         nodeAddr = traversalStack[stackPtr];
248                                         --stackPtr;
249
250                                         /* triangle intersection */
251                                         while(primAddr < primAddr2) {
252                                                 /* intersect ray against triangle */
253                                                 bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
254
255                                                 /* shadow ray early termination */
256                                                 if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
257                                                         return true;
258
259                                                 primAddr++;
260                                         }
261 #ifdef __INSTANCING__
262                                 }
263                                 else {
264                                         /* instance push */
265                                         object = kernel_tex_fetch(__prim_object, -primAddr-1);
266
267                                         bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
268
269                                         ++stackPtr;
270                                         traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
271
272                                         nodeAddr = kernel_tex_fetch(__object_node, object);
273                                 }
274 #endif
275                         }
276                 } while(nodeAddr != ENTRYPOINT_SENTINEL);
277
278 #ifdef __INSTANCING__
279                 if(stackPtr >= 0) {
280                         kernel_assert(object != ~0);
281
282                         /* instance pop */
283                         bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax);
284                         object = ~0;
285                         nodeAddr = traversalStack[stackPtr];
286                         --stackPtr;
287                 }
288 #endif
289         } while(nodeAddr != ENTRYPOINT_SENTINEL);
290
291         return (isect->prim != ~0);
292 }
293
294 __device_inline float3 ray_offset(float3 P, float3 Ng)
295 {
296 #ifdef __INTERSECTION_REFINE__
297         const float epsilon_f = 1e-5f;
298         const int epsilon_i = 32;
299
300         float3 res;
301
302         /* x component */
303         if(fabsf(P.x) < epsilon_f) {
304                 res.x = P.x + Ng.x*epsilon_f;
305         }
306         else {
307                 uint ix = __float_as_uint(P.x);
308                 ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
309                 res.x = __uint_as_float(ix);
310         }
311
312         /* y component */
313         if(fabsf(P.y) < epsilon_f) {
314                 res.y = P.y + Ng.y*epsilon_f;
315         }
316         else {
317                 uint iy = __float_as_uint(P.y);
318                 iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
319                 res.y = __uint_as_float(iy);
320         }
321
322         /* z component */
323         if(fabsf(P.z) < epsilon_f) {
324                 res.z = P.z + Ng.z*epsilon_f;
325         }
326         else {
327                 uint iz = __float_as_uint(P.z);
328                 iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
329                 res.z = __uint_as_float(iz);
330         }
331
332         return res;
333 #else
334         const float epsilon_f = 1e-4f;
335         return P + epsilon_f*Ng;
336 #endif
337 }
338
339 __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray)
340 {
341         float3 P = ray->P;
342         float3 D = ray->D;
343         float t = isect->t;
344
345 #ifdef __INTERSECTION_REFINE__
346         if(isect->object != ~0) {
347                 Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
348
349                 P = transform(&tfm, P);
350                 D = transform_direction(&tfm, D*t);
351                 D = normalize_len(D, &t);
352         }
353
354         P = P + D*t;
355
356         float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
357         float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
358         float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
359         float rt = Oz * invDz;
360
361         P = P + D*rt;
362
363         if(isect->object != ~0) {
364                 Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
365                 P = transform(&tfm, P);
366         }
367
368         return P;
369 #else
370         return P + D*t;
371 #endif
372 }
373
374 CCL_NAMESPACE_END
375