Rename clip proxy rebuild function so it's no longer called in the same way
[blender.git] / intern / cycles / kernel / kernel_bvh.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation
3  * Modifications Copyright 2011, Blender Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 CCL_NAMESPACE_BEGIN
19
20 /*
21  * "Persistent while-while kernel" used in:
22  *
23  * "Understanding the Efficiency of Ray Traversal on GPUs",
24  * Timo Aila and Samuli Laine,
25  * Proc. High-Performance Graphics 2009
26  */
27
28 /* bottom-most stack entry, indicating the end of traversal */
29
30 #define ENTRYPOINT_SENTINEL 0x76543210
31 /* 64 object BVH + 64 mesh BVH + 64 object node splitting */
32 #define BVH_STACK_SIZE 192
33 #define BVH_NODE_SIZE 4
34 #define TRI_NODE_SIZE 3
35
36 __device_inline float3 bvh_inverse_direction(float3 dir)
37 {
38         /* avoid divide by zero (ooeps = exp2f(-80.0f)) */
39         float ooeps = 0.00000000000000000000000082718061255302767487140869206996285356581211090087890625f;
40         float3 idir;
41
42         idir.x = 1.0f/((fabsf(dir.x) > ooeps)? dir.x: copysignf(ooeps, dir.x));
43         idir.y = 1.0f/((fabsf(dir.y) > ooeps)? dir.y: copysignf(ooeps, dir.y));
44         idir.z = 1.0f/((fabsf(dir.z) > ooeps)? dir.z: copysignf(ooeps, dir.z));
45
46         return idir;
47 }
48
49 __device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
50 {
51         Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
52
53         *P = transform(&tfm, ray->P);
54
55         float3 dir = transform_direction(&tfm, ray->D);
56
57         float len;
58         dir = normalize_len(dir, &len);
59
60         *idir = bvh_inverse_direction(dir);
61
62         if(*t != FLT_MAX)
63                 *t *= len;
64 }
65
66 __device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *idir, float *t, const float tmax)
67 {
68         Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
69
70         if(*t != FLT_MAX)
71                 *t *= len(transform_direction(&tfm, 1.0f/(*idir)));
72
73         *P = ray->P;
74         *idir = bvh_inverse_direction(ray->D);
75 }
76
77 /* intersect two bounding boxes */
78 __device_inline void bvh_node_intersect(KernelGlobals *kg,
79         bool *traverseChild0, bool *traverseChild1,
80         bool *closestChild1, int *nodeAddr0, int *nodeAddr1,
81         float3 P, float3 idir, float t, uint visibility, int nodeAddr)
82 {
83         /* fetch node data */
84         float4 n0xy = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+0);
85         float4 n1xy = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+1);
86         float4 nz = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+2);
87         float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_NODE_SIZE+3);
88
89         /* intersect ray against child nodes */
90         float3 ood = P * idir;
91         float c0lox = n0xy.x * idir.x - ood.x;
92         float c0hix = n0xy.y * idir.x - ood.x;
93         float c0loy = n0xy.z * idir.y - ood.y;
94         float c0hiy = n0xy.w * idir.y - ood.y;
95         float c0loz = nz.x * idir.z - ood.z;
96         float c0hiz = nz.y * idir.z - ood.z;
97         float c0min = max4(min(c0lox, c0hix), min(c0loy, c0hiy), min(c0loz, c0hiz), 0.0f);
98         float c0max = min4(max(c0lox, c0hix), max(c0loy, c0hiy), max(c0loz, c0hiz), t);
99
100         float c1loz = nz.z * idir.z - ood.z;
101         float c1hiz = nz.w * idir.z - ood.z;
102         float c1lox = n1xy.x * idir.x - ood.x;
103         float c1hix = n1xy.y * idir.x - ood.x;
104         float c1loy = n1xy.z * idir.y - ood.y;
105         float c1hiy = n1xy.w * idir.y - ood.y;
106         float c1min = max4(min(c1lox, c1hix), min(c1loy, c1hiy), min(c1loz, c1hiz), 0.0f);
107         float c1max = min4(max(c1lox, c1hix), max(c1loy, c1hiy), max(c1loz, c1hiz), t);
108
109         /* decide which nodes to traverse next */
110 #ifdef __VISIBILITY_FLAG__
111         /* this visibility test gives a 5% performance hit, how to solve? */
112         *traverseChild0 = (c0max >= c0min) && (__float_as_int(cnodes.z) & visibility);
113         *traverseChild1 = (c1max >= c1min) && (__float_as_int(cnodes.w) & visibility);
114 #else
115         *traverseChild0 = (c0max >= c0min);
116         *traverseChild1 = (c1max >= c1min);
117 #endif
118
119         *nodeAddr0 = __float_as_int(cnodes.x);
120         *nodeAddr1 = __float_as_int(cnodes.y);
121
122         *closestChild1 = (c1min < c0min);
123 }
124
125 /* Sven Woop's algorithm */
126 __device_inline void bvh_triangle_intersect(KernelGlobals *kg, Intersection *isect,
127         float3 P, float3 idir, uint visibility, int object, int triAddr)
128 {
129         /* compute and check intersection t-value */
130         float4 v00 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+0);
131         float4 v11 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+1);
132         float3 dir = 1.0f/idir;
133
134         float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
135         float invDz = 1.0f/(dir.x*v00.x + dir.y*v00.y + dir.z*v00.z);
136         float t = Oz * invDz;
137
138         if(t > 0.0f && t < isect->t) {
139                 /* compute and check barycentric u */
140                 float Ox = v11.w + P.x*v11.x + P.y*v11.y + P.z*v11.z;
141                 float Dx = dir.x*v11.x + dir.y*v11.y + dir.z*v11.z;
142                 float u = Ox + t*Dx;
143
144                 if(u >= 0.0f) {
145                         /* compute and check barycentric v */
146                         float4 v22 = kernel_tex_fetch(__tri_woop, triAddr*TRI_NODE_SIZE+2);
147                         float Oy = v22.w + P.x*v22.x + P.y*v22.y + P.z*v22.z;
148                         float Dy = dir.x*v22.x + dir.y*v22.y + dir.z*v22.z;
149                         float v = Oy + t*Dy;
150
151                         if(v >= 0.0f && u + v <= 1.0f) {
152 #ifdef __VISIBILITY_FLAG__
153                                 /* visibility flag test. we do it here under the assumption
154                                    that most triangles are culled by node flags */
155                                 if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
156 #endif
157                                 {
158                                         /* record intersection */
159                                         isect->prim = triAddr;
160                                         isect->object = object;
161                                         isect->u = u;
162                                         isect->v = v;
163                                         isect->t = t;
164                                 }
165                         }
166                 }
167         }
168 }
169
170 __device_inline bool scene_intersect(KernelGlobals *kg, const Ray *ray, const uint visibility, Intersection *isect)
171 {
172         /* traversal stack in CUDA thread-local memory */
173         int traversalStack[BVH_STACK_SIZE];
174         traversalStack[0] = ENTRYPOINT_SENTINEL;
175
176         /* traversal variables in registers */
177         int stackPtr = 0;
178         int nodeAddr = kernel_data.bvh.root;
179
180         /* ray parameters in registers */
181         const float tmax = ray->t;
182         float3 P = ray->P;
183         float3 idir = bvh_inverse_direction(ray->D);
184         int object = ~0;
185
186         isect->t = tmax;
187         isect->object = ~0;
188         isect->prim = ~0;
189         isect->u = 0.0f;
190         isect->v = 0.0f;
191
192         /* traversal loop */
193         do {
194                 do
195                 {
196                         /* traverse internal nodes */
197                         while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL)
198                         {
199                                 bool traverseChild0, traverseChild1, closestChild1;
200                                 int nodeAddrChild1;
201
202                                 bvh_node_intersect(kg, &traverseChild0, &traverseChild1,
203                                         &closestChild1, &nodeAddr, &nodeAddrChild1,
204                                         P, idir, isect->t, visibility, nodeAddr);
205
206                                 if(traverseChild0 != traverseChild1) {
207                                         /* one child was intersected */
208                                         if(traverseChild1) {
209                                                 nodeAddr = nodeAddrChild1;
210                                         }
211                                 }
212                                 else {
213                                         if(!traverseChild0) {
214                                                 /* neither child was intersected */
215                                                 nodeAddr = traversalStack[stackPtr];
216                                                 --stackPtr;
217                                         }
218                                         else {
219                                                 /* both children were intersected, push the farther one */
220                                                 if(closestChild1) {
221                                                         int tmp = nodeAddr;
222                                                         nodeAddr = nodeAddrChild1;
223                                                         nodeAddrChild1 = tmp;
224                                                 }
225
226                                                 ++stackPtr;
227                                                 traversalStack[stackPtr] = nodeAddrChild1;
228                                         }
229                                 }
230                         }
231
232                         /* if node is leaf, fetch triangle list */
233                         if(nodeAddr < 0) {
234                                 float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_NODE_SIZE+(BVH_NODE_SIZE-1));
235                                 int primAddr = __float_as_int(leaf.x);
236
237 #ifdef __INSTANCING__
238                                 if(primAddr >= 0) {
239 #endif
240                                         int primAddr2 = __float_as_int(leaf.y);
241
242                                         /* pop */
243                                         nodeAddr = traversalStack[stackPtr];
244                                         --stackPtr;
245
246                                         /* triangle intersection */
247                                         while(primAddr < primAddr2) {
248                                                 /* intersect ray against triangle */
249                                                 bvh_triangle_intersect(kg, isect, P, idir, visibility, object, primAddr);
250
251                                                 /* shadow ray early termination */
252                                                 if(visibility == PATH_RAY_SHADOW_OPAQUE && isect->prim != ~0)
253                                                         return true;
254
255                                                 primAddr++;
256                                         }
257 #ifdef __INSTANCING__
258                                 }
259                                 else {
260                                         /* instance push */
261                                         object = kernel_tex_fetch(__prim_object, -primAddr-1);
262
263                                         bvh_instance_push(kg, object, ray, &P, &idir, &isect->t, tmax);
264
265                                         ++stackPtr;
266                                         traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
267
268                                         nodeAddr = kernel_tex_fetch(__object_node, object);
269                                 }
270 #endif
271                         }
272                 } while(nodeAddr != ENTRYPOINT_SENTINEL);
273
274 #ifdef __INSTANCING__
275                 if(stackPtr >= 0) {
276                         kernel_assert(object != ~0);
277
278                         /* instance pop */
279                         bvh_instance_pop(kg, object, ray, &P, &idir, &isect->t, tmax);
280                         object = ~0;
281                         nodeAddr = traversalStack[stackPtr];
282                         --stackPtr;
283                 }
284 #endif
285         } while(nodeAddr != ENTRYPOINT_SENTINEL);
286
287         return (isect->prim != ~0);
288 }
289
290 __device_inline float3 ray_offset(float3 P, float3 Ng)
291 {
292 #ifdef __INTERSECTION_REFINE__
293         const float epsilon_f = 1e-5f;
294         const int epsilon_i = 32;
295
296         float3 res;
297
298         /* x component */
299         if(fabsf(P.x) < epsilon_f) {
300                 res.x = P.x + Ng.x*epsilon_f;
301         }
302         else {
303                 uint ix = __float_as_uint(P.x);
304                 ix += ((ix ^ __float_as_uint(Ng.x)) >> 31)? -epsilon_i: epsilon_i;
305                 res.x = __uint_as_float(ix);
306         }
307
308         /* y component */
309         if(fabsf(P.y) < epsilon_f) {
310                 res.y = P.y + Ng.y*epsilon_f;
311         }
312         else {
313                 uint iy = __float_as_uint(P.y);
314                 iy += ((iy ^ __float_as_uint(Ng.y)) >> 31)? -epsilon_i: epsilon_i;
315                 res.y = __uint_as_float(iy);
316         }
317
318         /* z component */
319         if(fabsf(P.z) < epsilon_f) {
320                 res.z = P.z + Ng.z*epsilon_f;
321         }
322         else {
323                 uint iz = __float_as_uint(P.z);
324                 iz += ((iz ^ __float_as_uint(Ng.z)) >> 31)? -epsilon_i: epsilon_i;
325                 res.z = __uint_as_float(iz);
326         }
327
328         return res;
329 #else
330         const float epsilon_f = 1e-4f;
331         return P + epsilon_f*Ng;
332 #endif
333 }
334
335 __device_inline float3 bvh_triangle_refine(KernelGlobals *kg, const Intersection *isect, const Ray *ray)
336 {
337         float3 P = ray->P;
338         float3 D = ray->D;
339         float t = isect->t;
340
341 #ifdef __INTERSECTION_REFINE__
342         if(isect->object != ~0) {
343                 Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_INVERSE_TRANSFORM);
344
345                 P = transform(&tfm, P);
346                 D = transform_direction(&tfm, D*t);
347                 D = normalize_len(D, &t);
348         }
349
350         P = P + D*t;
351
352         float4 v00 = kernel_tex_fetch(__tri_woop, isect->prim*TRI_NODE_SIZE+0);
353         float Oz = v00.w - P.x*v00.x - P.y*v00.y - P.z*v00.z;
354         float invDz = 1.0f/(D.x*v00.x + D.y*v00.y + D.z*v00.z);
355         float rt = Oz * invDz;
356
357         P = P + D*rt;
358
359         if(isect->object != ~0) {
360                 Transform tfm = object_fetch_transform(kg, isect->object, OBJECT_TRANSFORM);
361                 P = transform(&tfm, P);
362         }
363
364         return P;
365 #else
366         return P + D*t;
367 #endif
368 }
369
370 CCL_NAMESPACE_END
371