2 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3 * and code copyright 2009-2012 Intel Corporation
5 * Modifications Copyright 2011-2014, Blender Foundation.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
20 /* This is a template BVH traversal function for volumes, where
21 * various features can be enabled/disabled. This way we can compile optimized
22 * versions for each case without new features slowing things down.
24 * BVH_INSTANCING: object instancing
25 * BVH_HAIR: hair curve rendering
26 * BVH_MOTION: motion blur rendering
/* QBVH traversal for volumes.
 *
 * Starts at kernel_data.bvh.root and walks the tree with an explicit stack
 * (traversalStack). Leaf primitives are only intersected when the object they
 * belong to has SD_OBJECT_HAS_VOLUME set. With BVH_INSTANCING enabled, only
 * instances flagged with SD_OBJECT_HAS_VOLUME are entered.
 *
 * kg:         kernel globals, forwarded to the texture fetches and
 *             intersection helpers.
 * visibility: forwarded unchanged to the primitive intersection helpers.
 * ray, isect: read and written in the body below; their declarations are not
 *             part of this listing.
 *
 * Returns true when isect->prim differs from PRIM_NONE at the end.
 *
 * NOTE(review): this listing is incomplete. Several statements used below
 * have no visible counterpart (declarations of stackPtr, P and dist, stack
 * pointer increments and decrements, braces, else branches and the matching
 * preprocessor else and endif lines). Confirm against the full file before
 * relying on any control-flow detail described here.
 */
30 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
33 const uint visibility)
36 * - Test if pushing distance on the stack helps.
37 * - Likely and unlikely for if() statements.
38 * - Test restrict attribute for pointers.
41 /* Traversal stack in CUDA thread-local memory. */
42 QBVHStackItem traversalStack[BVH_QSTACK_SIZE];
/* Slot 0 holds the sentinel that the loop conditions below compare against
 * to decide when traversal is finished. */
43 traversalStack[0].addr = ENTRYPOINT_SENTINEL;
45 /* Traversal variables in registers. */
47 int nodeAddr = kernel_data.bvh.root;
49 /* Ray parameters in registers. */
51 float3 dir = bvh_clamp_direction(ray->D);
52 float3 idir = bvh_inverse_direction(dir);
/* OBJECT_NONE means no instance is currently entered; it is replaced when an
 * instance is pushed and restored when it is left. */
53 int object = OBJECT_NONE;
55 #if BVH_FEATURE(BVH_MOTION)
59 #ifndef __KERNEL_SSE41__
/* Begin with no hit recorded; the return value tests isect->prim. */
68 isect->prim = PRIM_NONE;
69 isect->object = OBJECT_NONE;
/* Values handed to qbvh_node_intersect: tnear is built from 0, tfar from
 * ray->t, idir4 from the components of idir. */
71 ssef tnear(0.0f), tfar(ray->t);
72 sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
/* AVX2 builds cache P*idir. NOTE(review): org is presumably the non-AVX2
 * alternative; the separating preprocessor lines are not visible here. */
74 #ifdef __KERNEL_AVX2__
75 float3 P_idir = P*idir;
76 sse3f P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
78 sse3f org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
81 /* Offsets to select the side that becomes the lower or upper bound. */
82 int near_x, near_y, near_z;
83 int far_x, far_y, far_z;
/* Picked from the sign of each idir component: x uses 0 and 1, y uses 2 and
 * 3, z uses 4 and 5; near and far are swapped for negative components. */
85 if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
86 if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
87 if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
/* Triangle intersection precalculation for the current dir. It is repeated
 * after every instance push and pop further down, which receive dir by
 * pointer. */
89 IsectPrecalc isect_precalc;
90 triangle_intersect_precalc(dir, &isect_precalc);
95 /* Traverse internal nodes. */
/* Non-negative addresses are handled here; negative ones are decoded as
 * leaves after this loop (see the fetch using -nodeAddr-1). */
96 while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
98 int traverseChild = qbvh_node_intersect(kg,
101 #ifdef __KERNEL_AVX2__
107 near_x, near_y, near_z,
/* traverseChild is consumed through repeated __bscf calls and compared with
 * 0 after each one. NOTE(review): presumably a bit mask of hit children with
 * __bscf returning and clearing the lowest set bit; confirm against the
 * definition of __bscf. */
112 if(traverseChild != 0) {
/* Child node addresses are stored as float bits in this float4 and read back
 * with __float_as_int(cnodes[r]). */
113 float4 cnodes = kernel_tex_fetch(__bvh_nodes, nodeAddr*BVH_QNODE_SIZE+6);
115 /* One child is hit, continue with that child. */
116 int r = __bscf(traverseChild);
117 if(traverseChild == 0) {
118 nodeAddr = __float_as_int(cnodes[r]);
122 /* Two children are hit, push far child, and continue with
125 int c0 = __float_as_int(cnodes[r]);
126 float d0 = ((float*)&dist)[r];
127 r = __bscf(traverseChild);
128 int c1 = __float_as_int(cnodes[r]);
129 float d1 = ((float*)&dist)[r];
130 if(traverseChild == 0) {
134 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
135 traversalStack[stackPtr].addr = c0;
136 traversalStack[stackPtr].dist = d0;
142 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
143 traversalStack[stackPtr].addr = c1;
144 traversalStack[stackPtr].dist = d1;
149 /* Here starts the slow path for 3 or 4 hit children. We push
150 * all nodes onto the stack to sort them there.
153 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
154 traversalStack[stackPtr].addr = c1;
155 traversalStack[stackPtr].dist = d1;
157 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
158 traversalStack[stackPtr].addr = c0;
159 traversalStack[stackPtr].dist = d0;
161 /* Three children are hit, push all onto stack and sort 3
162 * stack items, continue with closest child.
164 r = __bscf(traverseChild);
165 int c2 = __float_as_int(cnodes[r]);
166 float d2 = ((float*)&dist)[r];
167 if(traverseChild == 0) {
169 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
170 traversalStack[stackPtr].addr = c2;
171 traversalStack[stackPtr].dist = d2;
172 qbvh_stack_sort(&traversalStack[stackPtr],
173 &traversalStack[stackPtr - 1],
174 &traversalStack[stackPtr - 2]);
175 nodeAddr = traversalStack[stackPtr].addr;
180 /* Four children are hit, push all onto stack and sort 4
181 * stack items, continue with closest child.
183 r = __bscf(traverseChild);
184 int c3 = __float_as_int(cnodes[r]);
185 float d3 = ((float*)&dist)[r];
187 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
188 traversalStack[stackPtr].addr = c3;
189 traversalStack[stackPtr].dist = d3;
191 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
192 traversalStack[stackPtr].addr = c2;
193 traversalStack[stackPtr].dist = d2;
194 qbvh_stack_sort(&traversalStack[stackPtr],
195 &traversalStack[stackPtr - 1],
196 &traversalStack[stackPtr - 2],
197 &traversalStack[stackPtr - 3]);
200 nodeAddr = traversalStack[stackPtr].addr;
204 /* If node is leaf, fetch triangle list. */
/* The leaf record index is recovered from the negative address as
 * -nodeAddr-1. */
206 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-nodeAddr-1)*BVH_QNODE_LEAF_SIZE);
/* leaf.x and leaf.y bound the primitive range walked by the loops below;
 * leaf.w carries the primitive type bits. */
207 int primAddr = __float_as_int(leaf.x);
209 #if BVH_FEATURE(BVH_INSTANCING)
212 int primAddr2 = __float_as_int(leaf.y);
213 const uint type = __float_as_int(leaf.w);
214 const uint p_type = type & PRIMITIVE_ALL;
/* Take the next node address from the stack before testing primitives.
 * NOTE(review): the matching stack pointer update is not visible here. */
217 nodeAddr = traversalStack[stackPtr].addr;
220 /* Primitive intersection. */
222 case PRIMITIVE_TRIANGLE: {
223 for(; primAddr < primAddr2; primAddr++) {
224 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
225 /* Only primitives from volume object. */
/* Outside of an instance the owning object is looked up per primitive;
 * inside an instance the instance object is used. NOTE(review): the body of
 * the flag test is not visible; presumably it skips the primitive. */
226 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
227 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
228 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
231 /* Intersect ray against primitive. */
232 triangle_intersect(kg, &isect_precalc, isect, P, visibility, object, primAddr);
236 #if BVH_FEATURE(BVH_MOTION)
237 case PRIMITIVE_MOTION_TRIANGLE: {
238 for(; primAddr < primAddr2; primAddr++) {
239 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
240 /* Only primitives from volume object. */
241 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
242 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
243 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
246 /* Intersect ray against primitive. */
247 motion_triangle_intersect(kg, isect, P, dir, ray->time, visibility, object, primAddr);
252 #if BVH_FEATURE(BVH_HAIR)
253 case PRIMITIVE_CURVE:
254 case PRIMITIVE_MOTION_CURVE: {
255 for(; primAddr < primAddr2; primAddr++) {
256 kernel_assert(kernel_tex_fetch(__prim_type, primAddr) == type);
257 /* Only primitives from volume object. */
258 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, primAddr): object;
259 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
260 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
263 /* Intersect ray against primitive. */
/* CURVE_KN_INTERPOLATE selects the cardinal curve test. NOTE(review): the
 * else separating the two calls is not visible in this listing. */
264 if(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE)
265 bvh_cardinal_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
267 bvh_curve_intersect(kg, isect, P, dir, visibility, object, primAddr, ray->time, type, NULL, 0, 0);
274 #if BVH_FEATURE(BVH_INSTANCING)
/* Instance handling: the object index is read from __prim_object at
 * -primAddr-1. NOTE(review): the condition that routes negative primAddr
 * values here is not visible in this listing. */
277 object = kernel_tex_fetch(__prim_object, -primAddr-1);
278 int object_flag = kernel_tex_fetch(__object_flag, object);
/* Only instances flagged as having a volume are entered. */
280 if(object_flag & SD_OBJECT_HAS_VOLUME) {
282 # if BVH_FEATURE(BVH_MOTION)
283 bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
285 bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
/* P, dir, idir and isect->t were passed by pointer to the push above, so
 * every value cached from them is rebuilt here. */
288 if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
289 if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
290 if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
291 tfar = ssef(isect->t);
292 idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
293 # ifdef __KERNEL_AVX2__
295 P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
297 org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
299 triangle_intersect_precalc(dir, &isect_precalc);
/* A sentinel entry is stored on the stack before switching to the node
 * address fetched from __object_node for this object; the inner loop
 * condition below stops on that sentinel. */
302 kernel_assert(stackPtr < BVH_QSTACK_SIZE);
303 traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
305 nodeAddr = kernel_tex_fetch(__object_node, object);
/* NOTE(review): presumably the branch for instances without a volume (the
 * else is not visible): forget the object and resume from the stack. */
309 object = OBJECT_NONE;
310 nodeAddr = traversalStack[stackPtr].addr;
315 #endif /* FEATURE(BVH_INSTANCING) */
316 } while(nodeAddr != ENTRYPOINT_SENTINEL);
318 #if BVH_FEATURE(BVH_INSTANCING)
/* Leaving an instance: the pop helpers receive the same pointers as the push
 * helpers, and the cached values are rebuilt in the same way afterwards.
 * NOTE(review): the condition guarding this section is not visible here. */
320 kernel_assert(object != OBJECT_NONE);
323 # if BVH_FEATURE(BVH_MOTION)
324 bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
326 bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
329 if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
330 if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
331 if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
332 tfar = ssef(isect->t);
333 idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
334 # ifdef __KERNEL_AVX2__
336 P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
338 org = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
340 triangle_intersect_precalc(dir, &isect_precalc);
342 object = OBJECT_NONE;
343 nodeAddr = traversalStack[stackPtr].addr;
346 #endif /* FEATURE(BVH_INSTANCING) */
347 } while(nodeAddr != ENTRYPOINT_SENTINEL);
/* A hit was recorded if some intersection helper replaced PRIM_NONE. */
349 return (isect->prim != PRIM_NONE);