0dc0575556c18a274fd81e6cf9584586201aa2a9
[blender.git] / intern / cycles / kernel / bvh / qbvh_local.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for finding local intersections
18  * around the shading point, for subsurface scattering and bevel. We disable
19  * various features for performance, and for instanced objects avoid traversing
20  * other parts of the scene.
21  *
22  * BVH_MOTION: motion blur rendering
23  *
24  */
25
26 #if BVH_FEATURE(BVH_HAIR)
27 #  define NODE_INTERSECT qbvh_node_intersect
28 #else
29 #  define NODE_INTERSECT qbvh_aligned_node_intersect
30 #endif
31
32 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
33                                              const Ray *ray,
34                                              LocalIntersection *local_isect,
35                                              int local_object,
36                                              uint *lcg_state,
37                                              int max_hits)
38 {
39         /* TODO(sergey):
40          * - Test if pushing distance on the stack helps (for non shadow rays).
41          * - Separate version for shadow rays.
42          * - Likely and unlikely for if() statements.
43          * - SSE for hair.
44          * - Test restrict attribute for pointers.
45          */
46
47         /* Traversal stack in CUDA thread-local memory. */
48         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
49         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
50
51         /* Traversal variables in registers. */
52         int stack_ptr = 0;
53         int node_addr = kernel_tex_fetch(__object_node, local_object);
54
55         /* Ray parameters in registers. */
56         float3 P = ray->P;
57         float3 dir = bvh_clamp_direction(ray->D);
58         float3 idir = bvh_inverse_direction(dir);
59         int object = OBJECT_NONE;
60         float isect_t = ray->t;
61
62         if(local_isect) {
63                 local_isect->num_hits = 0;
64         }
65
66         kernel_assert((local_isect == NULL) == (max_hits == 0));
67
68         const int object_flag = kernel_tex_fetch(__object_flag, local_object);
69         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
70 #if BVH_FEATURE(BVH_MOTION)
71                 Transform ob_itfm;
72                 isect_t = bvh_instance_motion_push(kg,
73                                                    local_object,
74                                                    ray,
75                                                    &P,
76                                                    &dir,
77                                                    &idir,
78                                                    isect_t,
79                                                    &ob_itfm);
80 #else
81                 isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
82 #endif
83                 object = local_object;
84         }
85
86 #ifndef __KERNEL_SSE41__
87         if(!isfinite(P.x)) {
88                 return false;
89         }
90 #endif
91
92         ssef tnear(0.0f), tfar(isect_t);
93 #if BVH_FEATURE(BVH_HAIR)
94         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
95 #endif
96         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
97
98 #ifdef __KERNEL_AVX2__
99         float3 P_idir = P*idir;
100         sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
101 #endif
102 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
103         sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
104 #endif
105
106         /* Offsets to select the side that becomes the lower or upper bound. */
107         int near_x, near_y, near_z;
108         int far_x, far_y, far_z;
109         qbvh_near_far_idx_calc(idir,
110                                &near_x, &near_y, &near_z,
111                                &far_x, &far_y, &far_z);
112
113         /* Traversal loop. */
114         do {
115                 do {
116                         /* Traverse internal nodes. */
117                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
118                                 ssef dist;
119                                 int child_mask = NODE_INTERSECT(kg,
120                                                                 tnear,
121                                                                 tfar,
122 #ifdef __KERNEL_AVX2__
123                                                                 P_idir4,
124 #endif
125 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
126                                                                 org4,
127 #endif
128 #if BVH_FEATURE(BVH_HAIR)
129                                                                 dir4,
130 #endif
131                                                                 idir4,
132                                                                 near_x, near_y, near_z,
133                                                                 far_x, far_y, far_z,
134                                                                 node_addr,
135                                                                 &dist);
136
137                                 if(child_mask != 0) {
138                                         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
139                                         float4 cnodes;
140 #if BVH_FEATURE(BVH_HAIR)
141                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
142                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
143                                         }
144                                         else
145 #endif
146                                         {
147                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
148                                         }
149
150                                         /* One child is hit, continue with that child. */
151                                         int r = __bscf(child_mask);
152                                         if(child_mask == 0) {
153                                                 node_addr = __float_as_int(cnodes[r]);
154                                                 continue;
155                                         }
156
157                                         /* Two children are hit, push far child, and continue with
158                                          * closer child.
159                                          */
160                                         int c0 = __float_as_int(cnodes[r]);
161                                         float d0 = ((float*)&dist)[r];
162                                         r = __bscf(child_mask);
163                                         int c1 = __float_as_int(cnodes[r]);
164                                         float d1 = ((float*)&dist)[r];
165                                         if(child_mask == 0) {
166                                                 if(d1 < d0) {
167                                                         node_addr = c1;
168                                                         ++stack_ptr;
169                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
170                                                         traversal_stack[stack_ptr].addr = c0;
171                                                         traversal_stack[stack_ptr].dist = d0;
172                                                         continue;
173                                                 }
174                                                 else {
175                                                         node_addr = c0;
176                                                         ++stack_ptr;
177                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
178                                                         traversal_stack[stack_ptr].addr = c1;
179                                                         traversal_stack[stack_ptr].dist = d1;
180                                                         continue;
181                                                 }
182                                         }
183
184                                         /* Here starts the slow path for 3 or 4 hit children. We push
185                                          * all nodes onto the stack to sort them there.
186                                          */
187                                         ++stack_ptr;
188                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
189                                         traversal_stack[stack_ptr].addr = c1;
190                                         traversal_stack[stack_ptr].dist = d1;
191                                         ++stack_ptr;
192                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
193                                         traversal_stack[stack_ptr].addr = c0;
194                                         traversal_stack[stack_ptr].dist = d0;
195
196                                         /* Three children are hit, push all onto stack and sort 3
197                                          * stack items, continue with closest child.
198                                          */
199                                         r = __bscf(child_mask);
200                                         int c2 = __float_as_int(cnodes[r]);
201                                         float d2 = ((float*)&dist)[r];
202                                         if(child_mask == 0) {
203                                                 ++stack_ptr;
204                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
205                                                 traversal_stack[stack_ptr].addr = c2;
206                                                 traversal_stack[stack_ptr].dist = d2;
207                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
208                                                                 &traversal_stack[stack_ptr - 1],
209                                                                 &traversal_stack[stack_ptr - 2]);
210                                                 node_addr = traversal_stack[stack_ptr].addr;
211                                                 --stack_ptr;
212                                                 continue;
213                                         }
214
215                                         /* Four children are hit, push all onto stack and sort 4
216                                          * stack items, continue with closest child.
217                                          */
218                                         r = __bscf(child_mask);
219                                         int c3 = __float_as_int(cnodes[r]);
220                                         float d3 = ((float*)&dist)[r];
221                                         ++stack_ptr;
222                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
223                                         traversal_stack[stack_ptr].addr = c3;
224                                         traversal_stack[stack_ptr].dist = d3;
225                                         ++stack_ptr;
226                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
227                                         traversal_stack[stack_ptr].addr = c2;
228                                         traversal_stack[stack_ptr].dist = d2;
229                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
230                                                         &traversal_stack[stack_ptr - 1],
231                                                         &traversal_stack[stack_ptr - 2],
232                                                         &traversal_stack[stack_ptr - 3]);
233                                 }
234
235                                 node_addr = traversal_stack[stack_ptr].addr;
236                                 --stack_ptr;
237                         }
238
239                         /* If node is leaf, fetch triangle list. */
240                         if(node_addr < 0) {
241                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
242                                 int prim_addr = __float_as_int(leaf.x);
243
244                                 int prim_addr2 = __float_as_int(leaf.y);
245                                 const uint type = __float_as_int(leaf.w);
246
247                                 /* Pop. */
248                                 node_addr = traversal_stack[stack_ptr].addr;
249                                 --stack_ptr;
250
251                                 /* Primitive intersection. */
252                                 switch(type & PRIMITIVE_ALL) {
253                                         case PRIMITIVE_TRIANGLE: {
254                                                 /* Intersect ray against primitive, */
255                                                 for(; prim_addr < prim_addr2; prim_addr++) {
256                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
257                                                         if(triangle_intersect_local(kg,
258                                                                                     local_isect,
259                                                                                     P,
260                                                                                     dir,
261                                                                                     object,
262                                                                                     local_object,
263                                                                                     prim_addr,
264                                                                                     isect_t,
265                                                                                     lcg_state,
266                                                                                     max_hits)) {
267                                                                 return true;
268                                                         }
269                                                 }
270                                                 break;
271                                         }
272 #if BVH_FEATURE(BVH_MOTION)
273                                         case PRIMITIVE_MOTION_TRIANGLE: {
274                                                 /* Intersect ray against primitive. */
275                                                 for(; prim_addr < prim_addr2; prim_addr++) {
276                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
277                                                         if(motion_triangle_intersect_local(kg,
278                                                                                            local_isect,
279                                                                                            P,
280                                                                                            dir,
281                                                                                            ray->time,
282                                                                                            object,
283                                                                                            local_object,
284                                                                                            prim_addr,
285                                                                                            isect_t,
286                                                                                            lcg_state,
287                                                                                            max_hits)) {
288                                                                 return true;
289                                                         }
290                                                 }
291                                                 break;
292                                         }
293 #endif
294                                         default:
295                                                 break;
296                                 }
297                         }
298                 } while(node_addr != ENTRYPOINT_SENTINEL);
299         } while(node_addr != ENTRYPOINT_SENTINEL);
300
301         return false;
302 }
303
304 #undef NODE_INTERSECT