Cycles: Cleanup, use explicit comparison with NULL
[blender.git] / intern / cycles / kernel / bvh / qbvh_local.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for finding local intersections
18  * around the shading point, for subsurface scattering and bevel. We disable
19  * various features for performance, and for instanced objects avoid traversing
20  * other parts of the scene.
21  *
22  * BVH_MOTION: motion blur rendering
23  *
24  */
25
/* NODE_INTERSECT names the node intersection routine called by the traversal
 * function in this file: qbvh_node_intersect when the BVH_HAIR feature is
 * enabled, qbvh_aligned_node_intersect otherwise.
 */
26 #if BVH_FEATURE(BVH_HAIR)
27 #  define NODE_INTERSECT qbvh_node_intersect
28 #else
29 #  define NODE_INTERSECT qbvh_aligned_node_intersect
30 #endif
31
/* Local BVH traversal restricted to the nodes of a single object.
 *
 * Traversal starts at the node address fetched from __object_node for
 * local_object and walks the node hierarchy (up to four children per node)
 * with an explicit stack. Every leaf reached has its triangles (and, with
 * BVH_MOTION, motion triangles) tested through the *_intersect_local()
 * helpers; other primitive types are skipped.
 *
 * kg            Kernel globals, forwarded to texture fetches and helpers.
 * ray           Ray to trace; P, D and t are read (and time with BVH_MOTION).
 * local_isect   Hit record. When non-NULL its num_hits is reset to 0 before
 *               traversal. The assert requires it to be NULL exactly when
 *               max_hits is 0.
 * local_object  Object whose BVH is traversed.
 * lcg_state     Forwarded as-is to the primitive intersection helpers.
 * max_hits      Forwarded as-is to the primitive intersection helpers.
 *
 * Returns true as soon as a primitive intersection helper returns true.
 * Returns false once the traversal stack is exhausted, or (when
 * __KERNEL_SSE41__ is not defined) if P.x is not finite after the optional
 * instance push.
 */
32 ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
33                                              const Ray *ray,
34                                              LocalIntersection *local_isect,
35                                              int local_object,
36                                              uint *lcg_state,
37                                              int max_hits)
38 {
39         /* TODO(sergey):
40          * - Test if pushing distance on the stack helps (for non shadow rays).
41          * - Separate version for shadow rays.
42          * - Likely and unlikely for if() statements.
43          * - SSE for hair.
44          * - Test restrict attribute for pointers.
45          */
46
47         /* Traversal stack in CUDA thread-local memory. */
48         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
           /* Slot 0 holds the sentinel: popping it sets node_addr to
            * ENTRYPOINT_SENTINEL, which is the exit condition of every loop
            * below.
            */
49         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
50
51         /* Traversal variables in registers. */
52         int stack_ptr = 0;
53         int node_addr = kernel_tex_fetch(__object_node, local_object);
54
55         /* Ray parameters in registers. */
56         float3 P = ray->P;
57         float3 dir = bvh_clamp_direction(ray->D);
58         float3 idir = bvh_inverse_direction(dir);
           /* Stays OBJECT_NONE unless the instance push below is taken. */
59         int object = OBJECT_NONE;
60         float isect_t = ray->t;
61
62         if(local_isect != NULL) {
63                 local_isect->num_hits = 0;
64         }
           /* A hit record must be supplied exactly when hits are requested. */
65         kernel_assert((local_isect == NULL) == (max_hits == 0));
66
           /* When the object flag SD_OBJECT_TRANSFORM_APPLIED is not set, the
            * ray is passed through the instance push helper, which receives
            * pointers to P, dir and idir and returns the new isect_t.
            * NOTE(review): presumably this moves the ray into the object's
            * local space - confirm against the helper.
            */
67         const int object_flag = kernel_tex_fetch(__object_flag, local_object);
68         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
69 #if BVH_FEATURE(BVH_MOTION)
                   /* ob_itfm is only an output of the helper; it is not read
                    * again in this function.
                    */
70                 Transform ob_itfm;
71                 isect_t = bvh_instance_motion_push(kg,
72                                                    local_object,
73                                                    ray,
74                                                    &P,
75                                                    &dir,
76                                                    &idir,
77                                                    isect_t,
78                                                    &ob_itfm);
79 #else
80                 isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
81 #endif
82                 object = local_object;
83         }
84
           /* Without __KERNEL_SSE41__, give up early on a non-finite origin.
            * NOTE(review): only P.x is checked - confirm that is sufficient.
            */
85 #ifndef __KERNEL_SSE41__
86         if(!isfinite(P.x)) {
87                 return false;
88         }
89 #endif
90
           /* Ray interval and per-axis direction data in the SIMD wrapper
            * types passed to NODE_INTERSECT. Which of P_idir4, org4 and dir4
            * exist depends on __KERNEL_AVX2__ and BVH_HAIR, mirroring the
            * conditional argument list at the call site.
            */
91         ssef tnear(0.0f), tfar(isect_t);
92 #if BVH_FEATURE(BVH_HAIR)
93         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
94 #endif
95         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
96
97 #ifdef __KERNEL_AVX2__
98         float3 P_idir = P*idir;
99         sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
100 #endif
101 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
102         sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
103 #endif
104
105         /* Offsets to select the side that becomes the lower or upper bound. */
106         int near_x, near_y, near_z;
107         int far_x, far_y, far_z;
108         qbvh_near_far_idx_calc(idir,
109                                &near_x, &near_y, &near_z,
110                                &far_x, &far_y, &far_z);
111
112         /* Traversal loop. */
113         do {
114                 do {
115                         /* Traverse internal nodes. */
                            /* Non-negative addresses are inner nodes; negative
                             * ones are leaves and are handled after this loop.
                             */
116                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
                                    /* dist is filled by NODE_INTERSECT and read
                                     * below as an array of floats indexed by
                                     * child.
                                     */
117                                 ssef dist;
118                                 int child_mask = NODE_INTERSECT(kg,
119                                                                 tnear,
120                                                                 tfar,
121 #ifdef __KERNEL_AVX2__
122                                                                 P_idir4,
123 #endif
124 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
125                                                                 org4,
126 #endif
127 #if BVH_FEATURE(BVH_HAIR)
128                                                                 dir4,
129 #endif
130                                                                 idir4,
131                                                                 near_x, near_y, near_z,
132                                                                 far_x, far_y, far_z,
133                                                                 node_addr,
134                                                                 &dist);
135
136                                 if(child_mask != 0) {
                                            /* Child addresses are stored as
                                             * float bit patterns at offset 13
                                             * for nodes flagged
                                             * PATH_RAY_NODE_UNALIGNED (BVH_HAIR
                                             * only) and at offset 7 otherwise.
                                             */
137                                         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
138                                         float4 cnodes;
139 #if BVH_FEATURE(BVH_HAIR)
140                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
141                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
142                                         }
143                                         else
144 #endif
145                                         {
146                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
147                                         }
148
149                                         /* One child is hit, continue with that child. */
                                            /* NOTE(review): __bscf() is not
                                             * defined in this file. From its use
                                             * here it appears to return the
                                             * index of one set bit of child_mask
                                             * and clear that bit, since
                                             * child_mask is re-tested against 0
                                             * after every call - confirm.
                                             */
150                                         int r = __bscf(child_mask);
151                                         if(child_mask == 0) {
152                                                 node_addr = __float_as_int(cnodes[r]);
153                                                 continue;
154                                         }
155
156                                         /* Two children are hit, push far child, and continue with
157                                          * closer child.
158                                          */
159                                         int c0 = __float_as_int(cnodes[r]);
160                                         float d0 = ((float*)&dist)[r];
161                                         r = __bscf(child_mask);
162                                         int c1 = __float_as_int(cnodes[r]);
163                                         float d1 = ((float*)&dist)[r];
164                                         if(child_mask == 0) {
165                                                 if(d1 < d0) {
166                                                         node_addr = c1;
167                                                         ++stack_ptr;
168                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
169                                                         traversal_stack[stack_ptr].addr = c0;
170                                                         traversal_stack[stack_ptr].dist = d0;
171                                                         continue;
172                                                 }
173                                                 else {
174                                                         node_addr = c0;
175                                                         ++stack_ptr;
176                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
177                                                         traversal_stack[stack_ptr].addr = c1;
178                                                         traversal_stack[stack_ptr].dist = d1;
179                                                         continue;
180                                                 }
181                                         }
182
183                                         /* Here starts the slow path for 3 or 4 hit children. We push
184                                          * all nodes onto the stack to sort them there.
185                                          */
186                                         ++stack_ptr;
187                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
188                                         traversal_stack[stack_ptr].addr = c1;
189                                         traversal_stack[stack_ptr].dist = d1;
190                                         ++stack_ptr;
191                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
192                                         traversal_stack[stack_ptr].addr = c0;
193                                         traversal_stack[stack_ptr].dist = d0;
194
195                                         /* Three children are hit, push all onto stack and sort 3
196                                          * stack items, continue with closest child.
197                                          */
198                                         r = __bscf(child_mask);
199                                         int c2 = __float_as_int(cnodes[r]);
200                                         float d2 = ((float*)&dist)[r];
201                                         if(child_mask == 0) {
202                                                 ++stack_ptr;
203                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
204                                                 traversal_stack[stack_ptr].addr = c2;
205                                                 traversal_stack[stack_ptr].dist = d2;
206                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
207                                                                 &traversal_stack[stack_ptr - 1],
208                                                                 &traversal_stack[stack_ptr - 2]);
                                                    /* Pop the top entry after
                                                     * sorting and descend into
                                                     * it.
                                                     */
209                                                 node_addr = traversal_stack[stack_ptr].addr;
210                                                 --stack_ptr;
211                                                 continue;
212                                         }
213
214                                         /* Four children are hit, push all onto stack and sort 4
215                                          * stack items, continue with closest child.
216                                          */
217                                         r = __bscf(child_mask);
218                                         int c3 = __float_as_int(cnodes[r]);
219                                         float d3 = ((float*)&dist)[r];
220                                         ++stack_ptr;
221                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
222                                         traversal_stack[stack_ptr].addr = c3;
223                                         traversal_stack[stack_ptr].dist = d3;
224                                         ++stack_ptr;
225                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
226                                         traversal_stack[stack_ptr].addr = c2;
227                                         traversal_stack[stack_ptr].dist = d2;
228                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
229                                                         &traversal_stack[stack_ptr - 1],
230                                                         &traversal_stack[stack_ptr - 2],
231                                                         &traversal_stack[stack_ptr - 3]);
                                            /* No continue here: the four-child
                                             * case falls through to the shared
                                             * pop below.
                                             */
232                                 }
233
                                    /* Pop: reached when no child was hit or
                                     * after the four-child sort above.
                                     */
234                                 node_addr = traversal_stack[stack_ptr].addr;
235                                 --stack_ptr;
236                         }
237
238                         /* If node is leaf, fetch triangle list. */
239                         if(node_addr < 0) {
                                    /* Leaf record: x and y give the half-open
                                     * primitive range [prim_addr, prim_addr2),
                                     * w the primitive type bits.
                                     */
240                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
241                                 int prim_addr = __float_as_int(leaf.x);
242
243                                 int prim_addr2 = __float_as_int(leaf.y);
244                                 const uint type = __float_as_int(leaf.w);
245
246                                 /* Pop. */
                                    /* Done before the primitive tests, so
                                     * node_addr already holds the next node when
                                     * the switch below finishes.
                                     */
247                                 node_addr = traversal_stack[stack_ptr].addr;
248                                 --stack_ptr;
249
250                                 /* Primitive intersection. */
251                                 switch(type & PRIMITIVE_ALL) {
252                                         case PRIMITIVE_TRIANGLE: {
253                                                 /* Intersect ray against primitive, */
254                                                 for(; prim_addr < prim_addr2; prim_addr++) {
255                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
256                                                         if(triangle_intersect_local(kg,
257                                                                                     local_isect,
258                                                                                     P,
259                                                                                     dir,
260                                                                                     object,
261                                                                                     local_object,
262                                                                                     prim_addr,
263                                                                                     isect_t,
264                                                                                     lcg_state,
265                                                                                     max_hits)) {
266                                                                 return true;
267                                                         }
268                                                 }
269                                                 break;
270                                         }
271 #if BVH_FEATURE(BVH_MOTION)
272                                         case PRIMITIVE_MOTION_TRIANGLE: {
273                                                 /* Intersect ray against primitive. */
274                                                 for(; prim_addr < prim_addr2; prim_addr++) {
275                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
276                                                         if(motion_triangle_intersect_local(kg,
277                                                                                            local_isect,
278                                                                                            P,
279                                                                                            dir,
280                                                                                            ray->time,
281                                                                                            object,
282                                                                                            local_object,
283                                                                                            prim_addr,
284                                                                                            isect_t,
285                                                                                            lcg_state,
286                                                                                            max_hits)) {
287                                                                 return true;
288                                                         }
289                                                 }
290                                                 break;
291                                         }
292 #endif
                                            /* Any other primitive type is
                                             * skipped.
                                             */
293                                         default:
294                                                 break;
295                                 }
296                         }
            /* Both loops test the same condition; the outer loop performs no
             * work of its own in this function.
             */
297                 } while(node_addr != ENTRYPOINT_SENTINEL);
298         } while(node_addr != ENTRYPOINT_SENTINEL);
299
300         return false;
301 }
302
/* Remove the NODE_INTERSECT alias defined at the top of this file. */
303 #undef NODE_INTERSECT