Fix T56626: Cycles ambient occlusion only local : crash
[blender.git] / intern / cycles / kernel / bvh / obvh_local.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for subsurface scattering, where
18  * various features can be enabled/disabled. This way we can compile optimized
19  * versions for each case without new features slowing things down.
20  *
21  * BVH_MOTION: motion blur rendering
22  * BVH_HAIR: hair support, handles unaligned BVH nodes
23  */
24
/* Pick the node intersection routine used by the traversal loop in this file:
 * obvh_node_intersect when the BVH_HAIR feature is enabled, otherwise
 * obvh_aligned_node_intersect.
 */
25 #if BVH_FEATURE(BVH_HAIR)
26 #  define NODE_INTERSECT obvh_node_intersect
27 #else
28 #  define NODE_INTERSECT obvh_aligned_node_intersect
29 #endif
30
/* Traverse the OBVH of a single object and intersect the ray with the
 * triangles stored in its leaves. Inner nodes can report up to eight hit
 * children per step.
 *
 * kg:           kernel globals, handed on to the node and primitive
 *               intersection helpers.
 * ray:          ray to trace; P, D and t are read here (and time when
 *               BVH_MOTION is enabled).
 * local_isect:  hit record passed to the primitive intersection helpers. May
 *               be NULL; when non-NULL its num_hits is reset to 0 up front.
 * local_object: object whose BVH root node and object flags are looked up.
 * lcg_state:    forwarded unchanged to the primitive intersection helpers.
 * max_hits:     forwarded to the primitive intersection helpers; asserted to
 *               be 0 exactly when local_isect is NULL.
 *
 * Returns true as soon as one of the primitive intersection helpers returns
 * true, and false once the traversal finished without that happening (or when
 * the early-out on a non-finite ray origin triggers).
 * NOTE(review): what a true result means to callers is decided inside
 * triangle_intersect_local / motion_triangle_intersect_local - confirm there.
 */
31 ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              LocalIntersection *local_isect,
34                                              int local_object,
35                                              uint *lcg_state,
36                                              int max_hits)
37 {
38         /* Traversal stack in CUDA thread-local memory. */
39         OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
           /* Sentinel at the bottom of the stack: popping it ends the traversal. */
40         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
41
42         /* Traversal variables in registers. */
43         int stack_ptr = 0;
           /* Start at the root node of the requested object's BVH. */
44         int node_addr = kernel_tex_fetch(__object_node, local_object);
45
46         /* Ray parameters in registers. */
47         float3 P = ray->P;
48         float3 dir = bvh_clamp_direction(ray->D);
49         float3 idir = bvh_inverse_direction(dir);
50         int object = OBJECT_NONE;
51         float isect_t = ray->t;
52
           /* The hit record is optional; only reset it when one was supplied. */
53         if(local_isect != NULL) {
54                 local_isect->num_hits = 0;
55         }
           /* A NULL hit record is only valid together with max_hits == 0, and
            * the other way around.
            */
56         kernel_assert((local_isect == NULL) == (max_hits == 0));
57
           /* When the object's transform is not already applied, let the instance
            * push helper update P, dir, idir and isect_t for this object, and
            * remember the object for the primitive intersection calls.
            * NOTE(review): presumably this moves the ray into object space -
            * confirm in bvh_instance_push / bvh_instance_motion_push.
            */
58         const int object_flag = kernel_tex_fetch(__object_flag, local_object);
59         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
60 #if BVH_FEATURE(BVH_MOTION)
61                 Transform ob_itfm;
62                 isect_t = bvh_instance_motion_push(kg,
63                                                    local_object,
64                                                    ray,
65                                                    &P,
66                                                    &dir,
67                                                    &idir,
68                                                    isect_t,
69                                                    &ob_itfm);
70 #else
71                 isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
72 #endif
73                 object = local_object;
74         }
75
           /* Builds without SSE4.1 give up here when the x component of the ray
            * origin is not finite. Only P.x is tested.
            */
76 #ifndef __KERNEL_SSE41__
77         if(!isfinite(P.x)) {
78                 return false;
79         }
80 #endif
81
           /* Wide (avxf) copies of the ray interval, direction and origin for the
            * node intersection test. Which of them exist depends on BVH_HAIR and
            * __KERNEL_AVX2__, matching the argument list passed to NODE_INTERSECT.
            */
82         avxf tnear(0.0f), tfar(isect_t);
83 #if BVH_FEATURE(BVH_HAIR)
84         avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
85 #endif
86         avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
87
88 #ifdef __KERNEL_AVX2__
89         float3 P_idir = P*idir;
90         avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
91 #endif
92 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
93         avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
94 #endif
95
96         /* Offsets to select the side that becomes the lower or upper bound. */
97         int near_x, near_y, near_z;
98         int far_x, far_y, far_z;
99         obvh_near_far_idx_calc(idir,
100                                &near_x, &near_y, &near_z,
101                                &far_x, &far_y, &far_z);
102
103         /* Traversal loop. */
104         do {
105                 do {
106                         /* Traverse internal nodes. */
107                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
                                    /* child_mask gets one bit per hit child, dist the
                                     * matching per-child distances (as used below).
                                     */
108                                 avxf dist;
109                                 int child_mask = NODE_INTERSECT(kg,
110                                                                 tnear,
111                                                                 tfar,
112 #ifdef __KERNEL_AVX2__
113                                                                 P_idir4,
114 #endif
115 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
116                                                                 org4,
117 #endif
118 #if BVH_FEATURE(BVH_HAIR)
119                                                                 dir4,
120 #endif
121                                                                 idir4,
122                                                                 near_x, near_y, near_z,
123                                                                 far_x, far_y, far_z,
124                                                                 node_addr,
125                                                                 &dist);
126
127                                 if(child_mask != 0) {
                                            /* Fetch the child addresses of this node; they are
                                             * stored at a different offset for nodes flagged
                                             * as unaligned.
                                             */
128                                         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
129                                         avxf cnodes;
130 #if BVH_FEATURE(BVH_HAIR)
131                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
132                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
133                                         }
134                                         else
135 #endif
136                                         {
137                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
138                                         }
139
140                                         /* One child is hit, continue with that child. */
                                            /* NOTE(review): __bscf is used as "return the index
                                             * of a set bit and clear it from child_mask", so a
                                             * zero mask afterwards means this was the last hit
                                             * child - confirm against its definition.
                                             */
141                                         int r = __bscf(child_mask);
142                                         if(child_mask == 0) {
143                                                 node_addr = __float_as_int(cnodes[r]);
144                                                 continue;
145                                         }
146
147                                         /* Two children are hit, push far child, and continue with
148                                          * closer child.
149                                          */
150                                         int c0 = __float_as_int(cnodes[r]);
151                                         float d0 = ((float*)&dist)[r];
152                                         r = __bscf(child_mask);
153                                         int c1 = __float_as_int(cnodes[r]);
154                                         float d1 = ((float*)&dist)[r];
155                                         if(child_mask == 0) {
156                                                 if(d1 < d0) {
157                                                         node_addr = c1;
158                                                         ++stack_ptr;
159                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
160                                                         traversal_stack[stack_ptr].addr = c0;
161                                                         traversal_stack[stack_ptr].dist = d0;
162                                                         continue;
163                                                 }
164                                                 else {
165                                                         node_addr = c0;
166                                                         ++stack_ptr;
167                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
168                                                         traversal_stack[stack_ptr].addr = c1;
169                                                         traversal_stack[stack_ptr].dist = d1;
170                                                         continue;
171                                                 }
172                                         }
173
174                                         /* Here starts the slow path for 3 or more hit children. We
175                                          * push all nodes onto the stack to sort them there.
176                                          */
177                                         ++stack_ptr;
178                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
179                                         traversal_stack[stack_ptr].addr = c1;
180                                         traversal_stack[stack_ptr].dist = d1;
181                                         ++stack_ptr;
182                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
183                                         traversal_stack[stack_ptr].addr = c0;
184                                         traversal_stack[stack_ptr].dist = d0;
185
186                                         /* Three children are hit, push all onto stack and sort 3
187                                          * stack items, continue with closest child.
188                                          */
189                                         r = __bscf(child_mask);
190                                         int c2 = __float_as_int(cnodes[r]);
191                                         float d2 = ((float*)&dist)[r];
192                                         if(child_mask == 0) {
193                                                 ++stack_ptr;
194                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
195                                                 traversal_stack[stack_ptr].addr = c2;
196                                                 traversal_stack[stack_ptr].dist = d2;
197                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
198                                                                 &traversal_stack[stack_ptr - 1],
199                                                                 &traversal_stack[stack_ptr - 2]);
                                                    /* After sorting, take the top stack item as the
                                                     * next node to visit.
                                                     */
200                                                 node_addr = traversal_stack[stack_ptr].addr;
201                                                 --stack_ptr;
202                                                 continue;
203                                         }
204
205                                         /* Four children are hit, push all onto stack and sort 4
206                                          * stack items, continue with closest child.
207                                          */
208                                         r = __bscf(child_mask);
209                                         int c3 = __float_as_int(cnodes[r]);
210                                         float d3 = ((float*)&dist)[r];
211                                         if(child_mask == 0) {
212                                                 ++stack_ptr;
213                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
214                                                 traversal_stack[stack_ptr].addr = c3;
215                                                 traversal_stack[stack_ptr].dist = d3;
216                                                 ++stack_ptr;
217                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
218                                                 traversal_stack[stack_ptr].addr = c2;
219                                                 traversal_stack[stack_ptr].dist = d2;
220                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
221                                                                 &traversal_stack[stack_ptr - 1],
222                                                                 &traversal_stack[stack_ptr - 2],
223                                                                 &traversal_stack[stack_ptr - 3]);
224                                                 node_addr = traversal_stack[stack_ptr].addr;
225                                                 --stack_ptr;
226                                                 continue;
227                                         }
228
                                            /* More than four children are hit: c3 and c2 go onto
                                             * the stack before the remaining children are read.
                                             */
229                                         ++stack_ptr;
230                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
231                                         traversal_stack[stack_ptr].addr = c3;
232                                         traversal_stack[stack_ptr].dist = d3;
233                                         ++stack_ptr;
234                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
235                                         traversal_stack[stack_ptr].addr = c2;
236                                         traversal_stack[stack_ptr].dist = d2;
237
238                                         /* Five children are hit, push all onto stack and sort 5
239                                          * stack items, continue with closest child.
240                                          */
241                                         r = __bscf(child_mask);
242                                         int c4 = __float_as_int(cnodes[r]);
243                                         float d4 = ((float*)&dist)[r];
244                                         if(child_mask == 0) {
245                                                 ++stack_ptr;
246                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
247                                                 traversal_stack[stack_ptr].addr = c4;
248                                                 traversal_stack[stack_ptr].dist = d4;
249                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
250                                                                 &traversal_stack[stack_ptr - 1],
251                                                                 &traversal_stack[stack_ptr - 2],
252                                                                 &traversal_stack[stack_ptr - 3],
253                                                                 &traversal_stack[stack_ptr - 4]);
254                                                 node_addr = traversal_stack[stack_ptr].addr;
255                                                 --stack_ptr;
256                                                 continue;
257                                         }
258                                         /* Six children are hit, push all onto stack and sort 6
259                                          * stack items, continue with closest child.
260                                          */
261                                         r = __bscf(child_mask);
262                                         int c5 = __float_as_int(cnodes[r]);
263                                         float d5 = ((float*)&dist)[r];
264                                         if(child_mask == 0) {
265                                                 ++stack_ptr;
266                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
267                                                 traversal_stack[stack_ptr].addr = c5;
268                                                 traversal_stack[stack_ptr].dist = d5;
269                                                 ++stack_ptr;
270                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
271                                                 traversal_stack[stack_ptr].addr = c4;
272                                                 traversal_stack[stack_ptr].dist = d4;
273                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
274                                                                 &traversal_stack[stack_ptr - 1],
275                                                                 &traversal_stack[stack_ptr - 2],
276                                                                 &traversal_stack[stack_ptr - 3],
277                                                                 &traversal_stack[stack_ptr - 4],
278                                                                 &traversal_stack[stack_ptr - 5]);
279                                                 node_addr = traversal_stack[stack_ptr].addr;
280                                                 --stack_ptr;
281                                                 continue;
282                                         }
283
                                            /* More than six children are hit: c5 and c4 go onto
                                             * the stack before the remaining children are read.
                                             */
284                                         ++stack_ptr;
285                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
286                                         traversal_stack[stack_ptr].addr = c5;
287                                         traversal_stack[stack_ptr].dist = d5;
288                                         ++stack_ptr;
289                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
290                                         traversal_stack[stack_ptr].addr = c4;
291                                         traversal_stack[stack_ptr].dist = d4;
292
293                                         /* Seven children are hit, push all onto stack and sort 7
294                                          * stack items, continue with closest child.
295                                          */
296                                         r = __bscf(child_mask);
297                                         int c6 = __float_as_int(cnodes[r]);
298                                         float d6 = ((float*)&dist)[r];
299                                         if(child_mask == 0) {
300                                                 ++stack_ptr;
301                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
302                                                 traversal_stack[stack_ptr].addr = c6;
303                                                 traversal_stack[stack_ptr].dist = d6;
304                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
305                                                                 &traversal_stack[stack_ptr - 1],
306                                                                 &traversal_stack[stack_ptr - 2],
307                                                                 &traversal_stack[stack_ptr - 3],
308                                                                 &traversal_stack[stack_ptr - 4],
309                                                                 &traversal_stack[stack_ptr - 5],
310                                                                 &traversal_stack[stack_ptr - 6]);
311                                                 node_addr = traversal_stack[stack_ptr].addr;
312                                                 --stack_ptr;
313                                                 continue;
314                                         }
315                                         /* Eight children are hit, push all onto stack and sort 8
316                                          * stack items, continue with closest child.
317                                          */
318                                         r = __bscf(child_mask);
319                                         int c7 = __float_as_int(cnodes[r]);
320                                         float d7 = ((float*)&dist)[r];
321                                         ++stack_ptr;
322                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
323                                         traversal_stack[stack_ptr].addr = c7;
324                                         traversal_stack[stack_ptr].dist = d7;
325                                         ++stack_ptr;
326                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
327                                         traversal_stack[stack_ptr].addr = c6;
328                                         traversal_stack[stack_ptr].dist = d6;
329                                         obvh_stack_sort(&traversal_stack[stack_ptr],
330                                                         &traversal_stack[stack_ptr - 1],
331                                                         &traversal_stack[stack_ptr - 2],
332                                                         &traversal_stack[stack_ptr - 3],
333                                                         &traversal_stack[stack_ptr - 4],
334                                                         &traversal_stack[stack_ptr - 5],
335                                                         &traversal_stack[stack_ptr - 6],
336                                                         &traversal_stack[stack_ptr - 7]);
337                                         node_addr = traversal_stack[stack_ptr].addr;
338                                         --stack_ptr;
339                                         continue;
340                                 }
341
                                    /* No child was hit: pop the next node from the stack. */
342                                 node_addr = traversal_stack[stack_ptr].addr;
343                                 --stack_ptr;
344                         }
345
346                         /* If node is leaf, fetch triangle list. */
                            /* Leaves are encoded as negative addresses; -node_addr-1 is the
                             * index into the leaf node array. leaf.x and leaf.y bound the
                             * primitive range [prim_addr, prim_addr2), leaf.w holds the
                             * primitive type.
                             */
347                         if(node_addr < 0) {
348                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
349                                 int prim_addr = __float_as_int(leaf.x);
350
351                                 int prim_addr2 = __float_as_int(leaf.y);
352                                 const uint type = __float_as_int(leaf.w);
353
354                                 /* Pop. */
355                                 node_addr = traversal_stack[stack_ptr].addr;
356                                 --stack_ptr;
357
358                                 /* Primitive intersection. */
                                    /* Only triangles (and motion triangles when BVH_MOTION is
                                     * enabled) are tested; any other primitive type is skipped.
                                     */
359                                 switch(type & PRIMITIVE_ALL) {
360                                         case PRIMITIVE_TRIANGLE: {
361                                                 /* Intersect ray against primitive. */
362                                                 for(; prim_addr < prim_addr2; prim_addr++) {
363                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
364                                                         if(triangle_intersect_local(kg,
365                                                                                     local_isect,
366                                                                                     P,
367                                                                                     dir,
368                                                                                     object,
369                                                                                     local_object,
370                                                                                     prim_addr,
371                                                                                     isect_t,
372                                                                                     lcg_state,
373                                                                                     max_hits))
374                                                         {
375                                                                 return true;
376                                                         }
377                                                 }
378                                                 break;
379                                         }
380 #if BVH_FEATURE(BVH_MOTION)
381                                         case PRIMITIVE_MOTION_TRIANGLE: {
382                                                 /* Intersect ray against primitive. */
383                                                 for(; prim_addr < prim_addr2; prim_addr++) {
384                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
385                                                         if(motion_triangle_intersect_local(kg,
386                                                                                            local_isect,
387                                                                                            P,
388                                                                                            dir,
389                                                                                            ray->time,
390                                                                                            object,
391                                                                                            local_object,
392                                                                                            prim_addr,
393                                                                                            isect_t,
394                                                                                            lcg_state,
395                                                                                            max_hits))
396                                                         {
397                                                                 return true;
398                                                         }
399                                                 }
400                                                 break;
401                                         }
402 #endif
403                                         default:
404                                                 break;
405                                 }
406                         }
                    /* Both loops end once the sentinel has been popped. The outer loop
                     * repeats the inner loop's condition, so it never runs a second pass.
                     */
407                 } while(node_addr != ENTRYPOINT_SENTINEL);
408         } while(node_addr != ENTRYPOINT_SENTINEL);
409         return false;
410 }
411
/* Remove the NODE_INTERSECT alias defined at the top of this file. */
412 #undef NODE_INTERSECT