50bcfa79b6cae909282e7c7fda16bfc08ce19024
[blender.git] / intern / cycles / kernel / bvh / obvh_local.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for subsurface scattering, where
18  * various features can be enabled/disabled. This way we can compile optimized
19  * versions for each case without new features slowing things down.
20  *
21  * BVH_MOTION: motion blur rendering
22  *
23  */
24
25 #if BVH_FEATURE(BVH_HAIR)
26 #  define NODE_INTERSECT obvh_node_intersect
27 #else
28 #  define NODE_INTERSECT obvh_aligned_node_intersect
29 #endif
30
31 ccl_device bool BVH_FUNCTION_FULL_NAME(OBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              LocalIntersection *local_isect,
34                                              int local_object,
35                                              uint *lcg_state,
36                                              int max_hits)
37 {
38         /* Traversal stack in CUDA thread-local memory. */
39         OBVHStackItem traversal_stack[BVH_OSTACK_SIZE];
40         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
41
42         /* Traversal variables in registers. */
43         int stack_ptr = 0;
44         int node_addr = kernel_tex_fetch(__object_node, local_object);
45
46         /* Ray parameters in registers. */
47         float3 P = ray->P;
48         float3 dir = bvh_clamp_direction(ray->D);
49         float3 idir = bvh_inverse_direction(dir);
50         int object = OBJECT_NONE;
51         float isect_t = ray->t;
52
53         local_isect->num_hits = 0;
54
55         const int object_flag = kernel_tex_fetch(__object_flag, local_object);
56         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
57 #if BVH_FEATURE(BVH_MOTION)
58                 Transform ob_itfm;
59                 isect_t = bvh_instance_motion_push(kg,
60                                                    local_object,
61                                                    ray,
62                                                    &P,
63                                                    &dir,
64                                                    &idir,
65                                                    isect_t,
66                                                    &ob_itfm);
67 #else
68                 isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
69 #endif
70                 object = local_object;
71         }
72
73 #ifndef __KERNEL_SSE41__
74         if(!isfinite(P.x)) {
75                 return false;
76         }
77 #endif
78
79         avxf tnear(0.0f), tfar(isect_t);
80 #if BVH_FEATURE(BVH_HAIR)
81         avx3f dir4(avxf(dir.x), avxf(dir.y), avxf(dir.z));
82 #endif
83         avx3f idir4(avxf(idir.x), avxf(idir.y), avxf(idir.z));
84
85 #ifdef __KERNEL_AVX2__
86         float3 P_idir = P*idir;
87         avx3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
88 #endif
89 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
90         avx3f org4(avxf(P.x), avxf(P.y), avxf(P.z));
91 #endif
92
93         /* Offsets to select the side that becomes the lower or upper bound. */
94         int near_x, near_y, near_z;
95         int far_x, far_y, far_z;
96         obvh_near_far_idx_calc(idir,
97                                &near_x, &near_y, &near_z,
98                                &far_x, &far_y, &far_z);
99
100         /* Traversal loop. */
101         do {
102                 do {
103                         /* Traverse internal nodes. */
104                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
105                                 avxf dist;
106                                 int child_mask = NODE_INTERSECT(kg,
107                                                                 tnear,
108                                                                 tfar,
109 #ifdef __KERNEL_AVX2__
110                                                                 P_idir4,
111 #endif
112 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
113                                                                 org4,
114 #endif
115 #if BVH_FEATURE(BVH_HAIR)
116                                                                 dir4,
117 #endif
118                                                                 idir4,
119                                                                 near_x, near_y, near_z,
120                                                                 far_x, far_y, far_z,
121                                                                 node_addr,
122                                                                 &dist);
123
124                                 if(child_mask != 0) {
125                                         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
126                                         avxf cnodes;
127 #if BVH_FEATURE(BVH_HAIR)
128                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
129                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+26);
130                                         }
131                                         else
132 #endif
133                                         {
134                                                 cnodes = kernel_tex_fetch_avxf(__bvh_nodes, node_addr+14);
135                                         }
136
137                                         /* One child is hit, continue with that child. */
138                                         int r = __bscf(child_mask);
139                                         if(child_mask == 0) {
140                                                 node_addr = __float_as_int(cnodes[r]);
141                                                 continue;
142                                         }
143
144                                         /* Two children are hit, push far child, and continue with
145                                          * closer child.
146                                          */
147                                         int c0 = __float_as_int(cnodes[r]);
148                                         float d0 = ((float*)&dist)[r];
149                                         r = __bscf(child_mask);
150                                         int c1 = __float_as_int(cnodes[r]);
151                                         float d1 = ((float*)&dist)[r];
152                                         if(child_mask == 0) {
153                                                 if(d1 < d0) {
154                                                         node_addr = c1;
155                                                         ++stack_ptr;
156                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
157                                                         traversal_stack[stack_ptr].addr = c0;
158                                                         traversal_stack[stack_ptr].dist = d0;
159                                                         continue;
160                                                 }
161                                                 else {
162                                                         node_addr = c0;
163                                                         ++stack_ptr;
164                                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
165                                                         traversal_stack[stack_ptr].addr = c1;
166                                                         traversal_stack[stack_ptr].dist = d1;
167                                                         continue;
168                                                 }
169                                         }
170
171                                         /* Here starts the slow path for 3 or 4 hit children. We push
172                                          * all nodes onto the stack to sort them there.
173                                          */
174                                         ++stack_ptr;
175                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
176                                         traversal_stack[stack_ptr].addr = c1;
177                                         traversal_stack[stack_ptr].dist = d1;
178                                         ++stack_ptr;
179                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
180                                         traversal_stack[stack_ptr].addr = c0;
181                                         traversal_stack[stack_ptr].dist = d0;
182
183                                         /* Three children are hit, push all onto stack and sort 3
184                                          * stack items, continue with closest child.
185                                          */
186                                         r = __bscf(child_mask);
187                                         int c2 = __float_as_int(cnodes[r]);
188                                         float d2 = ((float*)&dist)[r];
189                                         if(child_mask == 0) {
190                                                 ++stack_ptr;
191                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
192                                                 traversal_stack[stack_ptr].addr = c2;
193                                                 traversal_stack[stack_ptr].dist = d2;
194                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
195                                                                 &traversal_stack[stack_ptr - 1],
196                                                                 &traversal_stack[stack_ptr - 2]);
197                                                 node_addr = traversal_stack[stack_ptr].addr;
198                                                 --stack_ptr;
199                                                 continue;
200                                         }
201
202                                         /* Four children are hit, push all onto stack and sort 4
203                                          * stack items, continue with closest child.
204                                          */
205                                         r = __bscf(child_mask);
206                                         int c3 = __float_as_int(cnodes[r]);
207                                         float d3 = ((float*)&dist)[r];
208                                         if(child_mask == 0) {
209                                                 ++stack_ptr;
210                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
211                                                 traversal_stack[stack_ptr].addr = c3;
212                                                 traversal_stack[stack_ptr].dist = d3;
213                                                 ++stack_ptr;
214                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
215                                                 traversal_stack[stack_ptr].addr = c2;
216                                                 traversal_stack[stack_ptr].dist = d2;
217                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
218                                                                 &traversal_stack[stack_ptr - 1],
219                                                                 &traversal_stack[stack_ptr - 2],
220                                                                 &traversal_stack[stack_ptr - 3]);
221                                                 node_addr = traversal_stack[stack_ptr].addr;
222                                                 --stack_ptr;
223                                                 continue;
224                                         }
225
226                                         ++stack_ptr;
227                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
228                                         traversal_stack[stack_ptr].addr = c3;
229                                         traversal_stack[stack_ptr].dist = d3;
230                                         ++stack_ptr;
231                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
232                                         traversal_stack[stack_ptr].addr = c2;
233                                         traversal_stack[stack_ptr].dist = d2;
234
235                                         /* Five children are hit, push all onto stack and sort 5
236                                          * stack items, continue with closest child
237                                          */
238                                         r = __bscf(child_mask);
239                                         int c4 = __float_as_int(cnodes[r]);
240                                         float d4 = ((float*)&dist)[r];
241                                         if(child_mask == 0) {
242                                                 ++stack_ptr;
243                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
244                                                 traversal_stack[stack_ptr].addr = c4;
245                                                 traversal_stack[stack_ptr].dist = d4;
246                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
247                                                                 &traversal_stack[stack_ptr - 1],
248                                                                 &traversal_stack[stack_ptr - 2],
249                                                                 &traversal_stack[stack_ptr - 3],
250                                                                 &traversal_stack[stack_ptr - 4]);
251                                                 node_addr = traversal_stack[stack_ptr].addr;
252                                                 --stack_ptr;
253                                                 continue;
254                                         }
255                                         /* Six children are hit, push all onto stack and sort 6
256                                          * stack items, continue with closest child.
257                                          */
258                                         r = __bscf(child_mask);
259                                         int c5 = __float_as_int(cnodes[r]);
260                                         float d5 = ((float*)&dist)[r];
261                                         if(child_mask == 0) {
262                                                 ++stack_ptr;
263                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
264                                                 traversal_stack[stack_ptr].addr = c5;
265                                                 traversal_stack[stack_ptr].dist = d5;
266                                                 ++stack_ptr;
267                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
268                                                 traversal_stack[stack_ptr].addr = c4;
269                                                 traversal_stack[stack_ptr].dist = d4;
270                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
271                                                                 &traversal_stack[stack_ptr - 1],
272                                                                 &traversal_stack[stack_ptr - 2],
273                                                                 &traversal_stack[stack_ptr - 3],
274                                                                 &traversal_stack[stack_ptr - 4],
275                                                                 &traversal_stack[stack_ptr - 5]);
276                                                 node_addr = traversal_stack[stack_ptr].addr;
277                                                 --stack_ptr;
278                                                 continue;
279                                         }
280
281                                         ++stack_ptr;
282                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
283                                         traversal_stack[stack_ptr].addr = c5;
284                                         traversal_stack[stack_ptr].dist = d5;
285                                         ++stack_ptr;
286                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
287                                         traversal_stack[stack_ptr].addr = c4;
288                                         traversal_stack[stack_ptr].dist = d4;
289
290                                         /* Seven children are hit, push all onto stack and sort 7
291                                          * stack items, continue with closest child.
292                                          */
293                                         r = __bscf(child_mask);
294                                         int c6 = __float_as_int(cnodes[r]);
295                                         float d6 = ((float*)&dist)[r];
296                                         if(child_mask == 0) {
297                                                 ++stack_ptr;
298                                                 kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
299                                                 traversal_stack[stack_ptr].addr = c6;
300                                                 traversal_stack[stack_ptr].dist = d6;
301                                                 obvh_stack_sort(&traversal_stack[stack_ptr],
302                                                                 &traversal_stack[stack_ptr - 1],
303                                                                 &traversal_stack[stack_ptr - 2],
304                                                                 &traversal_stack[stack_ptr - 3],
305                                                                 &traversal_stack[stack_ptr - 4],
306                                                                 &traversal_stack[stack_ptr - 5],
307                                                                 &traversal_stack[stack_ptr - 6]);
308                                                 node_addr = traversal_stack[stack_ptr].addr;
309                                                 --stack_ptr;
310                                                 continue;
311                                         }
312                                         /* Eight children are hit, push all onto stack and sort 8
313                                          * stack items, continue with closest child.
314                                          */
315                                         r = __bscf(child_mask);
316                                         int c7 = __float_as_int(cnodes[r]);
317                                         float d7 = ((float*)&dist)[r];
318                                         ++stack_ptr;
319                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
320                                         traversal_stack[stack_ptr].addr = c7;
321                                         traversal_stack[stack_ptr].dist = d7;
322                                         ++stack_ptr;
323                                         kernel_assert(stack_ptr < BVH_OSTACK_SIZE);
324                                         traversal_stack[stack_ptr].addr = c6;
325                                         traversal_stack[stack_ptr].dist = d6;
326                                         obvh_stack_sort(&traversal_stack[stack_ptr],
327                                                         &traversal_stack[stack_ptr - 1],
328                                                         &traversal_stack[stack_ptr - 2],
329                                                         &traversal_stack[stack_ptr - 3],
330                                                         &traversal_stack[stack_ptr - 4],
331                                                         &traversal_stack[stack_ptr - 5],
332                                                         &traversal_stack[stack_ptr - 6],
333                                                         &traversal_stack[stack_ptr - 7]);
334                                         node_addr = traversal_stack[stack_ptr].addr;
335                                         --stack_ptr;
336                                         continue;
337                                 }
338
339                                 node_addr = traversal_stack[stack_ptr].addr;
340                                 --stack_ptr;
341                         }
342
343                         /* If node is leaf, fetch triangle list. */
344                         if(node_addr < 0) {
345                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
346                                 int prim_addr = __float_as_int(leaf.x);
347
348                                 int prim_addr2 = __float_as_int(leaf.y);
349                                 const uint type = __float_as_int(leaf.w);
350
351                                 /* Pop. */
352                                 node_addr = traversal_stack[stack_ptr].addr;
353                                 --stack_ptr;
354
355                                 /* Primitive intersection. */
356                                 switch(type & PRIMITIVE_ALL) {
357                                         case PRIMITIVE_TRIANGLE: {
358                                                 /* Intersect ray against primitive, */
359                                                 for(; prim_addr < prim_addr2; prim_addr++) {
360                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
361                                                         if(triangle_intersect_local(kg,
362                                                                                     local_isect,
363                                                                                     P,
364                                                                                     dir,
365                                                                                     object,
366                                                                                     local_object,
367                                                                                     prim_addr,
368                                                                                     isect_t,
369                                                                                     lcg_state,
370                                                                                     max_hits))
371                                                         {
372                                                                 return true;
373                                                         }
374                                                 }
375                                                 break;
376                                         }
377 #if BVH_FEATURE(BVH_MOTION)
378                                         case PRIMITIVE_MOTION_TRIANGLE: {
379                                                 /* Intersect ray against primitive. */
380                                                 for(; prim_addr < prim_addr2; prim_addr++) {
381                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
382                                                         if(motion_triangle_intersect_local(kg,
383                                                                                            local_isect,
384                                                                                            P,
385                                                                                            dir,
386                                                                                            ray->time,
387                                                                                            object,
388                                                                                            local_object,
389                                                                                            prim_addr,
390                                                                                            isect_t,
391                                                                                            lcg_state,
392                                                                                            max_hits))
393                                                         {
394                                                                 return true;
395                                                         }
396                                                 }
397                                                 break;
398                                         }
399 #endif
400                                         default:
401                                                 break;
402                                 }
403                         }
404                 } while(node_addr != ENTRYPOINT_SENTINEL);
405         } while(node_addr != ENTRYPOINT_SENTINEL);
406         return false;
407 }
408
409 #undef NODE_INTERSECT