4d3028b37bf03e7b8bd2c72fec1f6921d7a2d024
[blender.git] / intern / cycles / kernel / bvh / qbvh_volume_all.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2014, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 /* This is a template BVH traversal function for volumes, where
21  * various features can be enabled/disabled. This way we can compile optimized
22  * versions for each case without new features slowing things down.
23  *
24  * BVH_INSTANCING: object instancing
25  * BVH_MOTION: motion blur rendering
26  *
27  */
28
29 #if BVH_FEATURE(BVH_HAIR)
30 #  define NODE_INTERSECT qbvh_node_intersect
31 #else
32 #  define NODE_INTERSECT qbvh_aligned_node_intersect
33 #endif
34
35 ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
36                                              const Ray *ray,
37                                              Intersection *isect_array,
38                                              const uint max_hits,
39                                              const uint visibility)
40 {
41         /* TODO(sergey):
42          * - Test if pushing distance on the stack helps.
43          * - Likely and unlikely for if() statements.
44          * - Test restrict attribute for pointers.
45          */
46
47         /* Traversal stack in CUDA thread-local memory. */
48         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
49         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
50
51         /* Traversal variables in registers. */
52         int stack_ptr = 0;
53         int node_addr = kernel_data.bvh.root;
54
55         /* Ray parameters in registers. */
56         const float tmax = ray->t;
57         float3 P = ray->P;
58         float3 dir = bvh_clamp_direction(ray->D);
59         float3 idir = bvh_inverse_direction(dir);
60         int object = OBJECT_NONE;
61         float isect_t = tmax;
62
63 #if BVH_FEATURE(BVH_MOTION)
64         Transform ob_itfm;
65 #endif
66
67         uint num_hits = 0;
68         isect_array->t = tmax;
69
70 #ifndef __KERNEL_SSE41__
71         if(!isfinite(P.x)) {
72                 return false;
73         }
74 #endif
75
76 #if BVH_FEATURE(BVH_INSTANCING)
77         int num_hits_in_instance = 0;
78 #endif
79
80         ssef tnear(0.0f), tfar(isect_t);
81 #if BVH_FEATURE(BVH_HAIR)
82         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
83 #endif
84         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
85
86 #ifdef __KERNEL_AVX2__
87         float3 P_idir = P*idir;
88         sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
89 #endif
90 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
91         sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
92 #endif
93
94         /* Offsets to select the side that becomes the lower or upper bound. */
95         int near_x, near_y, near_z;
96         int far_x, far_y, far_z;
97
98         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
99         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
100         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
101
102         IsectPrecalc isect_precalc;
103         triangle_intersect_precalc(dir, &isect_precalc);
104
105         /* Traversal loop. */
106         do {
107                 do {
108                         /* Traverse internal nodes. */
109                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
110 #ifdef __VISIBILITY_FLAG__
111                                 float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
112                                 if((__float_as_uint(inodes.x) & visibility) == 0) {
113                                         /* Pop. */
114                                         node_addr = traversal_stack[stack_ptr].addr;
115                                         --stack_ptr;
116                                         continue;
117                                 }
118 #endif
119
120                                 ssef dist;
121                                 int child_mask = NODE_INTERSECT(kg,
122                                                                 tnear,
123                                                                 tfar,
124 #ifdef __KERNEL_AVX2__
125                                                                 P_idir4,
126 #endif
127 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
128                                                                 org4,
129 #endif
130 #if BVH_FEATURE(BVH_HAIR)
131                                                                 dir4,
132 #endif
133                                                                 idir4,
134                                                                 near_x, near_y, near_z,
135                                                                 far_x, far_y, far_z,
136                                                                 node_addr,
137                                                                 &dist);
138
139                                 if(child_mask != 0) {
140                                         float4 cnodes;
141 #if BVH_FEATURE(BVH_HAIR)
142                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
143                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
144                                         }
145                                         else
146 #endif
147                                         {
148                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
149                                         }
150
151                                         /* One child is hit, continue with that child. */
152                                         int r = __bscf(child_mask);
153                                         if(child_mask == 0) {
154                                                 node_addr = __float_as_int(cnodes[r]);
155                                                 continue;
156                                         }
157
158                                         /* Two children are hit, push far child, and continue with
159                                          * closer child.
160                                          */
161                                         int c0 = __float_as_int(cnodes[r]);
162                                         float d0 = ((float*)&dist)[r];
163                                         r = __bscf(child_mask);
164                                         int c1 = __float_as_int(cnodes[r]);
165                                         float d1 = ((float*)&dist)[r];
166                                         if(child_mask == 0) {
167                                                 if(d1 < d0) {
168                                                         node_addr = c1;
169                                                         ++stack_ptr;
170                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
171                                                         traversal_stack[stack_ptr].addr = c0;
172                                                         traversal_stack[stack_ptr].dist = d0;
173                                                         continue;
174                                                 }
175                                                 else {
176                                                         node_addr = c0;
177                                                         ++stack_ptr;
178                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
179                                                         traversal_stack[stack_ptr].addr = c1;
180                                                         traversal_stack[stack_ptr].dist = d1;
181                                                         continue;
182                                                 }
183                                         }
184
185                                         /* Here starts the slow path for 3 or 4 hit children. We push
186                                          * all nodes onto the stack to sort them there.
187                                          */
188                                         ++stack_ptr;
189                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
190                                         traversal_stack[stack_ptr].addr = c1;
191                                         traversal_stack[stack_ptr].dist = d1;
192                                         ++stack_ptr;
193                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
194                                         traversal_stack[stack_ptr].addr = c0;
195                                         traversal_stack[stack_ptr].dist = d0;
196
197                                         /* Three children are hit, push all onto stack and sort 3
198                                          * stack items, continue with closest child.
199                                          */
200                                         r = __bscf(child_mask);
201                                         int c2 = __float_as_int(cnodes[r]);
202                                         float d2 = ((float*)&dist)[r];
203                                         if(child_mask == 0) {
204                                                 ++stack_ptr;
205                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
206                                                 traversal_stack[stack_ptr].addr = c2;
207                                                 traversal_stack[stack_ptr].dist = d2;
208                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
209                                                                 &traversal_stack[stack_ptr - 1],
210                                                                 &traversal_stack[stack_ptr - 2]);
211                                                 node_addr = traversal_stack[stack_ptr].addr;
212                                                 --stack_ptr;
213                                                 continue;
214                                         }
215
216                                         /* Four children are hit, push all onto stack and sort 4
217                                          * stack items, continue with closest child.
218                                          */
219                                         r = __bscf(child_mask);
220                                         int c3 = __float_as_int(cnodes[r]);
221                                         float d3 = ((float*)&dist)[r];
222                                         ++stack_ptr;
223                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
224                                         traversal_stack[stack_ptr].addr = c3;
225                                         traversal_stack[stack_ptr].dist = d3;
226                                         ++stack_ptr;
227                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
228                                         traversal_stack[stack_ptr].addr = c2;
229                                         traversal_stack[stack_ptr].dist = d2;
230                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
231                                                         &traversal_stack[stack_ptr - 1],
232                                                         &traversal_stack[stack_ptr - 2],
233                                                         &traversal_stack[stack_ptr - 3]);
234                                 }
235
236                                 node_addr = traversal_stack[stack_ptr].addr;
237                                 --stack_ptr;
238                         }
239
240                         /* If node is leaf, fetch triangle list. */
241                         if(node_addr < 0) {
242                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
243                                 int prim_addr = __float_as_int(leaf.x);
244
245 #if BVH_FEATURE(BVH_INSTANCING)
246                                 if(prim_addr >= 0) {
247 #endif
248                                         int prim_addr2 = __float_as_int(leaf.y);
249                                         const uint type = __float_as_int(leaf.w);
250                                         const uint p_type = type & PRIMITIVE_ALL;
251                                         bool hit;
252
253                                         /* Pop. */
254                                         node_addr = traversal_stack[stack_ptr].addr;
255                                         --stack_ptr;
256
257                                         /* Primitive intersection. */
258                                         switch(p_type) {
259                                                 case PRIMITIVE_TRIANGLE: {
260                                                         for(; prim_addr < prim_addr2; prim_addr++) {
261                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
262                                                                 /* Only primitives from volume object. */
263                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
264                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
265                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
266                                                                         continue;
267                                                                 }
268                                                                 /* Intersect ray against primitive. */
269                                                                 hit = triangle_intersect(kg, &isect_precalc, isect_array, P, visibility, object, prim_addr);
270                                                                 if(hit) {
271                                                                         /* Move on to next entry in intersections array. */
272                                                                         isect_array++;
273                                                                         num_hits++;
274 #if BVH_FEATURE(BVH_INSTANCING)
275                                                                         num_hits_in_instance++;
276 #endif
277                                                                         isect_array->t = isect_t;
278                                                                         if(num_hits == max_hits) {
279 #if BVH_FEATURE(BVH_INSTANCING)
280 #  if BVH_FEATURE(BVH_MOTION)
281                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
282 #  else
283                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
284                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
285 #  endif
286                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
287                                                                                         (isect_array-i-1)->t *= t_fac;
288                                                                                 }
289 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
290                                                                                 return num_hits;
291                                                                         }
292                                                                 }
293                                                         }
294                                                         break;
295                                                 }
296 #if BVH_FEATURE(BVH_MOTION)
297                                                 case PRIMITIVE_MOTION_TRIANGLE: {
298                                                         for(; prim_addr < prim_addr2; prim_addr++) {
299                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
300                                                                 /* Only primitives from volume object. */
301                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
302                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
303                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
304                                                                         continue;
305                                                                 }
306                                                                 /* Intersect ray against primitive. */
307                                                                 hit = motion_triangle_intersect(kg, isect_array, P, dir, ray->time, visibility, object, prim_addr);
308                                                                 if(hit) {
309                                                                         /* Move on to next entry in intersections array. */
310                                                                         isect_array++;
311                                                                         num_hits++;
312 #  if BVH_FEATURE(BVH_INSTANCING)
313                                                                         num_hits_in_instance++;
314 #  endif
315                                                                         isect_array->t = isect_t;
316                                                                         if(num_hits == max_hits) {
317 #  if BVH_FEATURE(BVH_INSTANCING)
318 #    if BVH_FEATURE(BVH_MOTION)
319                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
320 #    else
321                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
322                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
323 #    endif
324                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
325                                                                                         (isect_array-i-1)->t *= t_fac;
326                                                                                 }
327 #  endif  /* BVH_FEATURE(BVH_INSTANCING) */
328                                                                                 return num_hits;
329                                                                         }
330                                                                 }
331                                                         }
332                                                         break;
333                                                 }
334 #endif
335                                         }
336                                 }
337 #if BVH_FEATURE(BVH_INSTANCING)
338                                 else {
339                                         /* Instance push. */
340                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
341                                         int object_flag = kernel_tex_fetch(__object_flag, object);
342
343                                         if(object_flag & SD_OBJECT_HAS_VOLUME) {
344
345 #  if BVH_FEATURE(BVH_MOTION)
346                                                 bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
347 #  else
348                                                 bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
349 #  endif
350
351                                                 if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
352                                                 if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
353                                                 if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
354                                                 tfar = ssef(isect_t);
355                                                 idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
356 #  if BVH_FEATURE(BVH_HAIR)
357                                                 dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
358 #  endif
359 #  ifdef __KERNEL_AVX2__
360                                                 P_idir = P*idir;
361                                                 P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
362 #  endif
363 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
364                                                 org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
365 #  endif
366
367                                                 triangle_intersect_precalc(dir, &isect_precalc);
368                                                 num_hits_in_instance = 0;
369                                                 isect_array->t = isect_t;
370
371                                                 ++stack_ptr;
372                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
373                                                 traversal_stack[stack_ptr].addr = ENTRYPOINT_SENTINEL;
374
375                                                 node_addr = kernel_tex_fetch(__object_node, object);
376                                         }
377                                         else {
378                                                 /* Pop. */
379                                                 object = OBJECT_NONE;
380                                                 node_addr = traversal_stack[stack_ptr].addr;
381                                                 --stack_ptr;
382                                         }
383                                 }
384                         }
385 #endif  /* FEATURE(BVH_INSTANCING) */
386                 } while(node_addr != ENTRYPOINT_SENTINEL);
387
388 #if BVH_FEATURE(BVH_INSTANCING)
389                 if(stack_ptr >= 0) {
390                         kernel_assert(object != OBJECT_NONE);
391
392                         /* Instance pop. */
393                         if(num_hits_in_instance) {
394                                 float t_fac;
395 #  if BVH_FEATURE(BVH_MOTION)
396                                 bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
397 #  else
398                                 bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
399 #  endif
400                                 triangle_intersect_precalc(dir, &isect_precalc);
401                                 /* Scale isect->t to adjust for instancing. */
402                                 for(int i = 0; i < num_hits_in_instance; i++) {
403                                         (isect_array-i-1)->t *= t_fac;
404                                 }
405                         }
406                         else {
407                                 float ignore_t = FLT_MAX;
408 #  if BVH_FEATURE(BVH_MOTION)
409                                 bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
410 #  else
411                                 bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
412 #  endif
413                                 triangle_intersect_precalc(dir, &isect_precalc);
414                         }
415
416                         if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
417                         if(idir.y >= 0.0f) { near_y = 2; far_y = 3; } else { near_y = 3; far_y = 2; }
418                         if(idir.z >= 0.0f) { near_z = 4; far_z = 5; } else { near_z = 5; far_z = 4; }
419                         tfar = ssef(isect_t);
420 #  if BVH_FEATURE(BVH_HAIR)
421                         dir4 = sse3f(ssef(dir.x), ssef(dir.y), ssef(dir.z));
422 #  endif
423                         idir4 = sse3f(ssef(idir.x), ssef(idir.y), ssef(idir.z));
424 #  ifdef __KERNEL_AVX2__
425                         P_idir = P*idir;
426                         P_idir4 = sse3f(P_idir.x, P_idir.y, P_idir.z);
427 #  endif
428 #  if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
429                         org4 = sse3f(ssef(P.x), ssef(P.y), ssef(P.z));
430 #  endif
431
432                         triangle_intersect_precalc(dir, &isect_precalc);
433                         isect_t = tmax;
434                         isect_array->t = isect_t;
435
436                         object = OBJECT_NONE;
437                         node_addr = traversal_stack[stack_ptr].addr;
438                         --stack_ptr;
439                 }
440 #endif  /* FEATURE(BVH_INSTANCING) */
441         } while(node_addr != ENTRYPOINT_SENTINEL);
442
443         return num_hits;
444 }
445
446 #undef NODE_INTERSECT