7eddc2891d021ad4b124007190ca9e41b3b8c971
[blender.git] / intern / cycles / kernel / bvh / bvh_volume_all.h
1 /*
2  * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
3  * and code copyright 2009-2012 Intel Corporation
4  *
5  * Modifications Copyright 2011-2014, Blender Foundation.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19
20 #ifdef __QBVH__
21 #  include "qbvh_volume_all.h"
22 #endif
23
24 #if BVH_FEATURE(BVH_HAIR)
25 #  define NODE_INTERSECT bvh_node_intersect
26 #else
27 #  define NODE_INTERSECT bvh_aligned_node_intersect
28 #endif
29
/* This is a template BVH traversal function for volumes, where
 * various features can be enabled/disabled. This way we can compile optimized
 * versions for each case without new features slowing things down.
 *
 * BVH_INSTANCING: object instancing
 * BVH_HAIR: node tests use bvh_node_intersect instead of bvh_aligned_node_intersect
 * BVH_MOTION: motion blur rendering
 */
38
39 ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
40                                             const Ray *ray,
41                                             Intersection *isect_array,
42                                             const uint max_hits,
43                                             const uint visibility)
44 {
45         /* todo:
46          * - test if pushing distance on the stack helps (for non shadow rays)
47          * - separate version for shadow rays
48          * - likely and unlikely for if() statements
49          * - test restrict attribute for pointers
50          */
51
52         /* traversal stack in CUDA thread-local memory */
53         int traversal_stack[BVH_STACK_SIZE];
54         traversal_stack[0] = ENTRYPOINT_SENTINEL;
55
56         /* traversal variables in registers */
57         int stack_ptr = 0;
58         int node_addr = kernel_data.bvh.root;
59
60         /* ray parameters in registers */
61         const float tmax = ray->t;
62         float3 P = ray->P;
63         float3 dir = bvh_clamp_direction(ray->D);
64         float3 idir = bvh_inverse_direction(dir);
65         int object = OBJECT_NONE;
66         float isect_t = tmax;
67
68 #if BVH_FEATURE(BVH_MOTION)
69         Transform ob_itfm;
70 #endif
71
72 #if BVH_FEATURE(BVH_INSTANCING)
73         int num_hits_in_instance = 0;
74 #endif
75
76         uint num_hits = 0;
77         isect_array->t = tmax;
78
79 #if defined(__KERNEL_SSE2__)
80         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
81         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
82
83         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
84         ssef Psplat[3], idirsplat[3];
85 #  if BVH_FEATURE(BVH_HAIR)
86         ssef tnear(0.0f), tfar(isect_t);
87 #  endif
88         shuffle_swap_t shufflexyz[3];
89
90         Psplat[0] = ssef(P.x);
91         Psplat[1] = ssef(P.y);
92         Psplat[2] = ssef(P.z);
93
94         ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
95
96         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
97 #endif
98
99         IsectPrecalc isect_precalc;
100         triangle_intersect_precalc(dir, &isect_precalc);
101
102         /* traversal loop */
103         do {
104                 do {
105                         /* traverse internal nodes */
106                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
107                                 int node_addr_child1, traverse_mask;
108                                 float dist[2];
109                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
110
111 #if !defined(__KERNEL_SSE2__)
112                                 traverse_mask = NODE_INTERSECT(kg,
113                                                                P,
114 #  if BVH_FEATURE(BVH_HAIR)
115                                                                dir,
116 #  endif
117                                                                idir,
118                                                                isect_t,
119                                                                node_addr,
120                                                                visibility,
121                                                                dist);
122 #else // __KERNEL_SSE2__
123                                 traverse_mask = NODE_INTERSECT(kg,
124                                                                P,
125                                                                dir,
126 #  if BVH_FEATURE(BVH_HAIR)
127                                                                tnear,
128                                                                tfar,
129 #  endif
130                                                                tsplat,
131                                                                Psplat,
132                                                                idirsplat,
133                                                                shufflexyz,
134                                                                node_addr,
135                                                                visibility,
136                                                                dist);
137 #endif // __KERNEL_SSE2__
138
139                                 node_addr = __float_as_int(cnodes.z);
140                                 node_addr_child1 = __float_as_int(cnodes.w);
141
142                                 if(traverse_mask == 3) {
143                                         /* Both children were intersected, push the farther one. */
144                                         bool is_closest_child1 = (dist[1] < dist[0]);
145                                         if(is_closest_child1) {
146                                                 int tmp = node_addr;
147                                                 node_addr = node_addr_child1;
148                                                 node_addr_child1 = tmp;
149                                         }
150
151                                         ++stack_ptr;
152                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
153                                         traversal_stack[stack_ptr] = node_addr_child1;
154                                 }
155                                 else {
156                                         /* One child was intersected. */
157                                         if(traverse_mask == 2) {
158                                                 node_addr = node_addr_child1;
159                                         }
160                                         else if(traverse_mask == 0) {
161                                                 /* Neither child was intersected. */
162                                                 node_addr = traversal_stack[stack_ptr];
163                                                 --stack_ptr;
164                                         }
165                                 }
166                         }
167
168                         /* if node is leaf, fetch triangle list */
169                         if(node_addr < 0) {
170                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
171                                 int prim_addr = __float_as_int(leaf.x);
172
173 #if BVH_FEATURE(BVH_INSTANCING)
174                                 if(prim_addr >= 0) {
175 #endif
176                                         const int prim_addr2 = __float_as_int(leaf.y);
177                                         const uint type = __float_as_int(leaf.w);
178                                         bool hit;
179
180                                         /* pop */
181                                         node_addr = traversal_stack[stack_ptr];
182                                         --stack_ptr;
183
184                                         /* primitive intersection */
185                                         switch(type & PRIMITIVE_ALL) {
186                                                 case PRIMITIVE_TRIANGLE: {
187                                                         /* intersect ray against primitive */
188                                                         for(; prim_addr < prim_addr2; prim_addr++) {
189                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
190                                                                 /* only primitives from volume object */
191                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
192                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
193                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
194                                                                         continue;
195                                                                 }
196                                                                 hit = triangle_intersect(kg,
197                                                                                          &isect_precalc,
198                                                                                          isect_array,
199                                                                                          P,
200                                                                                          visibility,
201                                                                                          object,
202                                                                                          prim_addr);
203                                                                 if(hit) {
204                                                                         /* Move on to next entry in intersections array. */
205                                                                         isect_array++;
206                                                                         num_hits++;
207 #if BVH_FEATURE(BVH_INSTANCING)
208                                                                         num_hits_in_instance++;
209 #endif
210                                                                         isect_array->t = isect_t;
211                                                                         if(num_hits == max_hits) {
212 #if BVH_FEATURE(BVH_INSTANCING)
213 #  if BVH_FEATURE(BVH_MOTION)
214                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
215 #  else
216                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
217                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
218 #  endif
219                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
220                                                                                         (isect_array-i-1)->t *= t_fac;
221                                                                                 }
222 #endif  /* BVH_FEATURE(BVH_INSTANCING) */
223                                                                                 return num_hits;
224                                                                         }
225                                                                 }
226                                                         }
227                                                         break;
228                                                 }
229 #if BVH_FEATURE(BVH_MOTION)
230                                                 case PRIMITIVE_MOTION_TRIANGLE: {
231                                                         /* intersect ray against primitive */
232                                                         for(; prim_addr < prim_addr2; prim_addr++) {
233                                                                 kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
234                                                                 /* only primitives from volume object */
235                                                                 uint tri_object = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
236                                                                 int object_flag = kernel_tex_fetch(__object_flag, tri_object);
237                                                                 if((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
238                                                                         continue;
239                                                                 }
240                                                                 hit = motion_triangle_intersect(kg,
241                                                                                                 isect_array,
242                                                                                                 P,
243                                                                                                 dir,
244                                                                                                 ray->time,
245                                                                                                 visibility,
246                                                                                                 object,
247                                                                                                 prim_addr);
248                                                                 if(hit) {
249                                                                         /* Move on to next entry in intersections array. */
250                                                                         isect_array++;
251                                                                         num_hits++;
252 #  if BVH_FEATURE(BVH_INSTANCING)
253                                                                         num_hits_in_instance++;
254 #  endif
255                                                                         isect_array->t = isect_t;
256                                                                         if(num_hits == max_hits) {
257 #  if BVH_FEATURE(BVH_INSTANCING)
258 #    if BVH_FEATURE(BVH_MOTION)
259                                                                                 float t_fac = 1.0f / len(transform_direction(&ob_itfm, dir));
260 #    else
261                                                                                 Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
262                                                                                 float t_fac = 1.0f / len(transform_direction(&itfm, dir));
263 #    endif
264                                                                                 for(int i = 0; i < num_hits_in_instance; i++) {
265                                                                                         (isect_array-i-1)->t *= t_fac;
266                                                                                 }
267 #  endif  /* BVH_FEATURE(BVH_INSTANCING) */
268                                                                                 return num_hits;
269                                                                         }
270                                                                 }
271                                                         }
272                                                         break;
273                                                 }
274 #endif  /* BVH_MOTION */
275                                                 default: {
276                                                         break;
277                                                 }
278                                         }
279                                 }
280 #if BVH_FEATURE(BVH_INSTANCING)
281                                 else {
282                                         /* instance push */
283                                         object = kernel_tex_fetch(__prim_object, -prim_addr-1);
284                                         int object_flag = kernel_tex_fetch(__object_flag, object);
285
286                                         if(object_flag & SD_OBJECT_HAS_VOLUME) {
287
288 #  if BVH_FEATURE(BVH_MOTION)
289                                                 bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
290 #  else
291                                                 bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
292 #  endif
293
294                                                 triangle_intersect_precalc(dir, &isect_precalc);
295                                                 num_hits_in_instance = 0;
296                                                 isect_array->t = isect_t;
297
298 #  if defined(__KERNEL_SSE2__)
299                                                 Psplat[0] = ssef(P.x);
300                                                 Psplat[1] = ssef(P.y);
301                                                 Psplat[2] = ssef(P.z);
302
303                                                 tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
304 #    if BVH_FEATURE(BVH_HAIR)
305                                                 tfar = ssef(isect_t);
306 #    endif
307
308                                                 gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
309 #  endif
310
311                                                 ++stack_ptr;
312                                                 kernel_assert(stack_ptr < BVH_STACK_SIZE);
313                                                 traversal_stack[stack_ptr] = ENTRYPOINT_SENTINEL;
314
315                                                 node_addr = kernel_tex_fetch(__object_node, object);
316                                         }
317                                         else {
318                                                 /* pop */
319                                                 object = OBJECT_NONE;
320                                                 node_addr = traversal_stack[stack_ptr];
321                                                 --stack_ptr;
322                                         }
323                                 }
324                         }
325 #endif  /* FEATURE(BVH_INSTANCING) */
326                 } while(node_addr != ENTRYPOINT_SENTINEL);
327
328 #if BVH_FEATURE(BVH_INSTANCING)
329                 if(stack_ptr >= 0) {
330                         kernel_assert(object != OBJECT_NONE);
331
332                         if(num_hits_in_instance) {
333                                 float t_fac;
334 #  if BVH_FEATURE(BVH_MOTION)
335                                 bvh_instance_motion_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac, &ob_itfm);
336 #  else
337                                 bvh_instance_pop_factor(kg, object, ray, &P, &dir, &idir, &t_fac);
338 #  endif
339                                 triangle_intersect_precalc(dir, &isect_precalc);
340                                 /* Scale isect->t to adjust for instancing. */
341                                 for(int i = 0; i < num_hits_in_instance; i++) {
342                                         (isect_array-i-1)->t *= t_fac;
343                                 }
344                         }
345                         else {
346                                 float ignore_t = FLT_MAX;
347 #  if BVH_FEATURE(BVH_MOTION)
348                                 bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
349 #  else
350                                 bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
351 #  endif
352                                 triangle_intersect_precalc(dir, &isect_precalc);
353                         }
354
355                         isect_t = tmax;
356                         isect_array->t = isect_t;
357
358 #  if defined(__KERNEL_SSE2__)
359                         Psplat[0] = ssef(P.x);
360                         Psplat[1] = ssef(P.y);
361                         Psplat[2] = ssef(P.z);
362
363                         tsplat = ssef(0.0f, 0.0f, -isect_t, -isect_t);
364 #    if BVH_FEATURE(BVH_HAIR)
365                         tfar = ssef(isect_t);
366 #    endif
367
368                         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
369 #  endif
370
371                         object = OBJECT_NONE;
372                         node_addr = traversal_stack[stack_ptr];
373                         --stack_ptr;
374                 }
375 #endif  /* FEATURE(BVH_MOTION) */
376         } while(node_addr != ENTRYPOINT_SENTINEL);
377
378         return num_hits;
379 }
380
381 ccl_device_inline uint BVH_FUNCTION_NAME(KernelGlobals *kg,
382                                          const Ray *ray,
383                                          Intersection *isect_array,
384                                          const uint max_hits,
385                                          const uint visibility)
386 {
387 #ifdef __QBVH__
388         if(kernel_data.bvh.use_qbvh) {
389                 return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
390                                                     ray,
391                                                     isect_array,
392                                                     max_hits,
393                                                     visibility);
394         }
395         else
396 #endif
397         {
398                 kernel_assert(kernel_data.bvh.use_qbvh == false);
399                 return BVH_FUNCTION_FULL_NAME(BVH)(kg,
400                                                    ray,
401                                                    isect_array,
402                                                    max_hits,
403                                                    visibility);
404         }
405 }
406
/* Drop the macros used by this template; NODE_INTERSECT is defined above,
 * the other two are presumably set by the including file before each
 * inclusion of this header.
 */
#undef NODE_INTERSECT
#undef BVH_FUNCTION_FEATURES
#undef BVH_FUNCTION_NAME