/* Source: blender.git, intern/cycles/kernel/bvh/bvh_local.h
 * (gitweb object id 8def71bc890b7b011a535047ca2fb86951af52d8) */
/*
 * Adapted from code Copyright 2009-2010 NVIDIA Corporation,
 * and code copyright 2009-2012 Intel Corporation
 *
 * Modifications Copyright 2011-2013, Blender Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

20 #ifdef __QBVH__
21 #  include "kernel/bvh/qbvh_local.h"
22 #  ifdef __KERNEL_AVX2__
23 #    include "kernel/bvh/obvh_local.h"
24 #  endif
25 #endif
26
27 #if BVH_FEATURE(BVH_HAIR)
28 #  define NODE_INTERSECT bvh_node_intersect
29 #else
30 #  define NODE_INTERSECT bvh_aligned_node_intersect
31 #endif
32
33 /* This is a template BVH traversal function for finding local intersections
34  * around the shading point, for subsurface scattering and bevel. We disable
35  * various features for performance, and for instanced objects avoid traversing
36  * other parts of the scene.
37  *
38  * BVH_MOTION: motion blur rendering
39  *
40  */
41
42 #ifndef __KERNEL_GPU__
43 ccl_device
44 #else
45 ccl_device_inline
46 #endif
47 bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
48                                  const Ray *ray,
49                                  LocalIntersection *local_isect,
50                                  int local_object,
51                                  uint *lcg_state,
52                                  int max_hits)
53 {
54         /* todo:
55          * - test if pushing distance on the stack helps (for non shadow rays)
56          * - separate version for shadow rays
57          * - likely and unlikely for if() statements
58          * - test restrict attribute for pointers
59          */
60
61         /* traversal stack in CUDA thread-local memory */
62         int traversal_stack[BVH_STACK_SIZE];
63         traversal_stack[0] = ENTRYPOINT_SENTINEL;
64
65         /* traversal variables in registers */
66         int stack_ptr = 0;
67         int node_addr = kernel_tex_fetch(__object_node, local_object);
68
69         /* ray parameters in registers */
70         float3 P = ray->P;
71         float3 dir = bvh_clamp_direction(ray->D);
72         float3 idir = bvh_inverse_direction(dir);
73         int object = OBJECT_NONE;
74         float isect_t = ray->t;
75
76         if(local_isect) {
77                 local_isect->num_hits = 0;
78         }
79
80         kernel_assert((local_isect == NULL) == (max_hits == 0));
81
82         const int object_flag = kernel_tex_fetch(__object_flag, local_object);
83         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
84 #if BVH_FEATURE(BVH_MOTION)
85                 Transform ob_itfm;
86                 isect_t = bvh_instance_motion_push(kg,
87                                                    local_object,
88                                                    ray,
89                                                    &P,
90                                                    &dir,
91                                                    &idir,
92                                                    isect_t,
93                                                    &ob_itfm);
94 #else
95                 isect_t = bvh_instance_push(kg, local_object, ray, &P, &dir, &idir, isect_t);
96 #endif
97                 object = local_object;
98         }
99
100 #if defined(__KERNEL_SSE2__)
101         const shuffle_swap_t shuf_identity = shuffle_swap_identity();
102         const shuffle_swap_t shuf_swap = shuffle_swap_swap();
103
104         const ssef pn = cast(ssei(0, 0, 0x80000000, 0x80000000));
105         ssef Psplat[3], idirsplat[3];
106 #  if BVH_FEATURE(BVH_HAIR)
107         ssef tnear(0.0f), tfar(isect_t);
108 #  endif
109         shuffle_swap_t shufflexyz[3];
110
111         Psplat[0] = ssef(P.x);
112         Psplat[1] = ssef(P.y);
113         Psplat[2] = ssef(P.z);
114
115         ssef tsplat(0.0f, 0.0f, -isect_t, -isect_t);
116
117         gen_idirsplat_swap(pn, shuf_identity, shuf_swap, idir, idirsplat, shufflexyz);
118 #endif
119
120         /* traversal loop */
121         do {
122                 do {
123                         /* traverse internal nodes */
124                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
125                                 int node_addr_child1, traverse_mask;
126                                 float dist[2];
127                                 float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
128
129 #if !defined(__KERNEL_SSE2__)
130                                 traverse_mask = NODE_INTERSECT(kg,
131                                                                P,
132 #  if BVH_FEATURE(BVH_HAIR)
133                                                                dir,
134 #  endif
135                                                                idir,
136                                                                isect_t,
137                                                                node_addr,
138                                                                PATH_RAY_ALL_VISIBILITY,
139                                                                dist);
140 #else // __KERNEL_SSE2__
141                                 traverse_mask = NODE_INTERSECT(kg,
142                                                                P,
143                                                                dir,
144 #  if BVH_FEATURE(BVH_HAIR)
145                                                                tnear,
146                                                                tfar,
147 #  endif
148                                                                tsplat,
149                                                                Psplat,
150                                                                idirsplat,
151                                                                shufflexyz,
152                                                                node_addr,
153                                                                PATH_RAY_ALL_VISIBILITY,
154                                                                dist);
155 #endif // __KERNEL_SSE2__
156
157                                 node_addr = __float_as_int(cnodes.z);
158                                 node_addr_child1 = __float_as_int(cnodes.w);
159
160                                 if(traverse_mask == 3) {
161                                         /* Both children were intersected, push the farther one. */
162                                         bool is_closest_child1 = (dist[1] < dist[0]);
163                                         if(is_closest_child1) {
164                                                 int tmp = node_addr;
165                                                 node_addr = node_addr_child1;
166                                                 node_addr_child1 = tmp;
167                                         }
168
169                                         ++stack_ptr;
170                                         kernel_assert(stack_ptr < BVH_STACK_SIZE);
171                                         traversal_stack[stack_ptr] = node_addr_child1;
172                                 }
173                                 else {
174                                         /* One child was intersected. */
175                                         if(traverse_mask == 2) {
176                                                 node_addr = node_addr_child1;
177                                         }
178                                         else if(traverse_mask == 0) {
179                                                 /* Neither child was intersected. */
180                                                 node_addr = traversal_stack[stack_ptr];
181                                                 --stack_ptr;
182                                         }
183                                 }
184                         }
185
186                         /* if node is leaf, fetch triangle list */
187                         if(node_addr < 0) {
188                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
189                                 int prim_addr = __float_as_int(leaf.x);
190
191                                 const int prim_addr2 = __float_as_int(leaf.y);
192                                 const uint type = __float_as_int(leaf.w);
193
194                                 /* pop */
195                                 node_addr = traversal_stack[stack_ptr];
196                                 --stack_ptr;
197
198                                 /* primitive intersection */
199                                 switch(type & PRIMITIVE_ALL) {
200                                         case PRIMITIVE_TRIANGLE: {
201                                                 /* intersect ray against primitive */
202                                                 for(; prim_addr < prim_addr2; prim_addr++) {
203                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
204                                                         if(triangle_intersect_local(kg,
205                                                                                     local_isect,
206                                                                                     P,
207                                                                                     dir,
208                                                                                     object,
209                                                                                     local_object,
210                                                                                     prim_addr,
211                                                                                     isect_t,
212                                                                                     lcg_state,
213                                                                                     max_hits)) {
214                                                                 return true;
215                                                         }
216                                                 }
217                                                 break;
218                                         }
219 #if BVH_FEATURE(BVH_MOTION)
220                                         case PRIMITIVE_MOTION_TRIANGLE: {
221                                                 /* intersect ray against primitive */
222                                                 for(; prim_addr < prim_addr2; prim_addr++) {
223                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
224                                                         if(motion_triangle_intersect_local(kg,
225                                                                                            local_isect,
226                                                                                            P,
227                                                                                            dir,
228                                                                                            ray->time,
229                                                                                            object,
230                                                                                            local_object,
231                                                                                            prim_addr,
232                                                                                            isect_t,
233                                                                                            lcg_state,
234                                                                                            max_hits)) {
235                                                                 return true;
236                                                         }
237                                                 }
238                                                 break;
239                                         }
240 #endif
241                                         default: {
242                                                 break;
243                                         }
244                                 }
245                         }
246                 } while(node_addr != ENTRYPOINT_SENTINEL);
247         } while(node_addr != ENTRYPOINT_SENTINEL);
248
249         return false;
250 }
251
252 ccl_device_inline bool BVH_FUNCTION_NAME(KernelGlobals *kg,
253                                          const Ray *ray,
254                                          LocalIntersection *local_isect,
255                                          int local_object,
256                                          uint *lcg_state,
257                                          int max_hits)
258 {
259         switch(kernel_data.bvh.bvh_layout) {
260 #ifdef __KERNEL_AVX2__
261                 case BVH_LAYOUT_BVH8:
262                         return BVH_FUNCTION_FULL_NAME(OBVH)(kg,
263                                                             ray,
264                                                             local_isect,
265                                                             local_object,
266                                                             lcg_state,
267                                                             max_hits);
268 #endif
269 #ifdef __QBVH__
270                 case BVH_LAYOUT_BVH4:
271                         return BVH_FUNCTION_FULL_NAME(QBVH)(kg,
272                                                             ray,
273                                                             local_isect,
274                                                             local_object,
275                                                             lcg_state,
276                                                             max_hits);
277 #endif
278                 case BVH_LAYOUT_BVH2:
279                         return BVH_FUNCTION_FULL_NAME(BVH)(kg,
280                                                            ray,
281                                                            local_isect,
282                                                            local_object,
283                                                            lcg_state,
284                                                            max_hits);
285         }
286         kernel_assert(!"Should not happen");
287         return false;
288 }
289
/* Drop the per-instantiation macros (presumably so this template header can
 * be included again with a different name and feature set). */
#undef BVH_FUNCTION_NAME
#undef BVH_FUNCTION_FEATURES
#undef NODE_INTERSECT