Cycles: Make object flag names more obvious that they are object and not shader
[blender.git] / intern / cycles / kernel / bvh / qbvh_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* This is a template BVH traversal function for subsurface scattering, where
18  * various features can be enabled/disabled. This way we can compile optimized
19  * versions for each case without new features slowing things down.
20  *
21  * BVH_MOTION: motion blur rendering
22  *
23  */
24
/* Pick the node intersection routine used by the traversal loop below:
 * builds with BVH_HAIR use qbvh_node_intersect (which is additionally passed
 * the ray direction, dir4), all other builds use qbvh_aligned_node_intersect.
 */
25 #if BVH_FEATURE(BVH_HAIR)
26 #  define NODE_INTERSECT qbvh_node_intersect
27 #else
28 #  define NODE_INTERSECT qbvh_aligned_node_intersect
29 #endif
30
/* QBVH traversal for subsurface scattering, restricted to a single object.
 *
 * Traversal starts at the node fetched from __object_node for
 * subsurface_object. ss_isect->num_hits is reset to 0 up front; hits are then
 * recorded by triangle_intersect_subsurface() and, when BVH_MOTION is enabled,
 * motion_triangle_intersect_subsurface(). Leaves of any other primitive type
 * are skipped.
 *
 * kg                - kernel globals, used for all kernel_tex_fetch() lookups.
 * ray               - ray to trace; P, D and t are read (and time for motion
 *                     triangles).
 * ss_isect          - output; num_hits is cleared here, everything else is
 *                     written by the intersection helpers.
 * subsurface_object - index of the object whose BVH is traversed.
 * lcg_state         - forwarded as-is to the intersection helpers (presumably
 *                     random state used when selecting hits - TODO confirm).
 * max_hits          - forwarded as-is to the intersection helpers (presumably
 *                     the maximum number of hits to keep - TODO confirm).
 *
 * Returns nothing. When __KERNEL_SSE41__ is not defined the function returns
 * early, with num_hits == 0, if the ray origin's x component is not finite.
 */
31 ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
32                                              const Ray *ray,
33                                              SubsurfaceIntersection *ss_isect,
34                                              int subsurface_object,
35                                              uint *lcg_state,
36                                              int max_hits)
37 {
38         /* TODO(sergey):
39          * - Test if pushing distance on the stack helps (for non shadow rays).
40          * - Separate version for shadow rays.
41          * - Likely and unlikely for if() statements.
42          * - SSE for hair.
43          * - Test restrict attribute for pointers.
44          */
45
46         /* Traversal stack; slot 0 holds ENTRYPOINT_SENTINEL, so popping it ends traversal. */
           /* NOTE(review): this comment used to say "CUDA thread-local memory", which
            * does not seem to match the SSE types (ssef/sse3f) used below - confirm. */
47         QBVHStackItem traversal_stack[BVH_QSTACK_SIZE];
48         traversal_stack[0].addr = ENTRYPOINT_SENTINEL;
49
50         /* Traversal variables in registers. */
51         int stack_ptr = 0;
52         int node_addr = kernel_tex_fetch(__object_node, subsurface_object);
53
54         /* Ray parameters in registers. */
55         float3 P = ray->P;
56         float3 dir = bvh_clamp_direction(ray->D);
57         float3 idir = bvh_inverse_direction(dir);
58         int object = OBJECT_NONE;
59         float isect_t = ray->t;
60
61         ss_isect->num_hits = 0;
62
           /* Objects without SD_OBJECT_TRANSFORM_APPLIED get the ray pushed into
            * the instance (presumably transforming P/dir/idir/isect_t into object
            * space - TODO confirm) and are identified by their index; otherwise
            * `object` stays OBJECT_NONE.
            */
63         const int object_flag = kernel_tex_fetch(__object_flag, subsurface_object);
64         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
65 #if BVH_FEATURE(BVH_MOTION)
66                 Transform ob_itfm;
67                 bvh_instance_motion_push(kg,
68                                          subsurface_object,
69                                          ray,
70                                          &P,
71                                          &dir,
72                                          &idir,
73                                          &isect_t,
74                                          &ob_itfm);
75 #else
76                 bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
77 #endif
78                 object = subsurface_object;
79         }
80
           /* Bail out on a non-finite ray origin (checked on P.x only, after the
            * optional instance push). Only compiled when __KERNEL_SSE41__ is not
            * defined; NOTE(review): the reason for that exemption is not visible
            * in this file.
            */
81 #ifndef __KERNEL_SSE41__
82         if(!isfinite(P.x)) {
83                 return;
84         }
85 #endif
86
87         ssef tnear(0.0f), tfar(isect_t);
88 #if BVH_FEATURE(BVH_HAIR)
89         sse3f dir4(ssef(dir.x), ssef(dir.y), ssef(dir.z));
90 #endif
91         sse3f idir4(ssef(idir.x), ssef(idir.y), ssef(idir.z));
92
           /* AVX2 builds pass a precomputed P*idir to the node intersection;
            * the broadcast origin is only needed for BVH_HAIR or non-AVX2 builds.
            */
93 #ifdef __KERNEL_AVX2__
94         float3 P_idir = P*idir;
95         sse3f P_idir4(P_idir.x, P_idir.y, P_idir.z);
96 #endif
97 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
98         sse3f org4(ssef(P.x), ssef(P.y), ssef(P.z));
99 #endif
100
101         /* Offsets to select the side that becomes the lower or upper bound. */
102         int near_x, near_y, near_z;
103         int far_x, far_y, far_z;
104         qbvh_near_far_idx_calc(idir,
105                                &near_x, &near_y, &near_z,
106                                &far_x, &far_y, &far_z);
107
108         IsectPrecalc isect_precalc;
109         triangle_intersect_precalc(dir, &isect_precalc);
110
111         /* Traversal loop. */
112         do {
113                 do {
114                         /* Traverse internal nodes (non-negative addresses; negative ones are leaves). */
115                         while(node_addr >= 0 && node_addr != ENTRYPOINT_SENTINEL) {
116                                 ssef dist;
117                                 int child_mask = NODE_INTERSECT(kg,
118                                                                 tnear,
119                                                                 tfar,
120 #ifdef __KERNEL_AVX2__
121                                                                 P_idir4,
122 #endif
123 #if BVH_FEATURE(BVH_HAIR) || !defined(__KERNEL_AVX2__)
124                                                                 org4,
125 #endif
126 #if BVH_FEATURE(BVH_HAIR)
127                                                                 dir4,
128 #endif
129                                                                 idir4,
130                                                                 near_x, near_y, near_z,
131                                                                 far_x, far_y, far_z,
132                                                                 node_addr,
133                                                                 &dist);
134
135                                 if(child_mask != 0) {
                                            /* Child addresses are stored at node_addr+13 for nodes
                                             * flagged PATH_RAY_NODE_UNALIGNED (BVH_HAIR builds only)
                                             * and at node_addr+7 otherwise.
                                             */
136                                         float4 inodes = kernel_tex_fetch(__bvh_nodes, node_addr+0);
137                                         float4 cnodes;
138 #if BVH_FEATURE(BVH_HAIR)
139                                         if(__float_as_uint(inodes.x) & PATH_RAY_NODE_UNALIGNED) {
140                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+13);
141                                         }
142                                         else
143 #endif
144                                         {
145                                                 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr+7);
146                                         }
147
                                            /* __bscf() yields a child index and updates child_mask
                                             * (presumably find-and-clear of the lowest set bit -
                                             * TODO confirm), so child_mask == 0 right after a call
                                             * means no further children were hit.
                                             */
148                                         /* One child is hit, continue with that child. */
149                                         int r = __bscf(child_mask);
150                                         if(child_mask == 0) {
151                                                 node_addr = __float_as_int(cnodes[r]);
152                                                 continue;
153                                         }
154
155                                         /* Two children are hit, push far child, and continue with
156                                          * closer child.
157                                          */
158                                         int c0 = __float_as_int(cnodes[r]);
159                                         float d0 = ((float*)&dist)[r];
160                                         r = __bscf(child_mask);
161                                         int c1 = __float_as_int(cnodes[r]);
162                                         float d1 = ((float*)&dist)[r];
163                                         if(child_mask == 0) {
164                                                 if(d1 < d0) {
165                                                         node_addr = c1;
166                                                         ++stack_ptr;
167                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
168                                                         traversal_stack[stack_ptr].addr = c0;
169                                                         traversal_stack[stack_ptr].dist = d0;
170                                                         continue;
171                                                 }
172                                                 else {
173                                                         node_addr = c0;
174                                                         ++stack_ptr;
175                                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
176                                                         traversal_stack[stack_ptr].addr = c1;
177                                                         traversal_stack[stack_ptr].dist = d1;
178                                                         continue;
179                                                 }
180                                         }
181
182                                         /* Here starts the slow path for 3 or 4 hit children. We push
183                                          * all nodes onto the stack to sort them there.
184                                          */
185                                         ++stack_ptr;
186                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
187                                         traversal_stack[stack_ptr].addr = c1;
188                                         traversal_stack[stack_ptr].dist = d1;
189                                         ++stack_ptr;
190                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
191                                         traversal_stack[stack_ptr].addr = c0;
192                                         traversal_stack[stack_ptr].dist = d0;
193
194                                         /* Three children are hit, push all onto stack and sort 3
195                                          * stack items, continue with closest child.
196                                          */
197                                         r = __bscf(child_mask);
198                                         int c2 = __float_as_int(cnodes[r]);
199                                         float d2 = ((float*)&dist)[r];
200                                         if(child_mask == 0) {
201                                                 ++stack_ptr;
202                                                 kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
203                                                 traversal_stack[stack_ptr].addr = c2;
204                                                 traversal_stack[stack_ptr].dist = d2;
205                                                 qbvh_stack_sort(&traversal_stack[stack_ptr],
206                                                                 &traversal_stack[stack_ptr - 1],
207                                                                 &traversal_stack[stack_ptr - 2]);
208                                                 node_addr = traversal_stack[stack_ptr].addr;
209                                                 --stack_ptr;
210                                                 continue;
211                                         }
212
213                                         /* Four children are hit, push all onto stack and sort 4
214                                          * stack items, continue with closest child.
215                                          */
216                                         r = __bscf(child_mask);
217                                         int c3 = __float_as_int(cnodes[r]);
218                                         float d3 = ((float*)&dist)[r];
219                                         ++stack_ptr;
220                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
221                                         traversal_stack[stack_ptr].addr = c3;
222                                         traversal_stack[stack_ptr].dist = d3;
223                                         ++stack_ptr;
224                                         kernel_assert(stack_ptr < BVH_QSTACK_SIZE);
225                                         traversal_stack[stack_ptr].addr = c2;
226                                         traversal_stack[stack_ptr].dist = d2;
227                                         qbvh_stack_sort(&traversal_stack[stack_ptr],
228                                                         &traversal_stack[stack_ptr - 1],
229                                                         &traversal_stack[stack_ptr - 2],
230                                                         &traversal_stack[stack_ptr - 3]);
231                                 }
232
                                    /* Reached when no child was hit, or after the four-child
                                     * sort above: pop the next node from the stack.
                                     */
233                                 node_addr = traversal_stack[stack_ptr].addr;
234                                 --stack_ptr;
235                         }
236
237                         /* If node is leaf, fetch triangle list. */
                            /* Leaf index is -node_addr-1; leaf.x and leaf.y give the
                             * [begin, end) primitive range and leaf.w the primitive type.
                             */
238                         if(node_addr < 0) {
239                                 float4 leaf = kernel_tex_fetch(__bvh_leaf_nodes, (-node_addr-1));
240                                 int prim_addr = __float_as_int(leaf.x);
241
242                                 int prim_addr2 = __float_as_int(leaf.y);
243                                 const uint type = __float_as_int(leaf.w);
244
245                                 /* Pop the next node now; the primitive loops below only advance prim_addr. */
246                                 node_addr = traversal_stack[stack_ptr].addr;
247                                 --stack_ptr;
248
249                                 /* Primitive intersection. */
250                                 switch(type & PRIMITIVE_ALL) {
251                                         case PRIMITIVE_TRIANGLE: {
252                                                 /* Intersect ray against primitive. */
253                                                 for(; prim_addr < prim_addr2; prim_addr++) {
254                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
255                                                         triangle_intersect_subsurface(kg,
256                                                                                       &isect_precalc,
257                                                                                       ss_isect,
258                                                                                       P,
259                                                                                       object,
260                                                                                       prim_addr,
261                                                                                       isect_t,
262                                                                                       lcg_state,
263                                                                                       max_hits);
264                                                 }
265                                                 break;
266                                         }
267 #if BVH_FEATURE(BVH_MOTION)
268                                         case PRIMITIVE_MOTION_TRIANGLE: {
269                                                 /* Intersect ray against primitive. */
270                                                 for(; prim_addr < prim_addr2; prim_addr++) {
271                                                         kernel_assert(kernel_tex_fetch(__prim_type, prim_addr) == type);
272                                                         motion_triangle_intersect_subsurface(kg,
273                                                                                              ss_isect,
274                                                                                              P,
275                                                                                              dir,
276                                                                                              ray->time,
277                                                                                              object,
278                                                                                              prim_addr,
279                                                                                              isect_t,
280                                                                                              lcg_state,
281                                                                                              max_hits);
282                                                 }
283                                                 break;
284                                         }
285 #endif
286                                         default:
287                                                 break;
288                                 }
289                         }
                    /* NOTE(review): the outer loop repeats the inner loop's condition, so
                     * once the inner loop exits the outer one exits too. Presumably kept
                     * for structural parity with other traversal templates - confirm.
                     */
290                 } while(node_addr != ENTRYPOINT_SENTINEL);
291         } while(node_addr != ENTRYPOINT_SENTINEL);
292 }
293
/* Drop the helper macro defined at the top of this file so it does not leak
 * past this traversal template.
 */
294 #undef NODE_INTERSECT