Cycles: Remove ccl_fetch and SOA
[blender.git] / intern / cycles / kernel / kernel_shadow.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Attenuate throughput accordingly to the given intersection event.
20  * Returns true if the throughput is zero and traversal can be aborted.
21  */
22 ccl_device_forceinline bool shadow_handle_transparent_isect(
23         KernelGlobals *kg,
24         ShaderData *shadow_sd,
25         ccl_addr_space PathState *state,
26 #    ifdef __VOLUME__
27         struct PathState *volume_state,
28 #    endif
29         Intersection *isect,
30         Ray *ray,
31         float3 *throughput)
32 {
33 #ifdef __VOLUME__
34         /* Attenuation between last surface and next surface. */
35         if(volume_state->volume_stack[0].shader != SHADER_NONE) {
36                 Ray segment_ray = *ray;
37                 segment_ray.t = isect->t;
38                 kernel_volume_shadow(kg,
39                                      shadow_sd,
40                                      volume_state,
41                                      &segment_ray,
42                                      throughput);
43         }
44 #endif
45         /* Setup shader data at surface. */
46         shader_setup_from_ray(kg, shadow_sd, isect, ray);
47         /* Attenuation from transparent surface. */
48         if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
49                 path_state_modify_bounce(state, true);
50                 shader_eval_surface(kg,
51                                     shadow_sd,
52                                     NULL,
53                                     state,
54                                     0.0f,
55                                     PATH_RAY_SHADOW,
56                                     SHADER_CONTEXT_SHADOW);
57                 path_state_modify_bounce(state, false);
58                 *throughput *= shader_bsdf_transparency(kg, shadow_sd);
59         }
60         /* Stop if all light is blocked. */
61         if(is_zero(*throughput)) {
62                 return true;
63         }
64 #ifdef __VOLUME__
65         /* Exit/enter volume. */
66         kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
67 #endif
68         return false;
69 }
70
71 /* Special version which only handles opaque shadows. */
72 ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
73                                       ShaderData *shadow_sd,
74                                       ccl_addr_space PathState *state,
75                                       Ray *ray,
76                                       Intersection *isect,
77                                       float3 *shadow)
78 {
79         const bool blocked = scene_intersect(kg,
80                                              *ray,
81                                              PATH_RAY_SHADOW_OPAQUE,
82                                              isect,
83                                              NULL,
84                                              0.0f, 0.0f);
85 #ifdef __VOLUME__
86         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
87                 /* Apply attenuation from current volume shader. */
88                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
89         }
90 #endif
91         return blocked;
92 }
93
94 #ifdef __TRANSPARENT_SHADOWS__
95 #  ifdef __SHADOW_RECORD_ALL__
96 /* Shadow function to compute how much light is blocked,
97  *
98  * We trace a single ray. If it hits any opaque surface, or more than a given
99  * number of transparent surfaces is hit, then we consider the geometry to be
100  * entirely blocked. If not, all transparent surfaces will be recorded and we
101  * will shade them one by one to determine how much light is blocked. This all
102  * happens in one scene intersection function.
103  *
104  * Recording all hits works well in some cases but may be slower in others. If
105  * we have many semi-transparent hairs, one intersection may be faster because
106  * you'd be re-intersecting the same hairs a lot with each step otherwise. If
107  * however there is mostly binary transparency then we may be recording many
108  * unnecessary intersections when one of the first surfaces blocks all light.
109  *
110  * From tests in real scenes it seems the performance loss is either minimal,
111  * or there is a performance increase anyway due to avoiding the need to send
112  * two rays with transparent shadows.
113  *
114  * On CPU it'll handle all transparent bounces (by allocating storage for
115  * intersections when they don't fit into the stack storage).
116  *
117  * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
118  * is something to be kept an eye on.
119  */
120
121 #    define SHADOW_STACK_MAX_HITS 64
122
123 /* Actual logic with traversal loop implementation which is free from device
124  * specific tweaks.
125  *
126  * Note that hits array should be as big as max_hits+1.
127  */
128 ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
129                                                     ShaderData *shadow_sd,
130                                                     ccl_addr_space PathState *state,
131                                                     Ray *ray,
132                                                     Intersection *hits,
133                                                     uint max_hits,
134                                                     float3 *shadow)
135 {
136         /* Intersect to find an opaque surface, or record all transparent
137          * surface hits.
138          */
139         uint num_hits;
140         const bool blocked = scene_intersect_shadow_all(kg,
141                                                         ray,
142                                                         hits,
143                                                         max_hits,
144                                                         &num_hits);
145         /* If no opaque surface found but we did find transparent hits,
146          * shade them.
147          */
148         if(!blocked && num_hits > 0) {
149                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
150                 float3 Pend = ray->P + ray->D*ray->t;
151                 float last_t = 0.0f;
152                 int bounce = state->transparent_bounce;
153                 Intersection *isect = hits;
154 #    ifdef __VOLUME__
155                 PathState ps = *state;
156 #    endif
157                 sort_intersections(hits, num_hits);
158                 for(int hit = 0; hit < num_hits; hit++, isect++) {
159                         /* Adjust intersection distance for moving ray forward. */
160                         float new_t = isect->t;
161                         isect->t -= last_t;
162                         /* Skip hit if we did not move forward, step by step raytracing
163                          * would have skipped it as well then.
164                          */
165                         if(last_t == new_t) {
166                                 continue;
167                         }
168                         last_t = new_t;
169                         /* Attenuate the throughput. */
170                         if(shadow_handle_transparent_isect(kg,
171                                                            shadow_sd,
172                                                            state,
173 #ifdef __VOLUME__
174                                                            &ps,
175 #endif
176                                                            isect,
177                                                            ray,
178                                                            &throughput))
179                         {
180                                 return true;
181                         }
182                         /* Move ray forward. */
183                         ray->P = shadow_sd->P;
184                         if(ray->t != FLT_MAX) {
185                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
186                         }
187                         bounce++;
188                 }
189 #    ifdef __VOLUME__
190                 /* Attenuation for last line segment towards light. */
191                 if(ps.volume_stack[0].shader != SHADER_NONE) {
192                         kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
193                 }
194 #    endif
195                 *shadow = throughput;
196                 return is_zero(throughput);
197         }
198 #    ifdef __VOLUME__
199         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
200                 /* Apply attenuation from current volume shader/ */
201                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
202         }
203 #    endif
204         return blocked;
205 }
206
207 /* Here we do all device specific trickery before invoking actual traversal
208  * loop to help readability of the actual logic.
209  */
210 ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
211                                                ShaderData *shadow_sd,
212                                                ccl_addr_space PathState *state,
213                                                Ray *ray,
214                                                uint max_hits,
215                                                float3 *shadow)
216 {
217 #    ifdef __KERNEL_CUDA__
218         Intersection *hits = kg->hits_stack;
219 #    else
220         Intersection hits_stack[SHADOW_STACK_MAX_HITS];
221         Intersection *hits = hits_stack;
222 #    endif
223 #    ifndef __KERNEL_GPU__
224         /* Prefer to use stack but use dynamic allocation if too deep max hits
225          * we need max_hits + 1 storage space due to the logic in
226          * scene_intersect_shadow_all which will first store and then check if
227          * the limit is exceeded.
228          *
229          * Ignore this on GPU because of slow/unavailable malloc().
230          */
231         if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
232                 if(kg->transparent_shadow_intersections == NULL) {
233                         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
234                         kg->transparent_shadow_intersections =
235                                 (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
236                 }
237                 hits = kg->transparent_shadow_intersections;
238         }
239 #    endif  /* __KERNEL_GPU__ */
240         /* Invoke actual traversal. */
241         return shadow_blocked_transparent_all_loop(kg,
242                                                    shadow_sd,
243                                                    state,
244                                                    ray,
245                                                    hits,
246                                                    max_hits,
247                                                    shadow);
248 }
249 #  endif  /* __SHADOW_RECORD_ALL__ */
250
251 #  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
252 /* Shadow function to compute how much light is blocked,
253  *
254  * Here we raytrace from one transparent surface to the next step by step.
255  * To minimize overhead in cases where we don't need transparent shadows, we
256  * first trace a regular shadow ray. We check if the hit primitive was
257  * potentially transparent, and only in that case start marching. This gives
258  * one extra ray cast for the cases where we do want transparency.
259  */
260
261 /* This function is only implementing device-independent traversal logic
262  * which requires some precalculation done.
263  */
264 ccl_device bool shadow_blocked_transparent_stepped_loop(
265         KernelGlobals *kg,
266         ShaderData *shadow_sd,
267         ccl_addr_space PathState *state,
268         Ray *ray,
269         Intersection *isect,
270         const bool blocked,
271         const bool is_transparent_isect,
272         float3 *shadow)
273 {
274         if(blocked && is_transparent_isect) {
275                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
276                 float3 Pend = ray->P + ray->D*ray->t;
277                 int bounce = state->transparent_bounce;
278 #    ifdef __VOLUME__
279                 PathState ps = *state;
280 #    endif
281                 for(;;) {
282                         if(bounce >= kernel_data.integrator.transparent_max_bounce) {
283                                 return true;
284                         }
285                         if(!scene_intersect(kg,
286                                             *ray,
287                                             PATH_RAY_SHADOW_TRANSPARENT,
288                                             isect,
289                                             NULL,
290                                             0.0f, 0.0f))
291                         {
292                                 break;
293                         }
294                         if(!shader_transparent_shadow(kg, isect)) {
295                                 return true;
296                         }
297                         /* Attenuate the throughput. */
298                         if(shadow_handle_transparent_isect(kg,
299                                                            shadow_sd,
300                                                            state,
301 #ifdef __VOLUME__
302                                                            &ps,
303 #endif
304                                                            isect,
305                                                            ray,
306                                                            &throughput))
307                         {
308                                 return true;
309                         }
310                         /* Move ray forward. */
311                         ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
312                         if(ray->t != FLT_MAX) {
313                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
314                         }
315                         bounce++;
316                 }
317 #    ifdef __VOLUME__
318                 /* Attenuation for last line segment towards light. */
319                 if(ps.volume_stack[0].shader != SHADER_NONE) {
320                         kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
321                 }
322 #    endif
323                 *shadow *= throughput;
324                 return is_zero(throughput);
325         }
326 #    ifdef __VOLUME__
327         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
328                 /* Apply attenuation from current volume shader. */
329                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
330         }
331 #    endif
332         return blocked;
333 }
334
335 ccl_device bool shadow_blocked_transparent_stepped(
336         KernelGlobals *kg,
337         ShaderData *shadow_sd,
338         ccl_addr_space PathState *state,
339         Ray *ray,
340         Intersection *isect,
341         float3 *shadow)
342 {
343         const bool blocked = scene_intersect(kg,
344                                              *ray,
345                                              PATH_RAY_SHADOW_OPAQUE,
346                                              isect,
347                                              NULL,
348                                              0.0f, 0.0f);
349         const bool is_transparent_isect = blocked
350                 ? shader_transparent_shadow(kg, isect)
351                 : false;
352         return shadow_blocked_transparent_stepped_loop(kg,
353                                                        shadow_sd,
354                                                        state,
355                                                        ray,
356                                                        isect,
357                                                        blocked,
358                                                        is_transparent_isect,
359                                                        shadow);
360 }
361
362 #  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
363 #endif /* __TRANSPARENT_SHADOWS__ */
364
365 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
366                                       ShaderData *shadow_sd,
367                                       ccl_addr_space PathState *state,
368                                       ccl_addr_space Ray *ray_input,
369                                       float3 *shadow)
370 {
371         /* Special trickery for split kernel: some data is coming from the
372          * global memory.
373          */
374 #ifdef __SPLIT_KERNEL__
375         Ray private_ray = *ray_input;
376         Ray *ray = &private_ray;
377         Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
378 #else  /* __SPLIT_KERNEL__ */
379         Ray *ray = ray_input;
380         Intersection isect_object;
381         Intersection *isect = &isect_object;
382 #endif  /* __SPLIT_KERNEL__ */
383         /* Some common early checks. */
384         *shadow = make_float3(1.0f, 1.0f, 1.0f);
385         if(ray->t == 0.0f) {
386                 return false;
387         }
388         /* Do actual shadow shading. */
389         /* First of all, we check if integrator requires transparent shadows.
390          * if not, we use simplest and fastest ever way to calculate occlusion.
391          */
392 #ifdef __TRANSPARENT_SHADOWS__
393         if(!kernel_data.integrator.transparent_shadows)
394 #endif
395         {
396                 return shadow_blocked_opaque(kg,
397                                              shadow_sd,
398                                              state,
399                                              ray,
400                                              isect,
401                                              shadow);
402         }
403 #ifdef __TRANSPARENT_SHADOWS__
404 #  ifdef __SHADOW_RECORD_ALL__
405         /* For the transparent shadows we try to use record-all logic on the
406          * devices which supports this.
407          */
408         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
409         /* Check transparent bounces here, for volume scatter which can do
410          * lighting before surface path termination is checked.
411          */
412         if(state->transparent_bounce >= transparent_max_bounce) {
413                 return true;
414         }
415         const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
416 #    ifdef __KERNEL_GPU__
417         /* On GPU we do trickey with tracing opaque ray first, this avoids speed
418          * regressions in some files.
419          *
420          * TODO(sergey): Check why using record-all behavior causes slowdown in such
421          * cases. Could that be caused by a higher spill pressure?
422          */
423         const bool blocked = scene_intersect(kg,
424                                              *ray,
425                                              PATH_RAY_SHADOW_OPAQUE,
426                                              isect,
427                                              NULL,
428                                              0.0f, 0.0f);
429         const bool is_transparent_isect = blocked
430                 ? shader_transparent_shadow(kg, isect)
431                 : false;
432         if(!blocked || !is_transparent_isect ||
433            max_hits + 1 >= SHADOW_STACK_MAX_HITS)
434         {
435                 return shadow_blocked_transparent_stepped_loop(kg,
436                                                                shadow_sd,
437                                                                state,
438                                                                ray,
439                                                                isect,
440                                                                blocked,
441                                                                is_transparent_isect,
442                                                                shadow);
443         }
444 #    endif  /* __KERNEL_GPU__ */
445         return shadow_blocked_transparent_all(kg,
446                                               shadow_sd,
447                                               state,
448                                               ray,
449                                               max_hits,
450                                               shadow);
451 #  else  /* __SHADOW_RECORD_ALL__ */
452         /* Fallback to a slowest version which works on all devices. */
453         return shadow_blocked_transparent_stepped(kg,
454                                                   shadow_sd,
455                                                   state,
456                                                   ray,
457                                                   isect,
458                                                   shadow);
459 #  endif  /* __SHADOW_RECORD_ALL__ */
460 #endif  /* __TRANSPARENT_SHADOWS__ */
461 }
462
463 #undef SHADOW_STACK_MAX_HITS
464
465 CCL_NAMESPACE_END