Fix Cycles shadow catcher objects influencing each other.
[blender.git] / intern / cycles / kernel / kernel_shadow.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Attenuate throughput accordingly to the given intersection event.
20  * Returns true if the throughput is zero and traversal can be aborted.
21  */
22 ccl_device_forceinline bool shadow_handle_transparent_isect(
23         KernelGlobals *kg,
24         ShaderData *shadow_sd,
25         ccl_addr_space PathState *state,
26 #    ifdef __VOLUME__
27         ccl_addr_space struct PathState *volume_state,
28 #    endif
29         Intersection *isect,
30         Ray *ray,
31         float3 *throughput)
32 {
33 #ifdef __VOLUME__
34         /* Attenuation between last surface and next surface. */
35         if(volume_state->volume_stack[0].shader != SHADER_NONE) {
36                 Ray segment_ray = *ray;
37                 segment_ray.t = isect->t;
38                 kernel_volume_shadow(kg,
39                                      shadow_sd,
40                                      volume_state,
41                                      &segment_ray,
42                                      throughput);
43         }
44 #endif
45         /* Setup shader data at surface. */
46         shader_setup_from_ray(kg, shadow_sd, isect, ray);
47         /* Attenuation from transparent surface. */
48         if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
49                 path_state_modify_bounce(state, true);
50                 shader_eval_surface(kg,
51                                     shadow_sd,
52                                     NULL,
53                                     state,
54                                     0.0f,
55                                     PATH_RAY_SHADOW,
56                                     SHADER_CONTEXT_SHADOW);
57                 path_state_modify_bounce(state, false);
58                 *throughput *= shader_bsdf_transparency(kg, shadow_sd);
59         }
60         /* Stop if all light is blocked. */
61         if(is_zero(*throughput)) {
62                 return true;
63         }
64 #ifdef __VOLUME__
65         /* Exit/enter volume. */
66         kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
67 #endif
68         return false;
69 }
70
71 /* Special version which only handles opaque shadows. */
72 ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
73                                       ShaderData *shadow_sd,
74                                       ccl_addr_space PathState *state,
75                                       const uint visibility,
76                                       Ray *ray,
77                                       Intersection *isect,
78                                       float3 *shadow)
79 {
80         const bool blocked = scene_intersect(kg,
81                                              *ray,
82                                              visibility & PATH_RAY_SHADOW_OPAQUE,
83                                              isect,
84                                              NULL,
85                                              0.0f, 0.0f);
86 #ifdef __VOLUME__
87         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
88                 /* Apply attenuation from current volume shader. */
89                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
90         }
91 #endif
92         return blocked;
93 }
94
95 #ifdef __TRANSPARENT_SHADOWS__
96 #  ifdef __SHADOW_RECORD_ALL__
97 /* Shadow function to compute how much light is blocked,
98  *
99  * We trace a single ray. If it hits any opaque surface, or more than a given
100  * number of transparent surfaces is hit, then we consider the geometry to be
101  * entirely blocked. If not, all transparent surfaces will be recorded and we
102  * will shade them one by one to determine how much light is blocked. This all
103  * happens in one scene intersection function.
104  *
105  * Recording all hits works well in some cases but may be slower in others. If
106  * we have many semi-transparent hairs, one intersection may be faster because
107  * you'd be reintersecting the same hairs a lot with each step otherwise. If
108  * however there is mostly binary transparency then we may be recording many
109  * unnecessary intersections when one of the first surfaces blocks all light.
110  *
111  * From tests in real scenes it seems the performance loss is either minimal,
112  * or there is a performance increase anyway due to avoiding the need to send
113  * two rays with transparent shadows.
114  *
115  * On CPU it'll handle all transparent bounces (by allocating storage for
116  * intersections when they don't fit into the stack storage).
117  *
118  * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
119  * is something to be kept an eye on.
120  */
121
122 #    define SHADOW_STACK_MAX_HITS 64
123
124 /* Actual logic with traversal loop implementation which is free from device
125  * specific tweaks.
126  *
127  * Note that hits array should be as big as max_hits+1.
128  */
129 ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
130                                                     ShaderData *shadow_sd,
131                                                     ccl_addr_space PathState *state,
132                                                     const uint visibility,
133                                                     Ray *ray,
134                                                     Intersection *hits,
135                                                     uint max_hits,
136                                                     float3 *shadow)
137 {
138         /* Intersect to find an opaque surface, or record all transparent
139          * surface hits.
140          */
141         uint num_hits;
142         const bool blocked = scene_intersect_shadow_all(kg,
143                                                         ray,
144                                                         hits,
145                                                         visibility,
146                                                         max_hits,
147                                                         &num_hits);
148         /* If no opaque surface found but we did find transparent hits,
149          * shade them.
150          */
151         if(!blocked && num_hits > 0) {
152                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
153                 float3 Pend = ray->P + ray->D*ray->t;
154                 float last_t = 0.0f;
155                 int bounce = state->transparent_bounce;
156                 Intersection *isect = hits;
157 #    ifdef __VOLUME__
158 #      ifdef __SPLIT_KERNEL__
159                 ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
160 #      else
161                 PathState ps_object;
162                 PathState *ps = &ps_object;
163 #      endif
164                 *ps = *state;
165 #    endif
166                 sort_intersections(hits, num_hits);
167                 for(int hit = 0; hit < num_hits; hit++, isect++) {
168                         /* Adjust intersection distance for moving ray forward. */
169                         float new_t = isect->t;
170                         isect->t -= last_t;
171                         /* Skip hit if we did not move forward, step by step raytracing
172                          * would have skipped it as well then.
173                          */
174                         if(last_t == new_t) {
175                                 continue;
176                         }
177                         last_t = new_t;
178                         /* Attenuate the throughput. */
179                         if(shadow_handle_transparent_isect(kg,
180                                                            shadow_sd,
181                                                            state,
182 #ifdef __VOLUME__
183                                                            ps,
184 #endif
185                                                            isect,
186                                                            ray,
187                                                            &throughput))
188                         {
189                                 return true;
190                         }
191                         /* Move ray forward. */
192                         ray->P = shadow_sd->P;
193                         if(ray->t != FLT_MAX) {
194                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
195                         }
196                         bounce++;
197                 }
198 #    ifdef __VOLUME__
199                 /* Attenuation for last line segment towards light. */
200                 if(ps->volume_stack[0].shader != SHADER_NONE) {
201                         kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
202                 }
203 #    endif
204                 *shadow = throughput;
205                 return is_zero(throughput);
206         }
207 #    ifdef __VOLUME__
208         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
209                 /* Apply attenuation from current volume shader/ */
210                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
211         }
212 #    endif
213         return blocked;
214 }
215
216 /* Here we do all device specific trickery before invoking actual traversal
217  * loop to help readability of the actual logic.
218  */
219 ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
220                                                ShaderData *shadow_sd,
221                                                ccl_addr_space PathState *state,
222                                                const uint visibility,
223                                                Ray *ray,
224                                                uint max_hits,
225                                                float3 *shadow)
226 {
227 #    ifdef __SPLIT_KERNEL__
228         Intersection hits_[SHADOW_STACK_MAX_HITS];
229         Intersection *hits = &hits_[0];
230 #    elif defined(__KERNEL_CUDA__)
231         Intersection *hits = kg->hits_stack;
232 #    else
233         Intersection hits_stack[SHADOW_STACK_MAX_HITS];
234         Intersection *hits = hits_stack;
235 #    endif
236 #    ifndef __KERNEL_GPU__
237         /* Prefer to use stack but use dynamic allocation if too deep max hits
238          * we need max_hits + 1 storage space due to the logic in
239          * scene_intersect_shadow_all which will first store and then check if
240          * the limit is exceeded.
241          *
242          * Ignore this on GPU because of slow/unavailable malloc().
243          */
244         if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
245                 if(kg->transparent_shadow_intersections == NULL) {
246                         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
247                         kg->transparent_shadow_intersections =
248                                 (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
249                 }
250                 hits = kg->transparent_shadow_intersections;
251         }
252 #    endif  /* __KERNEL_GPU__ */
253         /* Invoke actual traversal. */
254         return shadow_blocked_transparent_all_loop(kg,
255                                                    shadow_sd,
256                                                    state,
257                                                    visibility,
258                                                    ray,
259                                                    hits,
260                                                    max_hits,
261                                                    shadow);
262 }
263 #  endif  /* __SHADOW_RECORD_ALL__ */
264
265 #  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
266 /* Shadow function to compute how much light is blocked,
267  *
268  * Here we raytrace from one transparent surface to the next step by step.
269  * To minimize overhead in cases where we don't need transparent shadows, we
270  * first trace a regular shadow ray. We check if the hit primitive was
271  * potentially transparent, and only in that case start marching. This gives
272  * one extra ray cast for the cases where we do want transparency.
273  */
274
275 /* This function is only implementing device-independent traversal logic
276  * which requires some precalculation done.
277  */
278 ccl_device bool shadow_blocked_transparent_stepped_loop(
279         KernelGlobals *kg,
280         ShaderData *shadow_sd,
281         ccl_addr_space PathState *state,
282         const uint visibility,
283         Ray *ray,
284         Intersection *isect,
285         const bool blocked,
286         const bool is_transparent_isect,
287         float3 *shadow)
288 {
289         if(blocked && is_transparent_isect) {
290                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
291                 float3 Pend = ray->P + ray->D*ray->t;
292                 int bounce = state->transparent_bounce;
293 #    ifdef __VOLUME__
294 #      ifdef __SPLIT_KERNEL__
295                 ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
296 #      else
297                 PathState ps_object;
298                 PathState *ps = &ps_object;
299 #      endif
300                 *ps = *state;
301 #    endif
302                 for(;;) {
303                         if(bounce >= kernel_data.integrator.transparent_max_bounce) {
304                                 return true;
305                         }
306                         if(!scene_intersect(kg,
307                                             *ray,
308                                             visibility & PATH_RAY_SHADOW_TRANSPARENT,
309                                             isect,
310                                             NULL,
311                                             0.0f, 0.0f))
312                         {
313                                 break;
314                         }
315                         if(!shader_transparent_shadow(kg, isect)) {
316                                 return true;
317                         }
318                         /* Attenuate the throughput. */
319                         if(shadow_handle_transparent_isect(kg,
320                                                            shadow_sd,
321                                                            state,
322 #ifdef __VOLUME__
323                                                            ps,
324 #endif
325                                                            isect,
326                                                            ray,
327                                                            &throughput))
328                         {
329                                 return true;
330                         }
331                         /* Move ray forward. */
332                         ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
333                         if(ray->t != FLT_MAX) {
334                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
335                         }
336                         bounce++;
337                 }
338 #    ifdef __VOLUME__
339                 /* Attenuation for last line segment towards light. */
340                 if(ps->volume_stack[0].shader != SHADER_NONE) {
341                         kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
342                 }
343 #    endif
344                 *shadow *= throughput;
345                 return is_zero(throughput);
346         }
347 #    ifdef __VOLUME__
348         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
349                 /* Apply attenuation from current volume shader. */
350                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
351         }
352 #    endif
353         return blocked;
354 }
355
356 ccl_device bool shadow_blocked_transparent_stepped(
357         KernelGlobals *kg,
358         ShaderData *shadow_sd,
359         ccl_addr_space PathState *state,
360         const uint visibility,
361         Ray *ray,
362         Intersection *isect,
363         float3 *shadow)
364 {
365         bool blocked = scene_intersect(kg,
366                                        *ray,
367                                        visibility & PATH_RAY_SHADOW_OPAQUE,
368                                        isect,
369                                        NULL,
370                                        0.0f, 0.0f);
371         bool is_transparent_isect = blocked
372                 ? shader_transparent_shadow(kg, isect)
373                 : false;
374         return shadow_blocked_transparent_stepped_loop(kg,
375                                                        shadow_sd,
376                                                        state,
377                                                        visibility,
378                                                        ray,
379                                                        isect,
380                                                        blocked,
381                                                        is_transparent_isect,
382                                                        shadow);
383 }
384
385 #  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
386 #endif /* __TRANSPARENT_SHADOWS__ */
387
388 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
389                                       ShaderData *shadow_sd,
390                                       ccl_addr_space PathState *state,
391                                       Ray *ray_input,
392                                       float3 *shadow)
393 {
394         Ray *ray = ray_input;
395         Intersection isect;
396         /* Some common early checks. */
397         *shadow = make_float3(1.0f, 1.0f, 1.0f);
398         if(ray->t == 0.0f) {
399                 return false;
400         }
401 #ifdef __SHADOW_TRICKS__
402         const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER)
403                 ? PATH_RAY_SHADOW_NON_CATCHER
404                 : PATH_RAY_SHADOW;
405 #else
406         const uint visibility = PATH_RAY_SHADOW;
407 #endif
408         /* Do actual shadow shading. */
409         /* First of all, we check if integrator requires transparent shadows.
410          * if not, we use simplest and fastest ever way to calculate occlusion.
411          */
412 #ifdef __TRANSPARENT_SHADOWS__
413         if(!kernel_data.integrator.transparent_shadows)
414 #endif
415         {
416                 return shadow_blocked_opaque(kg,
417                                              shadow_sd,
418                                              state,
419                                              visibility,
420                                              ray,
421                                              &isect,
422                                              shadow);
423         }
424 #ifdef __TRANSPARENT_SHADOWS__
425 #  ifdef __SHADOW_RECORD_ALL__
426         /* For the transparent shadows we try to use record-all logic on the
427          * devices which supports this.
428          */
429         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
430         /* Check transparent bounces here, for volume scatter which can do
431          * lighting before surface path termination is checked.
432          */
433         if(state->transparent_bounce >= transparent_max_bounce) {
434                 return true;
435         }
436         const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
437 #    ifdef __KERNEL_GPU__
438         /* On GPU we do trickey with tracing opaque ray first, this avoids speed
439          * regressions in some files.
440          *
441          * TODO(sergey): Check why using record-all behavior causes slowdown in such
442          * cases. Could that be caused by a higher spill pressure?
443          */
444         const bool blocked = scene_intersect(kg,
445                                              *ray,
446                                              visibility & PATH_RAY_SHADOW_OPAQUE,
447                                              &isect,
448                                              NULL,
449                                              0.0f, 0.0f);
450         const bool is_transparent_isect = blocked
451                 ? shader_transparent_shadow(kg, &isect)
452                 : false;
453         if(!blocked || !is_transparent_isect ||
454            max_hits + 1 >= SHADOW_STACK_MAX_HITS)
455         {
456                 return shadow_blocked_transparent_stepped_loop(kg,
457                                                                shadow_sd,
458                                                                state,
459                                                                visibility,
460                                                                ray,
461                                                                &isect,
462                                                                blocked,
463                                                                is_transparent_isect,
464                                                                shadow);
465         }
466 #    endif  /* __KERNEL_GPU__ */
467         return shadow_blocked_transparent_all(kg,
468                                               shadow_sd,
469                                               state,
470                                               visibility,
471                                               ray,
472                                               max_hits,
473                                               shadow);
474 #  else  /* __SHADOW_RECORD_ALL__ */
475         /* Fallback to a slowest version which works on all devices. */
476         return shadow_blocked_transparent_stepped(kg,
477                                                   shadow_sd,
478                                                   state,
479                                                   visibility,
480                                                   ray,
481                                                   &isect,
482                                                   shadow);
483 #  endif  /* __SHADOW_RECORD_ALL__ */
484 #endif  /* __TRANSPARENT_SHADOWS__ */
485 }
486
487 #undef SHADOW_STACK_MAX_HITS
488
489 CCL_NAMESPACE_END