/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

CCL_NAMESPACE_BEGIN

#ifdef __VOLUME__
typedef struct VolumeState {
#  ifdef __SPLIT_KERNEL__
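        /* The split kernel keeps the shadow path state in the global
         * kernel_split_state buffers, so no local storage is needed here. */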
#  else
        PathState ps;
#  endif
} VolumeState;

/* Get PathState ready for use for volume stack evaluation. */
#  ifdef __SPLIT_KERNEL__
ccl_addr_space
#  endif
ccl_device_inline PathState *shadow_blocked_volume_path_state(
        KernelGlobals *kg,
        VolumeState *volume_state,
        ccl_addr_space PathState *state,
        ShaderData *sd,
        Ray *ray)
{
#  ifdef __SPLIT_KERNEL__
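        /* Use the pre-allocated per-thread shadow state, indexed by the
         * flattened global thread id. */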
        ccl_addr_space PathState *ps =
                &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
#  else
        PathState *ps = &volume_state->ps;
#  endif
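        /* Start from a copy of the current path state. */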
        *ps = *state;
        /* We are checking for shadow on the "other" side of the surface, so we
         * need to discard the volume we are currently in.
         */
        if(dot(sd->Ng, ray->D) < 0.0f) {
                kernel_volume_stack_enter_exit(kg, sd, ps->volume_stack);
        }
        return ps;
}
#endif  /* __VOLUME__ */

/* Attenuate throughput according to the given intersection event.
 * Returns true if the throughput is zero and traversal can be aborted.
 */
ccl_device_forceinline bool shadow_handle_transparent_isect(
        KernelGlobals *kg,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
#ifdef __VOLUME__
        ccl_addr_space struct PathState *volume_state,
#endif
        Intersection *isect,
        Ray *ray,
        float3 *throughput)
{
#ifdef __VOLUME__
        /* Attenuation between last surface and next surface. */
        if(volume_state->volume_stack[0].shader != SHADER_NONE) {
                Ray segment_ray = *ray;
                segment_ray.t = isect->t;
                kernel_volume_shadow(kg,
                                     shadow_sd,
                                     volume_state,
                                     &segment_ray,
                                     throughput);
        }
#endif
        /* Setup shader data at surface. */
        shader_setup_from_ray(kg, shadow_sd, isect, ray);
        /* Attenuation from transparent surface. */
        if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
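                /* The bounce counter is bumped temporarily around the shader
                 * evaluation of the shadow ray and restored afterwards. */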
                path_state_modify_bounce(state, true);
                shader_eval_surface(kg,
                                    shadow_sd,
                                    state,
                                    PATH_RAY_SHADOW);
                path_state_modify_bounce(state, false);
                *throughput *= shader_bsdf_transparency(kg, shadow_sd);
        }
        /* Stop if all light is blocked. */
        if(is_zero(*throughput)) {
                return true;
        }
#ifdef __VOLUME__
        /* Exit/enter volume. */
        kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
#endif
        return false;
}

/* Special version which only handles opaque shadows. */
ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
                                      ShaderData *shadow_sd,
                                      ccl_addr_space PathState *state,
                                      const uint visibility,
                                      Ray *ray,
                                      Intersection *isect,
                                      float3 *shadow)
{
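        /* A single intersection with opaque-only visibility: any hit means the
         * light is fully blocked. */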
        const bool blocked = scene_intersect(kg,
                                             *ray,
                                             visibility & PATH_RAY_SHADOW_OPAQUE,
                                             isect,
                                             NULL,
                                             0.0f, 0.0f);
#ifdef __VOLUME__
        if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
                /* Apply attenuation from current volume shader. */
                kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
        }
#endif
        return blocked;
}

#ifdef __TRANSPARENT_SHADOWS__
#  ifdef __SHADOW_RECORD_ALL__
/* Shadow function to compute how much light is blocked.
 *
 * We trace a single ray. If it hits any opaque surface, or more than a given
 * number of transparent surfaces is hit, then we consider the geometry to be
 * entirely blocked. If not, all transparent surfaces will be recorded and we
 * will shade them one by one to determine how much light is blocked. This all
 * happens in one scene intersection function.
 *
 * Recording all hits works well in some cases but may be slower in others. If
 * we have many semi-transparent hairs, one intersection may be faster because
 * otherwise you'd be re-intersecting the same hairs a lot with each step. If
 * however there is mostly binary transparency then we may be recording many
 * unnecessary intersections when one of the first surfaces blocks all light.
 *
 * From tests in real scenes it seems the performance loss is either minimal,
 * or there is a performance increase anyway due to avoiding the need to send
 * two rays with transparent shadows.
 *
 * On CPU it'll handle all transparent bounces (by allocating storage for
 * intersections when they don't fit into the stack storage).
 *
 * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
 * is something to keep an eye on.
 */

#    define SHADOW_STACK_MAX_HITS 64

/* Traversal loop with the actual logic, kept free from device-specific
 * tweaks.
 *
 * Note that the hits array must provide storage for max_hits + 1 entries.
 */
ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                                                    ShaderData *sd,
                                                    ShaderData *shadow_sd,
                                                    ccl_addr_space PathState *state,
                                                    const uint visibility,
                                                    Ray *ray,
                                                    Intersection *hits,
                                                    uint max_hits,
                                                    float3 *shadow)
{
        /* Intersect to find an opaque surface, or record all transparent
         * surface hits.
         */
        uint num_hits;
        const bool blocked = scene_intersect_shadow_all(kg,
                                                        ray,
                                                        hits,
                                                        visibility,
                                                        max_hits,
                                                        &num_hits);
#    ifdef __VOLUME__
        VolumeState volume_state;
#    endif
        /* If no opaque surface found but we did find transparent hits,
         * shade them.
         */
        if(!blocked && num_hits > 0) {
                float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
                float3 Pend = ray->P + ray->D*ray->t;
                float last_t = 0.0f;
                int bounce = state->transparent_bounce;
                Intersection *isect = hits;
#    ifdef __VOLUME__
#      ifdef __SPLIT_KERNEL__
                ccl_addr_space
#      endif
                PathState *ps = shadow_blocked_volume_path_state(kg,
                                                                 &volume_state,
                                                                 state,
                                                                 sd,
                                                                 ray);
#    endif
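                /* Sort hits by distance so they can be shaded front to back. */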
                sort_intersections(hits, num_hits);
                for(int hit = 0; hit < num_hits; hit++, isect++) {
                        /* Adjust intersection distance for moving ray forward. */
                        float new_t = isect->t;
                        isect->t -= last_t;
                        /* Skip the hit if we did not move forward; step-by-step
                         * ray tracing would have skipped it as well.
                         */
                        if(last_t == new_t) {
                                continue;
                        }
                        last_t = new_t;
                        /* Attenuate the throughput. */
                        if(shadow_handle_transparent_isect(kg,
                                                           shadow_sd,
                                                           state,
#    ifdef __VOLUME__
                                                           ps,
#    endif
                                                           isect,
                                                           ray,
                                                           &throughput))
                        {
                                return true;
                        }
                        /* Move ray forward. */
                        ray->P = shadow_sd->P;
                        if(ray->t != FLT_MAX) {
                                ray->D = normalize_len(Pend - ray->P, &ray->t);
                        }
                        bounce++;
                }
#    ifdef __VOLUME__
                /* Attenuation for last line segment towards light. */
                if(ps->volume_stack[0].shader != SHADER_NONE) {
                        kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
                }
#    endif
                *shadow = throughput;
                return is_zero(throughput);
        }
#    ifdef __VOLUME__
        if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
                /* Apply attenuation from current volume shader. */
#      ifdef __SPLIT_KERNEL__
                ccl_addr_space
#      endif
                PathState *ps = shadow_blocked_volume_path_state(kg,
                                                                 &volume_state,
                                                                 state,
                                                                 sd,
                                                                 ray);
                kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
        }
#    endif
        return blocked;
}

/* Here we do all device-specific trickery before invoking the actual
 * traversal loop, to keep the actual logic readable.
 */
ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
                                               ShaderData *sd,
                                               ShaderData *shadow_sd,
                                               ccl_addr_space PathState *state,
                                               const uint visibility,
                                               Ray *ray,
                                               uint max_hits,
                                               float3 *shadow)
{
#    ifdef __SPLIT_KERNEL__
        Intersection hits_[SHADOW_STACK_MAX_HITS];
        Intersection *hits = &hits_[0];
#    elif defined(__KERNEL_CUDA__)
        Intersection *hits = kg->hits_stack;
#    else
        Intersection hits_stack[SHADOW_STACK_MAX_HITS];
        Intersection *hits = hits_stack;
#    endif
#    ifndef __KERNEL_GPU__
        /* Prefer to use stack memory, but fall back to dynamic allocation if
         * max_hits is too large. We need max_hits + 1 storage slots due to the
         * logic in scene_intersect_shadow_all, which will first store an
         * intersection and only then check whether the limit is exceeded.
         *
         * Ignore this on GPU because of slow/unavailable malloc().
         */
        if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
                if(kg->transparent_shadow_intersections == NULL) {
                        const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
                        kg->transparent_shadow_intersections =
                                (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
                }
                hits = kg->transparent_shadow_intersections;
        }
#    endif  /* __KERNEL_GPU__ */
        /* Invoke actual traversal. */
        return shadow_blocked_transparent_all_loop(kg,
                                                   sd,
                                                   shadow_sd,
                                                   state,
                                                   visibility,
                                                   ray,
                                                   hits,
                                                   max_hits,
                                                   shadow);
}
#  endif  /* __SHADOW_RECORD_ALL__ */

#  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
/* Shadow function to compute how much light is blocked.
 *
 * Here we raytrace from one transparent surface to the next step by step.
 * To minimize overhead in cases where we don't need transparent shadows, we
 * first trace a regular shadow ray. We check if the hit primitive was
 * potentially transparent, and only in that case start marching. This gives
 * one extra ray cast for the cases where we do want transparency.
 */

/* This function only implements the device-independent traversal logic,
 * which requires some precalculation to be done by the caller.
 */
ccl_device bool shadow_blocked_transparent_stepped_loop(
        KernelGlobals *kg,
        ShaderData *sd,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
        const uint visibility,
        Ray *ray,
        Intersection *isect,
        const bool blocked,
        const bool is_transparent_isect,
        float3 *shadow)
{
#    ifdef __VOLUME__
        VolumeState volume_state;
#    endif
        if(blocked && is_transparent_isect) {
                float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
                float3 Pend = ray->P + ray->D*ray->t;
                int bounce = state->transparent_bounce;
#    ifdef __VOLUME__
#      ifdef __SPLIT_KERNEL__
                ccl_addr_space
#      endif
                PathState *ps = shadow_blocked_volume_path_state(kg,
                                                                 &volume_state,
                                                                 state,
                                                                 sd,
                                                                 ray);
#    endif
                for(;;) {
                        if(bounce >= kernel_data.integrator.transparent_max_bounce) {
                                return true;
                        }
                        if(!scene_intersect(kg,
                                            *ray,
                                            visibility & PATH_RAY_SHADOW_TRANSPARENT,
                                            isect,
                                            NULL,
                                            0.0f, 0.0f))
                        {
                                break;
                        }
                        if(!shader_transparent_shadow(kg, isect)) {
                                return true;
                        }
                        /* Attenuate the throughput. */
                        if(shadow_handle_transparent_isect(kg,
                                                           shadow_sd,
                                                           state,
#    ifdef __VOLUME__
                                                           ps,
#    endif
                                                           isect,
                                                           ray,
                                                           &throughput))
                        {
                                return true;
                        }
                        /* Move ray forward. */
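                        /* Offset the new ray origin slightly past the surface to
                         * avoid self-intersection. */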
                        ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
                        if(ray->t != FLT_MAX) {
                                ray->D = normalize_len(Pend - ray->P, &ray->t);
                        }
                        bounce++;
                }
#    ifdef __VOLUME__
                /* Attenuation for last line segment towards light. */
                if(ps->volume_stack[0].shader != SHADER_NONE) {
                        kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
                }
#    endif
                *shadow *= throughput;
                return is_zero(throughput);
        }
#    ifdef __VOLUME__
        if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
                /* Apply attenuation from current volume shader. */
#      ifdef __SPLIT_KERNEL__
                ccl_addr_space
#      endif
                PathState *ps = shadow_blocked_volume_path_state(kg,
                                                                 &volume_state,
                                                                 state,
                                                                 sd,
                                                                 ray);
                kernel_volume_shadow(kg, shadow_sd, ps, ray, shadow);
        }
#    endif
        return blocked;
}

ccl_device bool shadow_blocked_transparent_stepped(
        KernelGlobals *kg,
        ShaderData *sd,
        ShaderData *shadow_sd,
        ccl_addr_space PathState *state,
        const uint visibility,
        Ray *ray,
        Intersection *isect,
        float3 *shadow)
{
        bool blocked = scene_intersect(kg,
                                       *ray,
                                       visibility & PATH_RAY_SHADOW_OPAQUE,
                                       isect,
                                       NULL,
                                       0.0f, 0.0f);
        bool is_transparent_isect = blocked
                ? shader_transparent_shadow(kg, isect)
                : false;
        return shadow_blocked_transparent_stepped_loop(kg,
                                                       sd,
                                                       shadow_sd,
                                                       state,
                                                       visibility,
                                                       ray,
                                                       isect,
                                                       blocked,
                                                       is_transparent_isect,
                                                       shadow);
}

#  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
#endif /* __TRANSPARENT_SHADOWS__ */

ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                      ShaderData *sd,
                                      ShaderData *shadow_sd,
                                      ccl_addr_space PathState *state,
                                      Ray *ray_input,
                                      float3 *shadow)
{
        Ray *ray = ray_input;
        Intersection isect;
        /* Some common early checks. */
        *shadow = make_float3(1.0f, 1.0f, 1.0f);
        if(ray->t == 0.0f) {
                return false;
        }
#ifdef __SHADOW_TRICKS__
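        /* Shadow catcher paths only test occlusion against non-catcher objects. */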
        const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER)
                ? PATH_RAY_SHADOW_NON_CATCHER
                : PATH_RAY_SHADOW;
#else
        const uint visibility = PATH_RAY_SHADOW;
#endif
        /* Do actual shadow shading. */
        /* First of all, we check if the integrator requires transparent shadows.
         * If not, we use the simplest and fastest way to calculate occlusion.
         */
#ifdef __TRANSPARENT_SHADOWS__
        if(!kernel_data.integrator.transparent_shadows)
#endif
        {
                return shadow_blocked_opaque(kg,
                                             shadow_sd,
                                             state,
                                             visibility,
                                             ray,
                                             &isect,
                                             shadow);
        }
#ifdef __TRANSPARENT_SHADOWS__
#  ifdef __SHADOW_RECORD_ALL__
        /* For transparent shadows we try to use the record-all logic on devices
         * which support it.
         */
        const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
        /* Check transparent bounces here, for volume scatter which can do
         * lighting before surface path termination is checked.
         */
        if(state->transparent_bounce >= transparent_max_bounce) {
                return true;
        }
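        /* Number of transparent hits we may still record without exceeding the
         * transparent bounce limit. */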
        const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
#    ifdef __KERNEL_GPU__
        /* On the GPU we use the trick of tracing an opaque ray first, which
         * avoids speed regressions in some files.
         *
         * TODO(sergey): Check why using record-all behavior causes slowdown in such
         * cases. Could that be caused by a higher spill pressure?
         */
        const bool blocked = scene_intersect(kg,
                                             *ray,
                                             visibility & PATH_RAY_SHADOW_OPAQUE,
                                             &isect,
                                             NULL,
                                             0.0f, 0.0f);
        const bool is_transparent_isect = blocked
                ? shader_transparent_shadow(kg, &isect)
                : false;
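        /* Use the stepped loop when there is no transparent occluder to resolve,
         * or when recording all hits could overflow the fixed-size stack. */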
        if(!blocked || !is_transparent_isect ||
           max_hits + 1 >= SHADOW_STACK_MAX_HITS)
        {
                return shadow_blocked_transparent_stepped_loop(kg,
                                                               sd,
                                                               shadow_sd,
                                                               state,
                                                               visibility,
                                                               ray,
                                                               &isect,
                                                               blocked,
                                                               is_transparent_isect,
                                                               shadow);
        }
#    endif  /* __KERNEL_GPU__ */
        return shadow_blocked_transparent_all(kg,
                                              sd,
                                              shadow_sd,
                                              state,
                                              visibility,
                                              ray,
                                              max_hits,
                                              shadow);
#  else  /* __SHADOW_RECORD_ALL__ */
        /* Fall back to the slowest version, which works on all devices. */
        return shadow_blocked_transparent_stepped(kg,
                                                  sd,
                                                  shadow_sd,
                                                  state,
                                                  visibility,
                                                  ray,
                                                  &isect,
                                                  shadow);
#  endif  /* __SHADOW_RECORD_ALL__ */
#endif  /* __TRANSPARENT_SHADOWS__ */
}

#undef SHADOW_STACK_MAX_HITS

CCL_NAMESPACE_END