Cycles: Cleanup, style
[blender.git] / intern / cycles / kernel / kernel_shadow.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Attenuate throughput accordingly to the given intersection event.
20  * Returns true if the throughput is zero and traversal can be aborted.
21  */
22 ccl_device_forceinline bool shadow_handle_transparent_isect(
23         KernelGlobals *kg,
24         ShaderData *shadow_sd,
25         ccl_addr_space PathState *state,
26 #    ifdef __VOLUME__
27         ccl_addr_space struct PathState *volume_state,
28 #    endif
29         Intersection *isect,
30         Ray *ray,
31         float3 *throughput)
32 {
33 #ifdef __VOLUME__
34         /* Attenuation between last surface and next surface. */
35         if(volume_state->volume_stack[0].shader != SHADER_NONE) {
36                 Ray segment_ray = *ray;
37                 segment_ray.t = isect->t;
38                 kernel_volume_shadow(kg,
39                                      shadow_sd,
40                                      volume_state,
41                                      &segment_ray,
42                                      throughput);
43         }
44 #endif
45         /* Setup shader data at surface. */
46         shader_setup_from_ray(kg, shadow_sd, isect, ray);
47         /* Attenuation from transparent surface. */
48         if(!(shadow_sd->flag & SD_HAS_ONLY_VOLUME)) {
49                 path_state_modify_bounce(state, true);
50                 shader_eval_surface(kg,
51                                     shadow_sd,
52                                     NULL,
53                                     state,
54                                     0.0f,
55                                     PATH_RAY_SHADOW,
56                                     SHADER_CONTEXT_SHADOW);
57                 path_state_modify_bounce(state, false);
58                 *throughput *= shader_bsdf_transparency(kg, shadow_sd);
59         }
60         /* Stop if all light is blocked. */
61         if(is_zero(*throughput)) {
62                 return true;
63         }
64 #ifdef __VOLUME__
65         /* Exit/enter volume. */
66         kernel_volume_stack_enter_exit(kg, shadow_sd, volume_state->volume_stack);
67 #endif
68         return false;
69 }
70
71 /* Special version which only handles opaque shadows. */
72 ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
73                                       ShaderData *shadow_sd,
74                                       ccl_addr_space PathState *state,
75                                       Ray *ray,
76                                       Intersection *isect,
77                                       float3 *shadow)
78 {
79         const bool blocked = scene_intersect(kg,
80                                              *ray,
81                                              PATH_RAY_SHADOW_OPAQUE,
82                                              isect,
83                                              NULL,
84                                              0.0f, 0.0f);
85 #ifdef __VOLUME__
86         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
87                 /* Apply attenuation from current volume shader. */
88                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
89         }
90 #endif
91         return blocked;
92 }
93
94 #ifdef __TRANSPARENT_SHADOWS__
95 #  ifdef __SHADOW_RECORD_ALL__
96 /* Shadow function to compute how much light is blocked,
97  *
98  * We trace a single ray. If it hits any opaque surface, or more than a given
99  * number of transparent surfaces is hit, then we consider the geometry to be
100  * entirely blocked. If not, all transparent surfaces will be recorded and we
101  * will shade them one by one to determine how much light is blocked. This all
102  * happens in one scene intersection function.
103  *
104  * Recording all hits works well in some cases but may be slower in others. If
105  * we have many semi-transparent hairs, one intersection may be faster because
106  * you'd be reintersecting the same hairs a lot with each step otherwise. If
107  * however there is mostly binary transparency then we may be recording many
108  * unnecessary intersections when one of the first surfaces blocks all light.
109  *
110  * From tests in real scenes it seems the performance loss is either minimal,
111  * or there is a performance increase anyway due to avoiding the need to send
112  * two rays with transparent shadows.
113  *
114  * On CPU it'll handle all transparent bounces (by allocating storage for
115  * intersections when they don't fit into the stack storage).
116  *
117  * On GPU it'll only handle SHADOW_STACK_MAX_HITS-1 intersections, so this
118  * is something to be kept an eye on.
119  */
120
121 #    define SHADOW_STACK_MAX_HITS 64  /* Size of the fixed hit buffers below; traversal needs max_hits + 1 entries. */
122
123 /* Actual logic with traversal loop implementation which is free from device
124  * specific tweaks.
125  *
126  * Note that hits array should be as big as max_hits+1.
127  */
128 ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
129                                                     ShaderData *shadow_sd,
130                                                     ccl_addr_space PathState *state,
131                                                     const int skip_object,
132                                                     Ray *ray,
133                                                     Intersection *hits,
134                                                     uint max_hits,
135                                                     float3 *shadow)
136 {
137         /* Intersect to find an opaque surface, or record all transparent
138          * surface hits.
139          */
140         uint num_hits;
141         const bool blocked = scene_intersect_shadow_all(kg,
142                                                         ray,
143                                                         hits,
144                                                         skip_object,
145                                                         max_hits,
146                                                         &num_hits);
147         /* If no opaque surface found but we did find transparent hits,
148          * shade them.
149          */
150         if(!blocked && num_hits > 0) {
151                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
152                 float3 Pend = ray->P + ray->D*ray->t;
153                 float last_t = 0.0f;
154                 int bounce = state->transparent_bounce;
155                 Intersection *isect = hits;
156 #    ifdef __VOLUME__
157 #      ifdef __SPLIT_KERNEL__
158                 ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
159 #      else
160                 PathState ps_object;
161                 PathState *ps = &ps_object;
162 #      endif
163                 *ps = *state;
164 #    endif
165                 sort_intersections(hits, num_hits);
166                 for(int hit = 0; hit < num_hits; hit++, isect++) {
167                         /* Adjust intersection distance for moving ray forward. */
168                         float new_t = isect->t;
169                         isect->t -= last_t;
170                         /* Skip hit if we did not move forward, step by step raytracing
171                          * would have skipped it as well then.
172                          */
173                         if(last_t == new_t) {
174                                 continue;
175                         }
176                         last_t = new_t;
177                         /* Attenuate the throughput. */
178                         if(shadow_handle_transparent_isect(kg,
179                                                            shadow_sd,
180                                                            state,
181 #ifdef __VOLUME__
182                                                            ps,
183 #endif
184                                                            isect,
185                                                            ray,
186                                                            &throughput))
187                         {
188                                 return true;
189                         }
190                         /* Move ray forward. */
191                         ray->P = shadow_sd->P;
192                         if(ray->t != FLT_MAX) {
193                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
194                         }
195                         bounce++;
196                 }
197 #    ifdef __VOLUME__
198                 /* Attenuation for last line segment towards light. */
199                 if(ps->volume_stack[0].shader != SHADER_NONE) {
200                         kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
201                 }
202 #    endif
203                 *shadow = throughput;
204                 return is_zero(throughput);
205         }
206 #    ifdef __VOLUME__
207         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
208                 /* Apply attenuation from current volume shader/ */
209                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
210         }
211 #    endif
212         return blocked;
213 }
214
215 /* Here we do all device specific trickery before invoking actual traversal
216  * loop to help readability of the actual logic.
217  */
218 ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
219                                                ShaderData *shadow_sd,
220                                                ccl_addr_space PathState *state,
221                                                const int skip_object,
222                                                Ray *ray,
223                                                uint max_hits,
224                                                float3 *shadow)
225 {
226 #    ifdef __SPLIT_KERNEL__
227         Intersection hits_[SHADOW_STACK_MAX_HITS];
228         Intersection *hits = &hits_[0];
229 #    elif defined(__KERNEL_CUDA__)
230         Intersection *hits = kg->hits_stack;
231 #    else
232         Intersection hits_stack[SHADOW_STACK_MAX_HITS];
233         Intersection *hits = hits_stack;
234 #    endif
235 #    ifndef __KERNEL_GPU__
236         /* Prefer to use stack but use dynamic allocation if too deep max hits
237          * we need max_hits + 1 storage space due to the logic in
238          * scene_intersect_shadow_all which will first store and then check if
239          * the limit is exceeded.
240          *
241          * Ignore this on GPU because of slow/unavailable malloc().
242          */
243         if(max_hits + 1 > SHADOW_STACK_MAX_HITS) {
244                 if(kg->transparent_shadow_intersections == NULL) {
245                         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
246                         kg->transparent_shadow_intersections =
247                                 (Intersection*)malloc(sizeof(Intersection)*(transparent_max_bounce + 1));
248                 }
249                 hits = kg->transparent_shadow_intersections;
250         }
251 #    endif  /* __KERNEL_GPU__ */
252         /* Invoke actual traversal. */
253         return shadow_blocked_transparent_all_loop(kg,
254                                                    shadow_sd,
255                                                    state,
256                                                    skip_object,
257                                                    ray,
258                                                    hits,
259                                                    max_hits,
260                                                    shadow);
261 }
262 #  endif  /* __SHADOW_RECORD_ALL__ */
263
264 #  if defined(__KERNEL_GPU__) || !defined(__SHADOW_RECORD_ALL__)
265 /* Shadow function to compute how much light is blocked,
266  *
267  * Here we raytrace from one transparent surface to the next step by step.
268  * To minimize overhead in cases where we don't need transparent shadows, we
269  * first trace a regular shadow ray. We check if the hit primitive was
270  * potentially transparent, and only in that case start marching. This gives
271  * one extra ray cast for the cases where we do want transparency.
272  */
273
274 /* This function is only implementing device-independent traversal logic
275  * which requires some precalculation done.
276  */
277 ccl_device bool shadow_blocked_transparent_stepped_loop(
278         KernelGlobals *kg,
279         ShaderData *shadow_sd,
280         ccl_addr_space PathState *state,
281         const int skip_object,
282         Ray *ray,
283         Intersection *isect,
284         const bool blocked,
285         const bool is_transparent_isect,
286         float3 *shadow)
287 {
288         if((blocked && is_transparent_isect) || skip_object != OBJECT_NONE) {
289                 float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
290                 float3 Pend = ray->P + ray->D*ray->t;
291                 int bounce = state->transparent_bounce;
292 #    ifdef __VOLUME__
293 #      ifdef __SPLIT_KERNEL__
294                 ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
295 #      else
296                 PathState ps_object;
297                 PathState *ps = &ps_object;
298 #      endif
299                 *ps = *state;
300 #    endif
301                 for(;;) {
302                         if(bounce >= kernel_data.integrator.transparent_max_bounce) {
303                                 return true;
304                         }
305                         if(!scene_intersect(kg,
306                                             *ray,
307                                             PATH_RAY_SHADOW_TRANSPARENT,
308                                             isect,
309                                             NULL,
310                                             0.0f, 0.0f))
311                         {
312                                 break;
313                         }
314 #ifdef __SHADOW_TRICKS__
315                         if(skip_object != OBJECT_NONE) {
316                                 const int isect_object = (isect->object == PRIM_NONE)
317                                         ? kernel_tex_fetch(__prim_object, isect->prim)
318                                         : isect->object;
319                                 if(isect_object == skip_object) {
320                                         shader_setup_from_ray(kg, shadow_sd, isect, ray);
321                                         /* Move ray forward. */
322                                         ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
323                                         if(ray->t != FLT_MAX) {
324                                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
325                                         }
326                                         bounce++;
327                                         continue;
328                                 }
329                         }
330 #endif
331                         if(!shader_transparent_shadow(kg, isect)) {
332                                 return true;
333                         }
334                         /* Attenuate the throughput. */
335                         if(shadow_handle_transparent_isect(kg,
336                                                            shadow_sd,
337                                                            state,
338 #ifdef __VOLUME__
339                                                            ps,
340 #endif
341                                                            isect,
342                                                            ray,
343                                                            &throughput))
344                         {
345                                 return true;
346                         }
347                         /* Move ray forward. */
348                         ray->P = ray_offset(shadow_sd->P, -shadow_sd->Ng);
349                         if(ray->t != FLT_MAX) {
350                                 ray->D = normalize_len(Pend - ray->P, &ray->t);
351                         }
352                         bounce++;
353                 }
354 #    ifdef __VOLUME__
355                 /* Attenuation for last line segment towards light. */
356                 if(ps->volume_stack[0].shader != SHADER_NONE) {
357                         kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
358                 }
359 #    endif
360                 *shadow *= throughput;
361                 return is_zero(throughput);
362         }
363 #    ifdef __VOLUME__
364         if(!blocked && state->volume_stack[0].shader != SHADER_NONE) {
365                 /* Apply attenuation from current volume shader. */
366                 kernel_volume_shadow(kg, shadow_sd, state, ray, shadow);
367         }
368 #    endif
369         return blocked;
370 }
371
372 ccl_device bool shadow_blocked_transparent_stepped(
373         KernelGlobals *kg,
374         ShaderData *shadow_sd,
375         ccl_addr_space PathState *state,
376         const int skip_object,
377         Ray *ray,
378         Intersection *isect,
379         float3 *shadow)
380 {
381         bool blocked, is_transparent_isect;
382         if(skip_object == OBJECT_NONE) {
383                 blocked = scene_intersect(kg,
384                                           *ray,
385                                           PATH_RAY_SHADOW_OPAQUE,
386                                           isect,
387                                           NULL,
388                                           0.0f, 0.0f);
389                 is_transparent_isect = blocked
390                                 ? shader_transparent_shadow(kg, isect)
391                                 : false;
392         }
393         else {
394                 blocked = false;
395                 is_transparent_isect = false;
396         }
397         return shadow_blocked_transparent_stepped_loop(kg,
398                                                        shadow_sd,
399                                                        state,
400                                                        skip_object,
401                                                        ray,
402                                                        isect,
403                                                        blocked,
404                                                        is_transparent_isect,
405                                                        shadow);
406 }
407
408 #  endif  /* __KERNEL_GPU__ || !__SHADOW_RECORD_ALL__ */
409 #endif /* __TRANSPARENT_SHADOWS__ */
410
411 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
412                                       ShaderData *shadow_sd,
413                                       ccl_addr_space PathState *state,
414                                       Ray *ray_input,
415                                       float3 *shadow)
416 {
417         Ray *ray = ray_input;
418         Intersection isect;
419         /* Some common early checks. */
420         *shadow = make_float3(1.0f, 1.0f, 1.0f);
421         if(ray->t == 0.0f) {
422                 return false;
423         }
424 #ifdef __SHADOW_TRICKS__
425         const int skip_object = state->catcher_object;
426 #else
427         const int skip_object = OBJECT_NONE;
428 #endif
429         /* Do actual shadow shading. */
430         /* First of all, we check if integrator requires transparent shadows.
431          * if not, we use simplest and fastest ever way to calculate occlusion.
432          *
433          * NOTE: We can't do quick opaque test here if we are on shadow-catcher
434          * path because we don't want catcher object to be casting shadow here.
435          */
436 #ifdef __TRANSPARENT_SHADOWS__
437         if(!kernel_data.integrator.transparent_shadows &&
438            skip_object == OBJECT_NONE)
439 #endif
440         {
441                 return shadow_blocked_opaque(kg,
442                                              shadow_sd,
443                                              state,
444                                              ray,
445                                              &isect,
446                                              shadow);
447         }
448 #ifdef __TRANSPARENT_SHADOWS__
449 #  ifdef __SHADOW_RECORD_ALL__
450         /* For the transparent shadows we try to use record-all logic on the
451          * devices which supports this.
452          */
453         const int transparent_max_bounce = kernel_data.integrator.transparent_max_bounce;
454         /* Check transparent bounces here, for volume scatter which can do
455          * lighting before surface path termination is checked.
456          */
457         if(state->transparent_bounce >= transparent_max_bounce) {
458                 return true;
459         }
460         const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1;
461 #    ifdef __KERNEL_GPU__
462         /* On GPU we do trickey with tracing opaque ray first, this avoids speed
463          * regressions in some files.
464          *
465          * TODO(sergey): Check why using record-all behavior causes slowdown in such
466          * cases. Could that be caused by a higher spill pressure?
467          */
468         const bool blocked = scene_intersect(kg,
469                                              *ray,
470                                              PATH_RAY_SHADOW_OPAQUE,
471                                              &isect,
472                                              NULL,
473                                              0.0f, 0.0f);
474         const bool is_transparent_isect = blocked
475                 ? shader_transparent_shadow(kg, &isect)
476                 : false;
477         if(!blocked || !is_transparent_isect ||
478            max_hits + 1 >= SHADOW_STACK_MAX_HITS)
479         {
480                 return shadow_blocked_transparent_stepped_loop(kg,
481                                                                shadow_sd,
482                                                                state,
483                                                                skip_object,
484                                                                ray,
485                                                                &isect,
486                                                                blocked,
487                                                                is_transparent_isect,
488                                                                shadow);
489         }
490 #    endif  /* __KERNEL_GPU__ */
491         return shadow_blocked_transparent_all(kg,
492                                               shadow_sd,
493                                               state,
494                                               skip_object,
495                                               ray,
496                                               max_hits,
497                                               shadow);
498 #  else  /* __SHADOW_RECORD_ALL__ */
499         /* Fallback to a slowest version which works on all devices. */
500         return shadow_blocked_transparent_stepped(kg,
501                                                   shadow_sd,
502                                                   state,
503                                                   skip_object,
504                                                   ray,
505                                                   &isect,
506                                                   shadow);
507 #  endif  /* __SHADOW_RECORD_ALL__ */
508 #endif  /* __TRANSPARENT_SHADOWS__ */
509 }
510
511 #undef SHADOW_STACK_MAX_HITS
512
513 CCL_NAMESPACE_END