ClangFormat: apply to source, most of intern
[blender.git] / intern / cycles / kernel / kernel_path_branched.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 #ifdef __BRANCHED_PATH__
20
21 ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
22                                                ShaderData *sd,
23                                                ShaderData *emission_sd,
24                                                PathRadiance *L,
25                                                ccl_addr_space PathState *state,
26                                                float3 throughput)
27 {
28   int num_samples = kernel_data.integrator.ao_samples;
29   float num_samples_inv = 1.0f / num_samples;
30   float ao_factor = kernel_data.background.ao_factor;
31   float3 ao_N;
32   float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
33   float3 ao_alpha = shader_bsdf_alpha(kg, sd);
34
35   for (int j = 0; j < num_samples; j++) {
36     float bsdf_u, bsdf_v;
37     path_branched_rng_2D(
38         kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
39
40     float3 ao_D;
41     float ao_pdf;
42
43     sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
44
45     if (dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
46       Ray light_ray;
47       float3 ao_shadow;
48
49       light_ray.P = ray_offset(sd->P, sd->Ng);
50       light_ray.D = ao_D;
51       light_ray.t = kernel_data.background.ao_distance;
52       light_ray.time = sd->time;
53       light_ray.dP = sd->dP;
54       light_ray.dD = differential3_zero();
55
56       if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
57         path_radiance_accum_ao(
58             L, state, throughput * num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
59       }
60       else {
61         path_radiance_accum_total_ao(L, state, throughput * num_samples_inv, ao_bsdf);
62       }
63     }
64   }
65 }
66
67 #  ifndef __SPLIT_KERNEL__
68
69 #    ifdef __VOLUME__
70 ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
71                                                         ShaderData *sd,
72                                                         PathState *state,
73                                                         Ray *ray,
74                                                         float3 *throughput,
75                                                         ccl_addr_space Intersection *isect,
76                                                         bool hit,
77                                                         ShaderData *indirect_sd,
78                                                         ShaderData *emission_sd,
79                                                         PathRadiance *L)
80 {
81   /* Sanitize volume stack. */
82   if (!hit) {
83     kernel_volume_clean_stack(kg, state->volume_stack);
84   }
85
86   if (state->volume_stack[0].shader == SHADER_NONE) {
87     return;
88   }
89
90   /* volume attenuation, emission, scatter */
91   Ray volume_ray = *ray;
92   volume_ray.t = (hit) ? isect->t : FLT_MAX;
93
94   bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
95
96 #      ifdef __VOLUME_DECOUPLED__
97   /* decoupled ray marching only supported on CPU */
98   if (kernel_data.integrator.volume_decoupled) {
99     /* cache steps along volume for repeated sampling */
100     VolumeSegment volume_segment;
101
102     shader_setup_from_volume(kg, sd, &volume_ray);
103     kernel_volume_decoupled_record(kg, state, &volume_ray, sd, &volume_segment, heterogeneous);
104
105     /* direct light sampling */
106     if (volume_segment.closure_flag & SD_SCATTER) {
107       volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
108
109       int all = kernel_data.integrator.sample_all_lights_direct;
110
111       kernel_branched_path_volume_connect_light(
112           kg, sd, emission_sd, *throughput, state, L, all, &volume_ray, &volume_segment);
113
114       /* indirect light sampling */
115       int num_samples = kernel_data.integrator.volume_samples;
116       float num_samples_inv = 1.0f / num_samples;
117
118       for (int j = 0; j < num_samples; j++) {
119         PathState ps = *state;
120         Ray pray = *ray;
121         float3 tp = *throughput;
122
123         /* branch RNG state */
124         path_state_branch(&ps, j, num_samples);
125
126         /* scatter sample. if we use distance sampling and take just one
127          * sample for direct and indirect light, we could share this
128          * computation, but makes code a bit complex */
129         float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
130         float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
131
132         VolumeIntegrateResult result = kernel_volume_decoupled_scatter(
133             kg, &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
134
135         if (result == VOLUME_PATH_SCATTERED &&
136             kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
137           kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp * num_samples_inv, &ps, L);
138
139           /* for render passes, sum and reset indirect light pass variables
140            * for the next samples */
141           path_radiance_sum_indirect(L);
142           path_radiance_reset_indirect(L);
143         }
144       }
145     }
146
147     /* emission and transmittance */
148     if (volume_segment.closure_flag & SD_EMISSION)
149       path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
150     *throughput *= volume_segment.accum_transmittance;
151
152     /* free cached steps */
153     kernel_volume_decoupled_free(kg, &volume_segment);
154   }
155   else
156 #      endif /* __VOLUME_DECOUPLED__ */
157   {
158     /* GPU: no decoupled ray marching, scatter probalistically */
159     int num_samples = kernel_data.integrator.volume_samples;
160     float num_samples_inv = 1.0f / num_samples;
161
162     /* todo: we should cache the shader evaluations from stepping
163      * through the volume, for now we redo them multiple times */
164
165     for (int j = 0; j < num_samples; j++) {
166       PathState ps = *state;
167       Ray pray = *ray;
168       float3 tp = (*throughput) * num_samples_inv;
169
170       /* branch RNG state */
171       path_state_branch(&ps, j, num_samples);
172
173       VolumeIntegrateResult result = kernel_volume_integrate(
174           kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
175
176 #      ifdef __VOLUME_SCATTER__
177       if (result == VOLUME_PATH_SCATTERED) {
178         /* todo: support equiangular, MIS and all light sampling.
179          * alternatively get decoupled ray marching working on the GPU */
180         kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
181
182         if (kernel_path_volume_bounce(kg, sd, &tp, &ps, &L->state, &pray)) {
183           kernel_path_indirect(kg, indirect_sd, emission_sd, &pray, tp, &ps, L);
184
185           /* for render passes, sum and reset indirect light pass variables
186            * for the next samples */
187           path_radiance_sum_indirect(L);
188           path_radiance_reset_indirect(L);
189         }
190       }
191 #      endif /* __VOLUME_SCATTER__ */
192     }
193
194     /* todo: avoid this calculation using decoupled ray marching */
195     kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
196   }
197 }
198 #    endif /* __VOLUME__ */
199
200 /* bounce off surface and integrate indirect light */
201 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
202                                                                      ShaderData *sd,
203                                                                      ShaderData *indirect_sd,
204                                                                      ShaderData *emission_sd,
205                                                                      float3 throughput,
206                                                                      float num_samples_adjust,
207                                                                      PathState *state,
208                                                                      PathRadiance *L)
209 {
210   float sum_sample_weight = 0.0f;
211 #    ifdef __DENOISING_FEATURES__
212   if (state->denoising_feature_weight > 0.0f) {
213     for (int i = 0; i < sd->num_closure; i++) {
214       const ShaderClosure *sc = &sd->closure[i];
215
216       /* transparency is not handled here, but in outer loop */
217       if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
218         continue;
219       }
220
221       sum_sample_weight += sc->sample_weight;
222     }
223   }
224   else {
225     sum_sample_weight = 1.0f;
226   }
227 #    endif /* __DENOISING_FEATURES__ */
228
229   for (int i = 0; i < sd->num_closure; i++) {
230     const ShaderClosure *sc = &sd->closure[i];
231
232     /* transparency is not handled here, but in outer loop */
233     if (!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
234       continue;
235     }
236
237     int num_samples;
238
239     if (CLOSURE_IS_BSDF_DIFFUSE(sc->type))
240       num_samples = kernel_data.integrator.diffuse_samples;
241     else if (CLOSURE_IS_BSDF_BSSRDF(sc->type))
242       num_samples = 1;
243     else if (CLOSURE_IS_BSDF_GLOSSY(sc->type))
244       num_samples = kernel_data.integrator.glossy_samples;
245     else
246       num_samples = kernel_data.integrator.transmission_samples;
247
248     num_samples = ceil_to_int(num_samples_adjust * num_samples);
249
250     float num_samples_inv = num_samples_adjust / num_samples;
251
252     for (int j = 0; j < num_samples; j++) {
253       PathState ps = *state;
254       float3 tp = throughput;
255       Ray bsdf_ray;
256 #    ifdef __SHADOW_TRICKS__
257       float shadow_transparency = L->shadow_transparency;
258 #    endif
259
260       ps.rng_hash = cmj_hash(state->rng_hash, i);
261
262       if (!kernel_branched_path_surface_bounce(
263               kg, sd, sc, j, num_samples, &tp, &ps, &L->state, &bsdf_ray, sum_sample_weight)) {
264         continue;
265       }
266
267       ps.rng_hash = state->rng_hash;
268
269       kernel_path_indirect(kg, indirect_sd, emission_sd, &bsdf_ray, tp * num_samples_inv, &ps, L);
270
271       /* for render passes, sum and reset indirect light pass variables
272        * for the next samples */
273       path_radiance_sum_indirect(L);
274       path_radiance_reset_indirect(L);
275
276 #    ifdef __SHADOW_TRICKS__
277       L->shadow_transparency = shadow_transparency;
278 #    endif
279     }
280   }
281 }
282
283 #    ifdef __SUBSURFACE__
284 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
285                                                         ShaderData *sd,
286                                                         ShaderData *indirect_sd,
287                                                         ShaderData *emission_sd,
288                                                         PathRadiance *L,
289                                                         PathState *state,
290                                                         Ray *ray,
291                                                         float3 throughput)
292 {
293   for (int i = 0; i < sd->num_closure; i++) {
294     ShaderClosure *sc = &sd->closure[i];
295
296     if (!CLOSURE_IS_BSSRDF(sc->type))
297       continue;
298
299     /* set up random number generator */
300     uint lcg_state = lcg_state_init(state, 0x68bc21eb);
301     int num_samples = kernel_data.integrator.subsurface_samples * 3;
302     float num_samples_inv = 1.0f / num_samples;
303     uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
304
305     /* do subsurface scatter step with copy of shader data, this will
306      * replace the BSSRDF with a diffuse BSDF closure */
307     for (int j = 0; j < num_samples; j++) {
308       PathState hit_state = *state;
309       path_state_branch(&hit_state, j, num_samples);
310       hit_state.rng_hash = bssrdf_rng_hash;
311
312       LocalIntersection ss_isect;
313       float bssrdf_u, bssrdf_v;
314       path_state_rng_2D(kg, &hit_state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
315       int num_hits = subsurface_scatter_multi_intersect(
316           kg, &ss_isect, sd, &hit_state, sc, &lcg_state, bssrdf_u, bssrdf_v, true);
317
318       hit_state.rng_offset += PRNG_BOUNCE_NUM;
319
320 #      ifdef __VOLUME__
321       Ray volume_ray = *ray;
322       bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
323                                       sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
324 #      endif /* __VOLUME__ */
325
326       /* compute lighting with the BSDF closure */
327       for (int hit = 0; hit < num_hits; hit++) {
328         ShaderData bssrdf_sd = *sd;
329         Bssrdf *bssrdf = (Bssrdf *)sc;
330         ClosureType bssrdf_type = sc->type;
331         float bssrdf_roughness = bssrdf->roughness;
332         subsurface_scatter_multi_setup(
333             kg, &ss_isect, hit, &bssrdf_sd, &hit_state, bssrdf_type, bssrdf_roughness);
334
335 #      ifdef __VOLUME__
336         if (need_update_volume_stack) {
337           /* Setup ray from previous surface point to the new one. */
338           float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
339           volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
340
341           for (int k = 0; k < VOLUME_STACK_SIZE; k++) {
342             hit_state.volume_stack[k] = state->volume_stack[k];
343           }
344
345           kernel_volume_stack_update_for_subsurface(
346               kg, emission_sd, &volume_ray, hit_state.volume_stack);
347         }
348 #      endif /* __VOLUME__ */
349
350 #      ifdef __EMISSION__
351         /* direct light */
352         if (kernel_data.integrator.use_direct_light) {
353           int all = (kernel_data.integrator.sample_all_lights_direct) ||
354                     (hit_state.flag & PATH_RAY_SHADOW_CATCHER);
355           kernel_branched_path_surface_connect_light(
356               kg, &bssrdf_sd, emission_sd, &hit_state, throughput, num_samples_inv, L, all);
357         }
358 #      endif /* __EMISSION__ */
359
360         /* indirect light */
361         kernel_branched_path_surface_indirect_light(
362             kg, &bssrdf_sd, indirect_sd, emission_sd, throughput, num_samples_inv, &hit_state, L);
363       }
364     }
365   }
366 }
367 #    endif /* __SUBSURFACE__ */
368
369 ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
370                                                uint rng_hash,
371                                                int sample,
372                                                Ray ray,
373                                                ccl_global float *buffer,
374                                                PathRadiance *L)
375 {
376   /* initialize */
377   float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
378
379   path_radiance_init(L, kernel_data.film.use_light_pass);
380
381   /* shader data memory used for both volumes and surfaces, saves stack space */
382   ShaderData sd;
383   /* shader data used by emission, shadows, volume stacks, indirect path */
384   ShaderDataTinyStorage emission_sd_storage;
385   ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
386   ShaderData indirect_sd;
387
388   PathState state;
389   path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
390
391   /* Main Loop
392    * Here we only handle transparency intersections from the camera ray.
393    * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
394    */
395   for (;;) {
396     /* Find intersection with objects in scene. */
397     Intersection isect;
398     bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
399
400 #    ifdef __VOLUME__
401     /* Volume integration. */
402     kernel_branched_path_volume(
403         kg, &sd, &state, &ray, &throughput, &isect, hit, &indirect_sd, emission_sd, L);
404 #    endif /* __VOLUME__ */
405
406     /* Shade background. */
407     if (!hit) {
408       kernel_path_background(kg, &state, &ray, throughput, &sd, L);
409       break;
410     }
411
412     /* Setup and evaluate shader. */
413     shader_setup_from_ray(kg, &sd, &isect, &ray);
414
415     /* Skip most work for volume bounding surface. */
416 #    ifdef __VOLUME__
417     if (!(sd.flag & SD_HAS_ONLY_VOLUME)) {
418 #    endif
419
420       shader_eval_surface(kg, &sd, &state, state.flag);
421       shader_merge_closures(&sd);
422
423       /* Apply shadow catcher, holdout, emission. */
424       if (!kernel_path_shader_apply(kg, &sd, &state, &ray, throughput, emission_sd, L, buffer)) {
425         break;
426       }
427
428       /* transparency termination */
429       if (state.flag & PATH_RAY_TRANSPARENT) {
430         /* path termination. this is a strange place to put the termination, it's
431        * mainly due to the mixed in MIS that we use. gives too many unneeded
432        * shader evaluations, only need emission if we are going to terminate */
433         float probability = path_state_continuation_probability(kg, &state, throughput);
434
435         if (probability == 0.0f) {
436           break;
437         }
438         else if (probability != 1.0f) {
439           float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
440
441           if (terminate >= probability)
442             break;
443
444           throughput /= probability;
445         }
446       }
447
448       kernel_update_denoising_features(kg, &sd, &state, L);
449
450 #    ifdef __AO__
451       /* ambient occlusion */
452       if (kernel_data.integrator.use_ambient_occlusion) {
453         kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
454       }
455 #    endif /* __AO__ */
456
457 #    ifdef __SUBSURFACE__
458       /* bssrdf scatter to a different location on the same object */
459       if (sd.flag & SD_BSSRDF) {
460         kernel_branched_path_subsurface_scatter(
461             kg, &sd, &indirect_sd, emission_sd, L, &state, &ray, throughput);
462       }
463 #    endif /* __SUBSURFACE__ */
464
465       PathState hit_state = state;
466
467 #    ifdef __EMISSION__
468       /* direct light */
469       if (kernel_data.integrator.use_direct_light) {
470         int all = (kernel_data.integrator.sample_all_lights_direct) ||
471                   (state.flag & PATH_RAY_SHADOW_CATCHER);
472         kernel_branched_path_surface_connect_light(
473             kg, &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
474       }
475 #    endif /* __EMISSION__ */
476
477       /* indirect light */
478       kernel_branched_path_surface_indirect_light(
479           kg, &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
480
481       /* continue in case of transparency */
482       throughput *= shader_bsdf_transparency(kg, &sd);
483
484       if (is_zero(throughput))
485         break;
486
487       /* Update Path State */
488       path_state_next(kg, &state, LABEL_TRANSPARENT);
489
490 #    ifdef __VOLUME__
491     }
492     else {
493       if (!path_state_volume_next(kg, &state)) {
494         break;
495       }
496     }
497 #    endif
498
499     ray.P = ray_offset(sd.P, -sd.Ng);
500     ray.t -= sd.ray_length; /* clipping works through transparent */
501
502 #    ifdef __RAY_DIFFERENTIALS__
503     ray.dP = sd.dP;
504     ray.dD.dx = -sd.dI.dx;
505     ray.dD.dy = -sd.dI.dy;
506 #    endif /* __RAY_DIFFERENTIALS__ */
507
508 #    ifdef __VOLUME__
509     /* enter/exit volume */
510     kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
511 #    endif /* __VOLUME__ */
512   }
513 }
514
515 ccl_device void kernel_branched_path_trace(
516     KernelGlobals *kg, ccl_global float *buffer, int sample, int x, int y, int offset, int stride)
517 {
518   /* buffer offset */
519   int index = offset + x + y * stride;
520   int pass_stride = kernel_data.film.pass_stride;
521
522   buffer += index * pass_stride;
523
524   /* initialize random numbers and ray */
525   uint rng_hash;
526   Ray ray;
527
528   kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
529
530   /* integrate */
531   PathRadiance L;
532
533   if (ray.t != 0.0f) {
534     kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
535     kernel_write_result(kg, buffer, sample, &L);
536   }
537 }
538
539 #  endif /* __SPLIT_KERNEL__ */
540
541 #endif /* __BRANCHED_PATH__ */
542
543 CCL_NAMESPACE_END