b37bc65f4dfbdbd0018c6b0df598fda43e37a686
[blender.git] / intern / cycles / kernel / kernel_path_branched.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 #ifdef __BRANCHED_PATH__
20
21 ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
22                                                ShaderData *sd,
23                                                ShaderData *emission_sd,
24                                                PathRadiance *L,
25                                                ccl_addr_space PathState *state,
26                                                float3 throughput)
27 {
28         int num_samples = kernel_data.integrator.ao_samples;
29         float num_samples_inv = 1.0f/num_samples;
30         float ao_factor = kernel_data.background.ao_factor;
31         float3 ao_N;
32         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
33         float3 ao_alpha = shader_bsdf_alpha(kg, sd);
34
35         for(int j = 0; j < num_samples; j++) {
36                 float bsdf_u, bsdf_v;
37                 path_branched_rng_2D(kg, state->rng_hash, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
38
39                 float3 ao_D;
40                 float ao_pdf;
41
42                 sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
43
44                 if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
45                         Ray light_ray;
46                         float3 ao_shadow;
47
48                         light_ray.P = ray_offset(sd->P, sd->Ng);
49                         light_ray.D = ao_D;
50                         light_ray.t = kernel_data.background.ao_distance;
51                         light_ray.time = sd->time;
52                         light_ray.dP = sd->dP;
53                         light_ray.dD = differential3_zero();
54
55                         if(!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &ao_shadow)) {
56                                 path_radiance_accum_ao(L, state, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow);
57                         }
58                         else {
59                                 path_radiance_accum_total_ao(L, state, throughput*num_samples_inv, ao_bsdf);
60                         }
61                 }
62         }
63 }
64
65 #ifndef __SPLIT_KERNEL__
66
67 #ifdef __VOLUME__
68 ccl_device_forceinline void kernel_branched_path_volume(
69         KernelGlobals *kg,
70         ShaderData *sd,
71         PathState *state,
72         Ray *ray,
73         float3 *throughput,
74         ccl_addr_space Intersection *isect,
75         bool hit,
76         ShaderData *indirect_sd,
77         ShaderData *emission_sd,
78         PathRadiance *L)
79 {
80         /* Sanitize volume stack. */
81         if(!hit) {
82                 kernel_volume_clean_stack(kg, state->volume_stack);
83         }
84
85         if(state->volume_stack[0].shader == SHADER_NONE) {
86                 return;
87         }
88
89         /* volume attenuation, emission, scatter */
90         Ray volume_ray = *ray;
91         volume_ray.t = (hit)? isect->t: FLT_MAX;
92
93         bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
94
95 #  ifdef __VOLUME_DECOUPLED__
96         /* decoupled ray marching only supported on CPU */
97         if(kernel_data.integrator.volume_decoupled) {
98                 /* cache steps along volume for repeated sampling */
99                 VolumeSegment volume_segment;
100
101                 shader_setup_from_volume(kg, sd, &volume_ray);
102                 kernel_volume_decoupled_record(kg, state,
103                         &volume_ray, sd, &volume_segment, heterogeneous);
104
105                 /* direct light sampling */
106                 if(volume_segment.closure_flag & SD_SCATTER) {
107                         volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
108
109                         int all = kernel_data.integrator.sample_all_lights_direct;
110
111                         kernel_branched_path_volume_connect_light(kg, sd,
112                                 emission_sd, *throughput, state, L, all,
113                                 &volume_ray, &volume_segment);
114
115                         /* indirect light sampling */
116                         int num_samples = kernel_data.integrator.volume_samples;
117                         float num_samples_inv = 1.0f/num_samples;
118
119                         for(int j = 0; j < num_samples; j++) {
120                                 PathState ps = *state;
121                                 Ray pray = *ray;
122                                 float3 tp = *throughput;
123
124                                 /* branch RNG state */
125                                 path_state_branch(&ps, j, num_samples);
126
127                                 /* scatter sample. if we use distance sampling and take just one
128                                  * sample for direct and indirect light, we could share this
129                                  * computation, but makes code a bit complex */
130                                 float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
131                                 float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
132
133                                 VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
134                                         &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
135
136                                 if(result == VOLUME_PATH_SCATTERED &&
137                                    kernel_path_volume_bounce(kg,
138                                                              sd,
139                                                              &tp,
140                                                              &ps,
141                                                              &L->state,
142                                                              &pray))
143                                 {
144                                         kernel_path_indirect(kg,
145                                                              indirect_sd,
146                                                              emission_sd,
147                                                              &pray,
148                                                              tp*num_samples_inv,
149                                                              &ps,
150                                                              L);
151
152                                         /* for render passes, sum and reset indirect light pass variables
153                                          * for the next samples */
154                                         path_radiance_sum_indirect(L);
155                                         path_radiance_reset_indirect(L);
156                                 }
157                         }
158                 }
159
160                 /* emission and transmittance */
161                 if(volume_segment.closure_flag & SD_EMISSION)
162                         path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
163                 *throughput *= volume_segment.accum_transmittance;
164
165                 /* free cached steps */
166                 kernel_volume_decoupled_free(kg, &volume_segment);
167         }
168         else
169 #  endif  /* __VOLUME_DECOUPLED__ */
170         {
171                 /* GPU: no decoupled ray marching, scatter probalistically */
172                 int num_samples = kernel_data.integrator.volume_samples;
173                 float num_samples_inv = 1.0f/num_samples;
174
175                 /* todo: we should cache the shader evaluations from stepping
176                  * through the volume, for now we redo them multiple times */
177
178                 for(int j = 0; j < num_samples; j++) {
179                         PathState ps = *state;
180                         Ray pray = *ray;
181                         float3 tp = (*throughput) * num_samples_inv;
182
183                         /* branch RNG state */
184                         path_state_branch(&ps, j, num_samples);
185
186                         VolumeIntegrateResult result = kernel_volume_integrate(
187                                 kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
188
189 #  ifdef __VOLUME_SCATTER__
190                         if(result == VOLUME_PATH_SCATTERED) {
191                                 /* todo: support equiangular, MIS and all light sampling.
192                                  * alternatively get decoupled ray marching working on the GPU */
193                                 kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
194
195                                 if(kernel_path_volume_bounce(kg,
196                                                              sd,
197                                                              &tp,
198                                                              &ps,
199                                                              &L->state,
200                                                              &pray))
201                                 {
202                                         kernel_path_indirect(kg,
203                                                              indirect_sd,
204                                                              emission_sd,
205                                                              &pray,
206                                                              tp,
207                                                              &ps,
208                                                              L);
209
210                                         /* for render passes, sum and reset indirect light pass variables
211                                          * for the next samples */
212                                         path_radiance_sum_indirect(L);
213                                         path_radiance_reset_indirect(L);
214                                 }
215                         }
216 # endif  /* __VOLUME_SCATTER__ */
217                 }
218
219                 /* todo: avoid this calculation using decoupled ray marching */
220                 kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
221         }
222 }
223 #endif  /* __VOLUME__ */
224
225 /* bounce off surface and integrate indirect light */
226 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
227         ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
228         float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
229 {
230         float sum_sample_weight = 0.0f;
231 #ifdef __DENOISING_FEATURES__
232         if(state->denoising_feature_weight > 0.0f) {
233                 for(int i = 0; i < sd->num_closure; i++) {
234                         const ShaderClosure *sc = &sd->closure[i];
235
236                         /* transparency is not handled here, but in outer loop */
237                         if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
238                                 continue;
239                         }
240
241                         sum_sample_weight += sc->sample_weight;
242                 }
243         }
244         else {
245                 sum_sample_weight = 1.0f;
246         }
247 #endif  /* __DENOISING_FEATURES__ */
248
249         for(int i = 0; i < sd->num_closure; i++) {
250                 const ShaderClosure *sc = &sd->closure[i];
251
252                 /* transparency is not handled here, but in outer loop */
253                 if(!CLOSURE_IS_BSDF(sc->type) || CLOSURE_IS_BSDF_TRANSPARENT(sc->type)) {
254                         continue;
255                 }
256
257                 int num_samples;
258
259                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
260                         num_samples = kernel_data.integrator.diffuse_samples;
261                 else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
262                         num_samples = 1;
263                 else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
264                         num_samples = kernel_data.integrator.glossy_samples;
265                 else
266                         num_samples = kernel_data.integrator.transmission_samples;
267
268                 num_samples = ceil_to_int(num_samples_adjust*num_samples);
269
270                 float num_samples_inv = num_samples_adjust/num_samples;
271
272                 for(int j = 0; j < num_samples; j++) {
273                         PathState ps = *state;
274                         float3 tp = throughput;
275                         Ray bsdf_ray;
276 #ifdef __SHADOW_TRICKS__
277                         float shadow_transparency = L->shadow_transparency;
278 #endif
279
280                         ps.rng_hash = cmj_hash(state->rng_hash, i);
281
282                         if(!kernel_branched_path_surface_bounce(kg,
283                                                                 sd,
284                                                                 sc,
285                                                                 j,
286                                                                 num_samples,
287                                                                 &tp,
288                                                                 &ps,
289                                                                 &L->state,
290                                                                 &bsdf_ray,
291                                                                 sum_sample_weight))
292                         {
293                                 continue;
294                         }
295
296                         ps.rng_hash = state->rng_hash;
297
298                         kernel_path_indirect(kg,
299                                              indirect_sd,
300                                              emission_sd,
301                                              &bsdf_ray,
302                                              tp*num_samples_inv,
303                                              &ps,
304                                              L);
305
306                         /* for render passes, sum and reset indirect light pass variables
307                          * for the next samples */
308                         path_radiance_sum_indirect(L);
309                         path_radiance_reset_indirect(L);
310
311 #ifdef __SHADOW_TRICKS__
312                         L->shadow_transparency = shadow_transparency;
313 #endif
314                 }
315         }
316 }
317
318 #ifdef __SUBSURFACE__
319 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
320                                                         ShaderData *sd,
321                                                         ShaderData *indirect_sd,
322                                                         ShaderData *emission_sd,
323                                                         PathRadiance *L,
324                                                         PathState *state,
325                                                         Ray *ray,
326                                                         float3 throughput)
327 {
328         for(int i = 0; i < sd->num_closure; i++) {
329                 ShaderClosure *sc = &sd->closure[i];
330
331                 if(!CLOSURE_IS_BSSRDF(sc->type))
332                         continue;
333
334                 /* set up random number generator */
335                 uint lcg_state = lcg_state_init(state, 0x68bc21eb);
336                 int num_samples = kernel_data.integrator.subsurface_samples;
337                 float num_samples_inv = 1.0f/num_samples;
338                 uint bssrdf_rng_hash = cmj_hash(state->rng_hash, i);
339
340                 /* do subsurface scatter step with copy of shader data, this will
341                  * replace the BSSRDF with a diffuse BSDF closure */
342                 for(int j = 0; j < num_samples; j++) {
343                         LocalIntersection ss_isect;
344                         float bssrdf_u, bssrdf_v;
345                         path_branched_rng_2D(kg, bssrdf_rng_hash, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
346                         int num_hits = subsurface_scatter_multi_intersect(kg,
347                                                                           &ss_isect,
348                                                                           sd,
349                                                                           sc,
350                                                                           &lcg_state,
351                                                                           bssrdf_u, bssrdf_v,
352                                                                           true);
353 #ifdef __VOLUME__
354                         Ray volume_ray = *ray;
355                         bool need_update_volume_stack =
356                                 kernel_data.integrator.use_volumes &&
357                                 sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
358 #endif  /* __VOLUME__ */
359
360                         /* compute lighting with the BSDF closure */
361                         for(int hit = 0; hit < num_hits; hit++) {
362                                 ShaderData bssrdf_sd = *sd;
363                                 subsurface_scatter_multi_setup(kg,
364                                                                &ss_isect,
365                                                                hit,
366                                                                &bssrdf_sd,
367                                                                state,
368                                                                state->flag,
369                                                                sc,
370                                                                true);
371
372                                 PathState hit_state = *state;
373
374                                 path_state_branch(&hit_state, j, num_samples);
375
376 #ifdef __VOLUME__
377                                 if(need_update_volume_stack) {
378                                         /* Setup ray from previous surface point to the new one. */
379                                         float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
380                                         volume_ray.D = normalize_len(P - volume_ray.P,
381                                                                      &volume_ray.t);
382
383                                         kernel_volume_stack_update_for_subsurface(
384                                             kg,
385                                             emission_sd,
386                                             &volume_ray,
387                                             hit_state.volume_stack);
388                                 }
389 #endif  /* __VOLUME__ */
390
391 #ifdef __EMISSION__
392                                 /* direct light */
393                                 if(kernel_data.integrator.use_direct_light) {
394                                         int all = (kernel_data.integrator.sample_all_lights_direct) ||
395                                                   (state->flag & PATH_RAY_SHADOW_CATCHER);
396                                         kernel_branched_path_surface_connect_light(
397                                                 kg,
398                                                 &bssrdf_sd,
399                                                 emission_sd,
400                                                 &hit_state,
401                                                 throughput,
402                                                 num_samples_inv,
403                                                 L,
404                                                 all);
405                                 }
406 #endif  /* __EMISSION__ */
407
408                                 /* indirect light */
409                                 kernel_branched_path_surface_indirect_light(
410                                         kg,
411                                         &bssrdf_sd,
412                                         indirect_sd,
413                                         emission_sd,
414                                         throughput,
415                                         num_samples_inv,
416                                         &hit_state,
417                                         L);
418                         }
419                 }
420         }
421 }
422 #endif  /* __SUBSURFACE__ */
423
424 ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
425                                                uint rng_hash,
426                                                int sample,
427                                                Ray ray,
428                                                ccl_global float *buffer,
429                                                PathRadiance *L)
430 {
431         /* initialize */
432         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
433
434         path_radiance_init(L, kernel_data.film.use_light_pass);
435
436         /* shader data memory used for both volumes and surfaces, saves stack space */
437         ShaderData sd;
438         /* shader data used by emission, shadows, volume stacks, indirect path */
439         ShaderDataTinyStorage emission_sd_storage;
440         ShaderData *emission_sd = AS_SHADER_DATA(&emission_sd_storage);
441         ShaderData indirect_sd;
442
443         PathState state;
444         path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray);
445
446         /* Main Loop
447          * Here we only handle transparency intersections from the camera ray.
448          * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
449          */
450         for(;;) {
451                 /* Find intersection with objects in scene. */
452                 Intersection isect;
453                 bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
454
455 #ifdef __VOLUME__
456                 /* Volume integration. */
457                 kernel_branched_path_volume(kg,
458                                             &sd,
459                                             &state,
460                                             &ray,
461                                             &throughput,
462                                             &isect,
463                                             hit,
464                                             &indirect_sd,
465                                             emission_sd,
466                                             L);
467 #endif  /* __VOLUME__ */
468
469                 /* Shade background. */
470                 if(!hit) {
471                         kernel_path_background(kg, &state, &ray, throughput, &sd, L);
472                         break;
473                 }
474
475                 /* Setup and evaluate shader. */
476                 shader_setup_from_ray(kg, &sd, &isect, &ray);
477                 shader_eval_surface(kg, &sd, &state, state.flag, MAX_CLOSURE);
478                 shader_merge_closures(&sd);
479
480                 /* Apply shadow catcher, holdout, emission. */
481                 if(!kernel_path_shader_apply(kg,
482                                              &sd,
483                                              &state,
484                                              &ray,
485                                              throughput,
486                                              emission_sd,
487                                              L,
488                                              buffer))
489                 {
490                         break;
491                 }
492
493                 /* transparency termination */
494                 if(state.flag & PATH_RAY_TRANSPARENT) {
495                         /* path termination. this is a strange place to put the termination, it's
496                          * mainly due to the mixed in MIS that we use. gives too many unneeded
497                          * shader evaluations, only need emission if we are going to terminate */
498                         float probability = path_state_continuation_probability(kg, &state, throughput);
499
500                         if(probability == 0.0f) {
501                                 break;
502                         }
503                         else if(probability != 1.0f) {
504                                 float terminate = path_state_rng_1D(kg, &state, PRNG_TERMINATE);
505
506                                 if(terminate >= probability)
507                                         break;
508
509                                 throughput /= probability;
510                         }
511                 }
512
513                 kernel_update_denoising_features(kg, &sd, &state, L);
514
515 #ifdef __AO__
516                 /* ambient occlusion */
517                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
518                         kernel_branched_path_ao(kg, &sd, emission_sd, L, &state, throughput);
519                 }
520 #endif  /* __AO__ */
521
522 #ifdef __SUBSURFACE__
523                 /* bssrdf scatter to a different location on the same object */
524                 if(sd.flag & SD_BSSRDF) {
525                         kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, emission_sd,
526                                                                 L, &state, &ray, throughput);
527                 }
528 #endif  /* __SUBSURFACE__ */
529
530                 if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
531                         PathState hit_state = state;
532
533 #ifdef __EMISSION__
534                         /* direct light */
535                         if(kernel_data.integrator.use_direct_light) {
536                                 int all = (kernel_data.integrator.sample_all_lights_direct) ||
537                                           (state.flag & PATH_RAY_SHADOW_CATCHER);
538                                 kernel_branched_path_surface_connect_light(kg,
539                                         &sd, emission_sd, &hit_state, throughput, 1.0f, L, all);
540                         }
541 #endif  /* __EMISSION__ */
542
543                         /* indirect light */
544                         kernel_branched_path_surface_indirect_light(kg,
545                                 &sd, &indirect_sd, emission_sd, throughput, 1.0f, &hit_state, L);
546
547                         /* continue in case of transparency */
548                         throughput *= shader_bsdf_transparency(kg, &sd);
549
550                         if(is_zero(throughput))
551                                 break;
552                 }
553
554                 /* Update Path State */
555                 state.flag |= PATH_RAY_TRANSPARENT;
556                 state.transparent_bounce++;
557
558                 ray.P = ray_offset(sd.P, -sd.Ng);
559                 ray.t -= sd.ray_length; /* clipping works through transparent */
560
561
562 #ifdef __RAY_DIFFERENTIALS__
563                 ray.dP = sd.dP;
564                 ray.dD.dx = -sd.dI.dx;
565                 ray.dD.dy = -sd.dI.dy;
566 #endif  /* __RAY_DIFFERENTIALS__ */
567
568 #ifdef __VOLUME__
569                 /* enter/exit volume */
570                 kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
571 #endif  /* __VOLUME__ */
572         }
573 }
574
575 ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
576         ccl_global float *buffer,
577         int sample, int x, int y, int offset, int stride)
578 {
579         /* buffer offset */
580         int index = offset + x + y*stride;
581         int pass_stride = kernel_data.film.pass_stride;
582
583         buffer += index*pass_stride;
584
585         /* initialize random numbers and ray */
586         uint rng_hash;
587         Ray ray;
588
589         kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray);
590
591         /* integrate */
592         PathRadiance L;
593
594         if(ray.t != 0.0f) {
595                 kernel_branched_path_integrate(kg, rng_hash, sample, ray, buffer, &L);
596                 kernel_write_result(kg, buffer, sample, &L);
597         }
598 }
599
600 #endif  /* __SPLIT_KERNEL__ */
601
602 #endif  /* __BRANCHED_PATH__ */
603
604 CCL_NAMESPACE_END