Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Aniso...
[blender.git] / intern / cycles / kernel / kernel_path_branched.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 #ifdef __BRANCHED_PATH__
20
21 ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
22                                         ShaderData *sd,
23                                         ShaderData *emission_sd,
24                                         PathRadiance *L,
25                                         PathState *state,
26                                         RNG *rng,
27                                         float3 throughput)
28 {
29         int num_samples = kernel_data.integrator.ao_samples;
30         float num_samples_inv = 1.0f/num_samples;
31         float ao_factor = kernel_data.background.ao_factor;
32         float3 ao_N;
33         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
34         float3 ao_alpha = shader_bsdf_alpha(kg, sd);
35
36         for(int j = 0; j < num_samples; j++) {
37                 float bsdf_u, bsdf_v;
38                 path_branched_rng_2D(kg, rng, state, j, num_samples, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
39
40                 float3 ao_D;
41                 float ao_pdf;
42
43                 sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
44
45                 if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
46                         Ray light_ray;
47                         float3 ao_shadow;
48
49                         light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
50                         light_ray.D = ao_D;
51                         light_ray.t = kernel_data.background.ao_distance;
52 #ifdef __OBJECT_MOTION__
53                         light_ray.time = ccl_fetch(sd, time);
54 #endif
55                         light_ray.dP = ccl_fetch(sd, dP);
56                         light_ray.dD = differential3_zero();
57
58                         if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
59                                 path_radiance_accum_ao(L, throughput*num_samples_inv, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
60                 }
61         }
62 }
63
64
65 /* bounce off surface and integrate indirect light */
66 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
67         RNG *rng, ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
68         float3 throughput, float num_samples_adjust, PathState *state, PathRadiance *L)
69 {
70         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
71                 const ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
72
73                 if(!CLOSURE_IS_BSDF(sc->type))
74                         continue;
75                 /* transparency is not handled here, but in outer loop */
76                 if(sc->type == CLOSURE_BSDF_TRANSPARENT_ID)
77                         continue;
78
79                 int num_samples;
80
81                 if(CLOSURE_IS_BSDF_DIFFUSE(sc->type))
82                         num_samples = kernel_data.integrator.diffuse_samples;
83                 else if(CLOSURE_IS_BSDF_BSSRDF(sc->type))
84                         num_samples = 1;
85                 else if(CLOSURE_IS_BSDF_GLOSSY(sc->type))
86                         num_samples = kernel_data.integrator.glossy_samples;
87                 else
88                         num_samples = kernel_data.integrator.transmission_samples;
89
90                 num_samples = ceil_to_int(num_samples_adjust*num_samples);
91
92                 float num_samples_inv = num_samples_adjust/num_samples;
93                 RNG bsdf_rng = cmj_hash(*rng, i);
94
95                 for(int j = 0; j < num_samples; j++) {
96                         PathState ps = *state;
97                         float3 tp = throughput;
98                         Ray bsdf_ray;
99
100                         if(!kernel_branched_path_surface_bounce(kg,
101                                                                 &bsdf_rng,
102                                                                 sd,
103                                                                 sc,
104                                                                 j,
105                                                                 num_samples,
106                                                                 &tp,
107                                                                 &ps,
108                                                                 L,
109                                                                 &bsdf_ray))
110                         {
111                                 continue;
112                         }
113
114                         kernel_path_indirect(kg,
115                                                          indirect_sd,
116                                              emission_sd,
117                                              rng,
118                                              &bsdf_ray,
119                                              tp*num_samples_inv,
120                                              num_samples,
121                                              &ps,
122                                              L);
123
124                         /* for render passes, sum and reset indirect light pass variables
125                          * for the next samples */
126                         path_radiance_sum_indirect(L);
127                         path_radiance_reset_indirect(L);
128                 }
129         }
130 }
131
132 #ifdef __SUBSURFACE__
133 ccl_device void kernel_branched_path_subsurface_scatter(KernelGlobals *kg,
134                                                         ShaderData *sd,
135                                                         ShaderData *indirect_sd,
136                                                         ShaderData *emission_sd,
137                                                         PathRadiance *L,
138                                                         PathState *state,
139                                                         RNG *rng,
140                                                         Ray *ray,
141                                                         float3 throughput)
142 {
143         for(int i = 0; i < ccl_fetch(sd, num_closure); i++) {
144                 ShaderClosure *sc = &ccl_fetch(sd, closure)[i];
145
146                 if(!CLOSURE_IS_BSSRDF(sc->type))
147                         continue;
148
149                 /* set up random number generator */
150                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
151                 int num_samples = kernel_data.integrator.subsurface_samples;
152                 float num_samples_inv = 1.0f/num_samples;
153                 RNG bssrdf_rng = cmj_hash(*rng, i);
154
155                 /* do subsurface scatter step with copy of shader data, this will
156                  * replace the BSSRDF with a diffuse BSDF closure */
157                 for(int j = 0; j < num_samples; j++) {
158                         SubsurfaceIntersection ss_isect;
159                         float bssrdf_u, bssrdf_v;
160                         path_branched_rng_2D(kg, &bssrdf_rng, state, j, num_samples, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
161                         int num_hits = subsurface_scatter_multi_intersect(kg,
162                                                                           &ss_isect,
163                                                                           sd,
164                                                                           sc,
165                                                                           &lcg_state,
166                                                                           bssrdf_u, bssrdf_v,
167                                                                           true);
168 #ifdef __VOLUME__
169                         Ray volume_ray = *ray;
170                         bool need_update_volume_stack = kernel_data.integrator.use_volumes &&
171                                                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
172 #endif
173
174                         /* compute lighting with the BSDF closure */
175                         for(int hit = 0; hit < num_hits; hit++) {
176                                 ShaderData bssrdf_sd = *sd;
177                                 subsurface_scatter_multi_setup(kg,
178                                                                &ss_isect,
179                                                                hit,
180                                                                &bssrdf_sd,
181                                                                state,
182                                                                state->flag,
183                                                                sc,
184                                                                true);
185
186                                 PathState hit_state = *state;
187
188                                 path_state_branch(&hit_state, j, num_samples);
189
190 #ifdef __VOLUME__
191                                 if(need_update_volume_stack) {
192                                         /* Setup ray from previous surface point to the new one. */
193                                         float3 P = ray_offset(bssrdf_sd.P, -bssrdf_sd.Ng);
194                                         volume_ray.D = normalize_len(P - volume_ray.P,
195                                                                      &volume_ray.t);
196
197                                         kernel_volume_stack_update_for_subsurface(
198                                             kg,
199                                             emission_sd,
200                                             &volume_ray,
201                                             hit_state.volume_stack);
202                                 }
203 #endif
204
205 #ifdef __EMISSION__
206                                 /* direct light */
207                                 if(kernel_data.integrator.use_direct_light) {
208                                         int all = kernel_data.integrator.sample_all_lights_direct;
209                                         kernel_branched_path_surface_connect_light(
210                                                 kg,
211                                                 rng,
212                                                 &bssrdf_sd,
213                                                 emission_sd,
214                                                 &hit_state,
215                                                 throughput,
216                                                 num_samples_inv,
217                                                 L,
218                                                 all);
219                                 }
220 #endif
221
222                                 /* indirect light */
223                                 kernel_branched_path_surface_indirect_light(
224                                         kg,
225                                         rng,
226                                         &bssrdf_sd,
227                                                 indirect_sd,
228                                         emission_sd,
229                                         throughput,
230                                         num_samples_inv,
231                                         &hit_state,
232                                         L);
233                         }
234                 }
235         }
236 }
237 #endif
238
/* Integrate the radiance arriving along one camera ray with the branched
 * path integrator.
 *
 * The main loop only follows the camera ray through transparent surfaces.
 * At every hit it branches into volume, ambient occlusion, subsurface,
 * direct light and indirect light sampling; the results are accumulated in
 * a local PathRadiance.
 *
 * kg:     kernel globals, passed to every helper.
 * rng:    random number generator passed to the sampling helpers.
 * sample: sample index, used for path state init and for writing passes.
 * ray:    camera ray; taken by value and moved past each transparent hit.
 * buffer: buffer handed to the data, light and debug pass writers.
 *
 * Returns (L_sum.x, L_sum.y, L_sum.z, 1 - L_transparent), where L_sum comes
 * from path_radiance_clamp_and_sum() and L_transparent is the weight
 * accumulated for transparent background and holdout. */
239 ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
240 {
241         /* initialize */
242         PathRadiance L;
243         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
244         float L_transparent = 0.0f;
245
246         path_radiance_init(&L, kernel_data.film.use_light_pass);
247
248         /* shader data memory used for both volumes and surfaces, saves stack space */
249         ShaderData sd;
250         /* shader data used by emission, shadows, volume stacks, indirect path */
251         ShaderData emission_sd, indirect_sd;
252
253         PathState state;
254         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
255
256 #ifdef __KERNEL_DEBUG__
257         DebugData debug_data;
258         debug_data_init(&debug_data);
259 #endif
260
261         /* Main Loop
262          * Here we only handle transparency intersections from the camera ray.
263          * Indirect bounces are handled in kernel_branched_path_surface_indirect_light().
264          */
265         for(;;) {
266                 /* intersect scene */
267                 Intersection isect;
268                 uint visibility = path_state_ray_visibility(kg, &state);
269
270 #ifdef __HAIR__
271                 float difl = 0.0f, extmax = 0.0f;
272                 uint lcg_state = 0;
273
274                 if(kernel_data.bvh.have_curves) {
275                         if(kernel_data.cam.resolution == 1) {
276                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
277                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
278                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
279                         }
280
281                         extmax = kernel_data.curve.maximum_width;
282                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
283                 }
284
285                 bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
286 #else
287                 bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
288 #endif
289
290 #ifdef __KERNEL_DEBUG__
291                 debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
292                 debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
293                 debug_data.num_ray_bounces++;
294 #endif
295
296 #ifdef __VOLUME__
297                 /* volume attenuation, emission, scatter */
298                 if(state.volume_stack[0].shader != SHADER_NONE) {
                            /* the volume segment ends at the surface hit, or is unbounded without one */
299                         Ray volume_ray = ray;
300                         volume_ray.t = (hit)? isect.t: FLT_MAX;
301                         
302                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
303
304 #ifdef __VOLUME_DECOUPLED__
305                         /* decoupled ray marching only supported on CPU */
306
307                         /* cache steps along volume for repeated sampling */
308                         VolumeSegment volume_segment;
309
310                         shader_setup_from_volume(kg, &sd, &volume_ray);
311                         kernel_volume_decoupled_record(kg, &state,
312                                 &volume_ray, &sd, &volume_segment, heterogeneous);
313
314                         /* direct light sampling */
315                         if(volume_segment.closure_flag & SD_SCATTER) {
316                                 volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
317
318                                 int all = kernel_data.integrator.sample_all_lights_direct;
319
320                                 kernel_branched_path_volume_connect_light(kg, rng, &sd,
321                                         &emission_sd, throughput, &state, &L, all,
322                                         &volume_ray, &volume_segment);
323
324                                 /* indirect light sampling */
325                                 int num_samples = kernel_data.integrator.volume_samples;
326                                 float num_samples_inv = 1.0f/num_samples;
327
328                                 for(int j = 0; j < num_samples; j++) {
329                                         /* workaround to fix correlation bug in T38710, can find better solution
330                                          * in random number generator later, for now this is done here to not impact
331                                          * performance of rendering without volumes */
332                                         RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
333
                                            /* each branch works on its own copy of state, ray and throughput */
334                                         PathState ps = state;
335                                         Ray pray = ray;
336                                         float3 tp = throughput;
337
338                                         /* branch RNG state */
339                                         path_state_branch(&ps, j, num_samples);
340
341                                         /* scatter sample. if we use distance sampling and take just one
342                                          * sample for direct and indirect light, we could share this
343                                          * computation, but makes code a bit complex */
344                                         float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
345                                         float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
346
347                                         VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
348                                                 &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
349
                                            /* the void cast keeps `result` referenced when kernel_assert compiles to nothing */
350                                         (void)result;
351                                         kernel_assert(result == VOLUME_PATH_SCATTERED);
352
353                                         if(kernel_path_volume_bounce(kg,
354                                                                      rng,
355                                                                      &sd,
356                                                                      &tp,
357                                                                      &ps,
358                                                                      &L,
359                                                                      &pray))
360                                         {
361                                                 kernel_path_indirect(kg,
362                                                                      &indirect_sd,
363                                                                      &emission_sd,
364                                                                      rng,
365                                                                      &pray,
366                                                                      tp*num_samples_inv,
367                                                                      num_samples,
368                                                                      &ps,
369                                                                      &L);
370
371                                                 /* for render passes, sum and reset indirect light pass variables
372                                                  * for the next samples */
373                                                 path_radiance_sum_indirect(&L);
374                                                 path_radiance_reset_indirect(&L);
375                                         }
376                                 }
377                         }
378
379                         /* emission and transmittance */
380                         if(volume_segment.closure_flag & SD_EMISSION)
381                                 path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
382                         throughput *= volume_segment.accum_transmittance;
383
384                         /* free cached steps */
385                         kernel_volume_decoupled_free(kg, &volume_segment);
386 #else
387                         /* GPU: no decoupled ray marching, scatter probabilistically */
388                         int num_samples = kernel_data.integrator.volume_samples;
389                         float num_samples_inv = 1.0f/num_samples;
390
391                         /* todo: we should cache the shader evaluations from stepping
392                          * through the volume, for now we redo them multiple times */
393
394                         for(int j = 0; j < num_samples; j++) {
                                    /* unlike the decoupled path, the sample weight is folded into tp up front */
395                                 PathState ps = state;
396                                 Ray pray = ray;
397                                 float3 tp = throughput * num_samples_inv;
398
399                                 /* branch RNG state */
400                                 path_state_branch(&ps, j, num_samples);
401
402                                 VolumeIntegrateResult result = kernel_volume_integrate(
403                                         kg, &ps, &sd, &volume_ray, &L, &tp, rng, heterogeneous);
404
405 #ifdef __VOLUME_SCATTER__
406                                 if(result == VOLUME_PATH_SCATTERED) {
407                                         /* todo: support equiangular, MIS and all light sampling.
408                                          * alternatively get decoupled ray marching working on the GPU */
                                            /* NOTE(review): this call receives the un-branched `state`, while the
                                             * bounce below uses the branched copy `ps` - confirm this is intended */
409                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, tp, &state, &L);
410
411                                         if(kernel_path_volume_bounce(kg,
412                                                                      rng,
413                                                                      &sd,
414                                                                      &tp,
415                                                                      &ps,
416                                                                      &L,
417                                                                      &pray))
418                                         {
419                                                 kernel_path_indirect(kg,
420                                                                      &indirect_sd,
421                                                                      &emission_sd,
422                                                                      rng,
423                                                                      &pray,
424                                                                      tp,
425                                                                      num_samples,
426                                                                      &ps,
427                                                                      &L);
428
429                                                 /* for render passes, sum and reset indirect light pass variables
430                                                  * for the next samples */
431                                                 path_radiance_sum_indirect(&L);
432                                                 path_radiance_reset_indirect(&L);
433                                         }
434                                 }
435 #endif
436                         }
437
438                         /* todo: avoid this calculation using decoupled ray marching */
439                         kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
440 #endif
441                 }
442 #endif
443
444                 if(!hit) {
445                         /* eval background shader if nothing hit */
446                         if(kernel_data.background.transparent) {
447                                 L_transparent += average(throughput);
448
                                    /* with __PASSES__ the loop only stops here when no background pass
                                     * is requested; without __PASSES__ it always stops here */
449 #ifdef __PASSES__
450                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
451 #endif
452                                         break;
453                         }
454
455 #ifdef __BACKGROUND__
456                         /* sample background shader */
457                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
458                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
459 #endif
460
461                         break;
462                 }
463
464                 /* setup shading */
465                 shader_setup_from_ray(kg, &sd, &isect, &ray);
466                 shader_eval_surface(kg, &sd, rng, &state, 0.0f, state.flag, SHADER_CONTEXT_MAIN);
467                 shader_merge_closures(&sd);
468
469                 /* holdout */
470 #ifdef __HOLDOUT__
471                 if(sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) {
472                         if(kernel_data.background.transparent) {
473                                 float3 holdout_weight;
474                                 
475                                 if(sd.flag & SD_HOLDOUT_MASK)
476                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
477                                 else
478                                         holdout_weight = shader_holdout_eval(kg, &sd);
479
480                                 /* any throughput is ok, should all be identical here */
481                                 L_transparent += average(holdout_weight*throughput);
482                         }
483
484                         if(sd.flag & SD_HOLDOUT_MASK)
485                                 break;
486                 }
487 #endif
488
489                 /* holdout mask objects do not write data passes */
490                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
491
492 #ifdef __EMISSION__
493                 /* emission */
494                 if(sd.flag & SD_EMISSION) {
495                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
496                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
497                 }
498 #endif
499
500                 /* transparency termination */
501                 if(state.flag & PATH_RAY_TRANSPARENT) {
502                         /* path termination. this is a strange place to put the termination, it's
503                          * mainly due to the mixed in MIS that we use. gives too many unneeded
504                          * shader evaluations, only need emission if we are going to terminate */
505                         float probability = path_state_terminate_probability(kg, &state, throughput);
506
507                         if(probability == 0.0f) {
508                                 break;
509                         }
510                         else if(probability != 1.0f) {
511                                 float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
512
513                                 if(terminate >= probability)
514                                         break;
515
                                    /* surviving paths are divided by the probability they were kept with */
516                                 throughput /= probability;
517                         }
518                 }
519
520 #ifdef __AO__
521                 /* ambient occlusion */
522                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
523                         kernel_branched_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
524                 }
525 #endif
526
527 #ifdef __SUBSURFACE__
528                 /* bssrdf scatter to a different location on the same object */
529                 if(sd.flag & SD_BSSRDF) {
530                         kernel_branched_path_subsurface_scatter(kg, &sd, &indirect_sd, &emission_sd,
531                                                                 &L, &state, rng, &ray, throughput);
532                 }
533 #endif
534
535                 if(!(sd.flag & SD_HAS_ONLY_VOLUME)) {
                            /* the light sampling helpers below receive a copy of the path state */
536                         PathState hit_state = state;
537
538 #ifdef __EMISSION__
539                         /* direct light */
540                         if(kernel_data.integrator.use_direct_light) {
541                                 int all = kernel_data.integrator.sample_all_lights_direct;
542                                 kernel_branched_path_surface_connect_light(kg, rng,
543                                         &sd, &emission_sd, &hit_state, throughput, 1.0f, &L, all);
544                         }
545 #endif
546
547                         /* indirect light */
548                         kernel_branched_path_surface_indirect_light(kg, rng,
549                                 &sd, &indirect_sd, &emission_sd, throughput, 1.0f, &hit_state, &L);
550
551                         /* continue in case of transparency */
552                         throughput *= shader_bsdf_transparency(kg, &sd);
553
554                         if(is_zero(throughput))
555                                 break;
556                 }
557
558                 /* Update Path State */
559                 state.flag |= PATH_RAY_TRANSPARENT;
560                 state.transparent_bounce++;
561
                    /* restart the ray just behind the surface, keeping its direction */
562                 ray.P = ray_offset(sd.P, -sd.Ng);
563                 ray.t -= sd.ray_length; /* clipping works through transparent */
564
565
566 #ifdef __RAY_DIFFERENTIALS__
567                 ray.dP = sd.dP;
568                 ray.dD.dx = -sd.dI.dx;
569                 ray.dD.dy = -sd.dI.dy;
570 #endif
571
572 #ifdef __VOLUME__
573                 /* enter/exit volume */
574                 kernel_volume_stack_enter_exit(kg, &sd, state.volume_stack);
575 #endif
576         }
577
578         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
579
580         kernel_write_light_passes(kg, buffer, &L, sample);
581
582 #ifdef __KERNEL_DEBUG__
583         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
584 #endif
585
            /* fourth component is one minus the accumulated transparent weight */
586         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
587 }
588
589 ccl_device void kernel_branched_path_trace(KernelGlobals *kg,
590         ccl_global float *buffer, ccl_global uint *rng_state,
591         int sample, int x, int y, int offset, int stride)
592 {
593         /* buffer offset */
594         int index = offset + x + y*stride;
595         int pass_stride = kernel_data.film.pass_stride;
596
597         rng_state += index;
598         buffer += index*pass_stride;
599
600         /* initialize random numbers and ray */
601         RNG rng;
602         Ray ray;
603
604         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
605
606         /* integrate */
607         float4 L;
608
609         if(ray.t != 0.0f)
610                 L = kernel_branched_path_integrate(kg, &rng, sample, ray, buffer);
611         else
612                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
613
614         /* accumulate result in output buffer */
615         kernel_write_pass_float4(buffer, sample, L);
616
617         path_rng_end(kg, rng_state, rng);
618 }
619
620 #endif  /* __BRANCHED_PATH__ */
621
622 CCL_NAMESPACE_END
623