Cycles: Deduplicate AO calculation
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #  include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28 #include "bvh/bvh.h"
29
30 #include "kernel_accumulate.h"
31 #include "kernel_shader.h"
32 #include "kernel_light.h"
33 #include "kernel_passes.h"
34
35 #ifdef __SUBSURFACE__
36 #  include "kernel_subsurface.h"
37 #endif
38
39 #ifdef __VOLUME__
40 #  include "kernel_volume.h"
41 #endif
42
43 #include "kernel_path_state.h"
44 #include "kernel_shadow.h"
45 #include "kernel_emission.h"
46 #include "kernel_path_common.h"
47 #include "kernel_path_surface.h"
48 #include "kernel_path_volume.h"
49
50 #ifdef __KERNEL_DEBUG__
51 #  include "kernel_debug.h"
52 #endif
53
54 CCL_NAMESPACE_BEGIN
55
56 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
57                                         ShaderData *sd,
58                                         ShaderData *emission_sd,
59                                         PathRadiance *L,
60                                         PathState *state,
61                                         RNG *rng,
62                                         float3 throughput,
63                                         float3 ao_alpha)
64 {
65         /* todo: solve correlation */
66         float bsdf_u, bsdf_v;
67
68         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
69
70         float ao_factor = kernel_data.background.ao_factor;
71         float3 ao_N;
72         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
73         float3 ao_D;
74         float ao_pdf;
75
76         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
77
78         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
79                 Ray light_ray;
80                 float3 ao_shadow;
81
82                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
83                 light_ray.D = ao_D;
84                 light_ray.t = kernel_data.background.ao_distance;
85 #ifdef __OBJECT_MOTION__
86                 light_ray.time = ccl_fetch(sd, time);
87 #endif
88                 light_ray.dP = ccl_fetch(sd, dP);
89                 light_ray.dD = differential3_zero();
90
91                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
92                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
93                 }
94         }
95 }
96
/* Follow a path starting at `ray` until it ends, accumulating everything it
 * gathers into `L`.
 *
 * Every loop iteration:
 *   - intersects the scene with the current ray,
 *   - (__LAMP_MIS__) accumulates lamp emission found along the ray,
 *   - (__VOLUME__) integrates the volume segment in front of the hit, which
 *     may scatter the path inside the volume and restart the loop,
 *   - on a miss accumulates the background and stops,
 *   - on a hit evaluates the surface shader, accumulates emission, applies
 *     probabilistic path termination, ambient occlusion, a subsurface scatter
 *     step and direct lighting, then asks kernel_path_surface_bounce() for the
 *     next ray.
 *
 * The loop ends on a miss, on termination, or when a bounce function
 * returns false.
 *
 * kg           kernel globals.
 * sd           shader data reused for both volume and surface shading.
 * emission_sd  shader data handed to the emission, background and light
 *              connection helpers.
 * rng          random number generator for this path.
 * ray          current ray; read for intersection and passed to the bounce
 *              helpers (which presumably write the next ray into it - confirm
 *              against kernel_path_surface_bounce()).
 * throughput   path throughput; passed by value and updated locally.
 * num_samples  multiplies the throughput when computing the termination
 *              probability.
 * state        path state, updated in place.
 * L            radiance accumulator.
 */
97 ccl_device void kernel_path_indirect(KernelGlobals *kg,
98                                      ShaderData *sd,
99                                      ShaderData *emission_sd,
100                                      RNG *rng,
101                                      Ray *ray,
102                                      float3 throughput,
103                                      int num_samples,
104                                      PathState *state,
105                                      PathRadiance *L)
106 {
107         /* path iteration */
108         for(;;) {
109                 /* intersect scene */
110                 Intersection isect;
111                 uint visibility = path_state_ray_visibility(kg, state);
112                 bool hit = scene_intersect(kg,
113                                            *ray,
114                                            visibility,
115                                            &isect,
116                                            NULL,
117                                            0.0f, 0.0f);
118
119 #ifdef __LAMP_MIS__
120                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
121                         /* ray starting from previous non-transparent bounce */
122                         Ray light_ray;
123
                            /* The origin is computed with the ray_t value from before
                             * this segment; only afterwards is ray_t advanced by isect.t. */
124                         light_ray.P = ray->P - state->ray_t*ray->D;
125                         state->ray_t += isect.t;
126                         light_ray.D = ray->D;
127                         light_ray.t = state->ray_t;
128                         light_ray.time = ray->time;
129                         light_ray.dD = ray->dD;
130                         light_ray.dP = ray->dP;
131
132                         /* intersect with lamp */
133                         float3 emission;
134                         if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
135                                 path_radiance_accum_emission(L,
136                                                              throughput,
137                                                              emission,
138                                                              state->bounce);
139                         }
140                 }
141 #endif
142
143 #ifdef __VOLUME__
144                 /* volume attenuation, emission, scatter */
145                 if(state->volume_stack[0].shader != SHADER_NONE) {
                            /* Copy of the ray limited to the surface hit, or unbounded
                             * (FLT_MAX) when nothing was hit. */
146                         Ray volume_ray = *ray;
147                         volume_ray.t = (hit)? isect.t: FLT_MAX;
148
149                         bool heterogeneous =
150                                 volume_stack_is_heterogeneous(kg,
151                                                               state->volume_stack);
152
153 #  ifdef __VOLUME_DECOUPLED__
154                         int sampling_method =
155                                 volume_stack_sampling_method(kg,
156                                                              state->volume_stack);
                            /* NOTE(review): the third argument is `false` here while
                             * kernel_path_integrate() passes `true`; presumably it
                             * distinguishes direct from indirect paths - confirm against
                             * kernel_volume_use_decoupled(). */
157                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
158
159                         if(decoupled) {
160                                 /* cache steps along volume for repeated sampling */
161                                 VolumeSegment volume_segment;
162
163                                 shader_setup_from_volume(kg,
164                                                          sd,
165                                                          &volume_ray);
166                                 kernel_volume_decoupled_record(kg,
167                                                                state,
168                                                                &volume_ray,
169                                                                sd,
170                                                                &volume_segment,
171                                                                heterogeneous);
172
173                                 volume_segment.sampling_method = sampling_method;
174
175                                 /* emission */
176                                 if(volume_segment.closure_flag & SD_EMISSION) {
177                                         path_radiance_accum_emission(L,
178                                                                      throughput,
179                                                                      volume_segment.accum_emission,
180                                                                      state->bounce);
181                                 }
182
183                                 /* scattering */
                                    /* Stays VOLUME_PATH_ATTENUATED unless the segment has
                                     * a scatter closure and the scatter call below changes it. */
184                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
185
186                                 if(volume_segment.closure_flag & SD_SCATTER) {
187                                         int all = kernel_data.integrator.sample_all_lights_indirect;
188
189                                         /* direct light sampling */
190                                         kernel_branched_path_volume_connect_light(kg,
191                                                                                   rng,
192                                                                                   sd,
193                                                                                   emission_sd,
194                                                                                   throughput,
195                                                                                   state,
196                                                                                   L,
197                                                                                   all,
198                                                                                   &volume_ray,
199                                                                                   &volume_segment);
200
201                                         /* indirect sample. if we use distance sampling and take just
202                                          * one sample for direct and indirect light, we could share
203                                          * this computation, but makes code a bit complex */
204                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
205                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
206
207                                         result = kernel_volume_decoupled_scatter(kg,
208                                                                                  state,
209                                                                                  &volume_ray,
210                                                                                  sd,
211                                                                                  &throughput,
212                                                                                  rphase,
213                                                                                  rscatter,
214                                                                                  &volume_segment,
215                                                                                  NULL,
216                                                                                  true);
217                                 }
218
219                                 /* free cached steps */
220                                 kernel_volume_decoupled_free(kg, &volume_segment);
221
222                                 if(result == VOLUME_PATH_SCATTERED) {
                                            /* Scattered inside the volume: either restart
                                             * the loop with the new ray or end the path. */
223                                         if(kernel_path_volume_bounce(kg,
224                                                                      rng,
225                                                                      sd,
226                                                                      &throughput,
227                                                                      state,
228                                                                      L,
229                                                                      ray))
230                                         {
231                                                 continue;
232                                         }
233                                         else {
234                                                 break;
235                                         }
236                                 }
237                                 else {
                                            /* Not scattered: scale the throughput by the
                                             * segment's accumulated transmittance and go
                                             * on to the surface hit / miss handling. */
238                                         throughput *= volume_segment.accum_transmittance;
239                                 }
240                         }
                            /* The `else` belongs to the __VOLUME_DECOUPLED__ section: without
                             * that define the braced block below runs unconditionally. */
241                         else
242 #  endif
243                         {
244                                 /* integrate along volume segment with distance sampling */
245                                 VolumeIntegrateResult result = kernel_volume_integrate(
246                                         kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
247
248 #  ifdef __VOLUME_SCATTER__
249                                 if(result == VOLUME_PATH_SCATTERED) {
250                                         /* direct lighting */
251                                         kernel_path_volume_connect_light(kg,
252                                                                          rng,
253                                                                          sd,
254                                                                          emission_sd,
255                                                                          throughput,
256                                                                          state,
257                                                                          L);
258
259                                         /* indirect light bounce */
260                                         if(kernel_path_volume_bounce(kg,
261                                                                      rng,
262                                                                      sd,
263                                                                      &throughput,
264                                                                      state,
265                                                                      L,
266                                                                      ray))
267                                         {
268                                                 continue;
269                                         }
270                                         else {
271                                                 break;
272                                         }
273                                 }
274 #  endif
275                         }
276                 }
277 #endif
278
                    /* No surface was hit: optionally add the background, then end the path. */
279                 if(!hit) {
280 #ifdef __BACKGROUND__
281                         /* sample background shader */
282                         float3 L_background = indirect_background(kg, emission_sd, state, ray);
283                         path_radiance_accum_background(L,
284                                                        throughput,
285                                                        L_background,
286                                                        state->bounce);
287 #endif
288
289                         break;
290                 }
291
292                 /* setup shading */
293                 shader_setup_from_ray(kg,
294                                       sd,
295                                       &isect,
296                                       ray);
297                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
298                 shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
299 #ifdef __BRANCHED_PATH__
300                 shader_merge_closures(sd);
301 #endif
302
303                 /* blurring of bsdf after bounces, for rays that have a small likelihood
304                  * of following this particular path (diffuse, rough glossy) */
                    /* filter_glossy == FLT_MAX disables the blur entirely. */
305                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
306                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
307
308                         if(blur_pdf < 1.0f) {
309                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
310                                 shader_bsdf_blur(kg, sd, blur_roughness);
311                         }
312                 }
313
314 #ifdef __EMISSION__
315                 /* emission */
316                 if(sd->flag & SD_EMISSION) {
317                         float3 emission = indirect_primitive_emission(kg,
318                                                                       sd,
319                                                                       isect.t,
320                                                                       state->flag,
321                                                                       state->ray_pdf);
322                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
323                 }
324 #endif
325
326                 /* path termination. this is a strange place to put the termination, it's
327                  * mainly due to the mixed in MIS that we use. gives too many unneeded
328                  * shader evaluations, only need emission if we are going to terminate */
329                 float probability =
330                         path_state_terminate_probability(kg,
331                                                          state,
332                                                          throughput*num_samples);
333
                    /* probability == 0: always stop. probability == 1: always continue
                     * without drawing a random number. Otherwise continue only when the
                     * random number is below the probability, and divide the throughput
                     * of the surviving path by that probability. */
334                 if(probability == 0.0f) {
335                         break;
336                 }
337                 else if(probability != 1.0f) {
338                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
339
340                         if(terminate >= probability)
341                                 break;
342
343                         throughput /= probability;
344                 }
345
346 #ifdef __AO__
347                 /* ambient occlusion */
                    /* Runs when AO is enabled in the integrator settings or the shader
                     * set SD_AO; a zero ao_alpha is passed for this path. */
348                 if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
349                         kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
350                 }
351 #endif
352
353 #ifdef __SUBSURFACE__
354                 /* bssrdf scatter to a different location on the same object, replacing
355                  * the closures with a diffuse BSDF */
356                 if(sd->flag & SD_BSSRDF) {
357                         float bssrdf_probability;
358                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
359
360                         /* modify throughput for picking bssrdf or bsdf */
                            /* Applied whether or not a closure was picked (sc may be NULL). */
361                         throughput *= bssrdf_probability;
362
363                         /* do bssrdf scatter step if we picked a bssrdf closure */
364                         if(sc) {
365                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
366
367                                 float bssrdf_u, bssrdf_v;
368                                 path_state_rng_2D(kg,
369                                                   rng,
370                                                   state,
371                                                   PRNG_BSDF_U,
372                                                   &bssrdf_u, &bssrdf_v);
373                                 subsurface_scatter_step(kg,
374                                                         sd,
375                                                         state,
376                                                         state->flag,
377                                                         sc,
378                                                         &lcg_state,
379                                                         bssrdf_u, bssrdf_v,
380                                                         false);
381                         }
382                 }
383 #endif
384
385 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
                    /* Direct lighting at the surface point; `all` is taken from the
                     * sample_all_lights_indirect integrator setting. */
386                 if(kernel_data.integrator.use_direct_light) {
387                         int all = kernel_data.integrator.sample_all_lights_indirect;
388                         kernel_branched_path_surface_connect_light(kg,
389                                                                    rng,
390                                                                    sd,
391                                                                    emission_sd,
392                                                                    state,
393                                                                    throughput,
394                                                                    1.0f,
395                                                                    L,
396                                                                    all);
397                 }
398 #endif
399
                    /* Sample the next bounce; the path ends when this returns false. */
400                 if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
401                         break;
402         }
403 }
404
405 #ifdef __SUBSURFACE__
/* Try to perform a subsurface scatter at the surface point in `sd` and queue
 * the resulting indirect rays in `ss_indirect`.
 *
 * A closure is picked with subsurface_scatter_pick_closure(); `*throughput`
 * is multiplied by the returned probability whether or not a closure was
 * picked. When a BSSRDF closure was picked, every scatter hit gets direct
 * lighting and a sampled bounce; hits whose bounce succeeds are stored in
 * ss_indirect (state, ray, throughput and radiance) for later tracing.
 *
 * Returns true when a BSSRDF closure was picked (even if no hit produced an
 * indirect ray), false otherwise.
 *
 * The function qualifier is ccl_device, except under __KERNEL_CUDA__ where
 * it is ccl_device_inline.
 */
406 #  ifndef __KERNEL_CUDA__
407 ccl_device
408 #  else
409 ccl_device_inline
410 #  endif
411 bool kernel_path_subsurface_scatter(
412         KernelGlobals *kg,
413         ShaderData *sd,
414         ShaderData *emission_sd,
415         PathRadiance *L,
416         PathState *state,
417         RNG *rng,
418         Ray *ray,
419         float3 *throughput,
420         SubsurfaceIndirectRays *ss_indirect)
421 {
422         float bssrdf_probability;
423         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
424
425         /* modify throughput for picking bssrdf or bsdf */
426         *throughput *= bssrdf_probability;
427
428         /* do bssrdf scatter step if we picked a bssrdf closure */
429         if(sc) {
430                 /* We should never have two consecutive BSSRDF bounces,
431                  * the second one should be converted to a diffuse BSDF to
432                  * avoid this.
433                  */
434                 kernel_assert(!ss_indirect->tracing);
435
436                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
437
438                 SubsurfaceIntersection ss_isect;
439                 float bssrdf_u, bssrdf_v;
440                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
441                 int num_hits = subsurface_scatter_multi_intersect(kg,
442                                                                   &ss_isect,
443                                                                   sd,
444                                                                   sc,
445                                                                   &lcg_state,
446                                                                   bssrdf_u, bssrdf_v,
447                                                                   false);
448 #  ifdef __VOLUME__
                    /* Remember whether the volume stack of queued rays must be
                     * updated: volumes are enabled and sd carries the
                     * SD_OBJECT_INTERSECTS_VOLUME flag. */
449                 ss_indirect->need_update_volume_stack =
450                         kernel_data.integrator.use_volumes &&
451                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
452 #  endif
453
454                 /* compute lighting with the BSDF closure */
455                 for(int hit = 0; hit < num_hits; hit++) {
456                         /* NOTE: We reuse the existing ShaderData, we assume the path
457                          * integration loop stops when this function returns true.
458                          */
459                         subsurface_scatter_multi_setup(kg,
460                                                        &ss_isect,
461                                                        hit,
462                                                        sd,
463                                                        state,
464                                                        state->flag,
465                                                        sc,
466                                                        false);
467
                            /* Work directly in the next free slot of ss_indirect. The slot
                             * is only kept when num_rays is incremented below; otherwise
                             * the next hit overwrites it. */
468                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
469                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
470                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
471                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
472
473                         *hit_state = *state;
474                         *hit_ray = *ray;
475                         *hit_tp = *throughput;
476
477                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
478
                            /* Fresh radiance for this hit, seeded with the direct
                             * throughput and the indirect data of L. */
479                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
480                         hit_L->direct_throughput = L->direct_throughput;
481                         path_radiance_copy_indirect(hit_L, L);
482
                            /* NOTE(review): direct lighting is evaluated with `state`,
                             * not with the per-hit copy `hit_state` - confirm this is
                             * intended. */
483                         kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
484
485                         if(kernel_path_surface_bounce(kg,
486                                                       rng,
487                                                       sd,
488                                                       hit_tp,
489                                                       hit_state,
490                                                       hit_L,
491                                                       hit_ray))
492                         {
493 #  ifdef __LAMP_MIS__
494                                 hit_state->ray_t = 0.0f;
495 #  endif
496
497 #  ifdef __VOLUME__
498                                 if(ss_indirect->need_update_volume_stack) {
499                                         Ray volume_ray = *ray;
500                                         /* Setup ray from previous surface point to the new one. */
501                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
502                                                                      &volume_ray.t);
503
504                                         kernel_volume_stack_update_for_subsurface(
505                                             kg,
506                                             emission_sd,
507                                             &volume_ray,
508                                             hit_state->volume_stack);
509                                 }
510 #  endif
                                    /* Bounce succeeded: keep the slot as a queued ray. */
511                                 path_radiance_reset_indirect(L);
512                                 ss_indirect->num_rays++;
513                         }
514                         else {
                                    /* No bounce: fold this hit's radiance into L right away. */
515                                 path_radiance_accum_sample(L, hit_L, 1);
516                         }
517                 }
518                 return true;
519         }
520         return false;
521 }
522
523 ccl_device_inline void kernel_path_subsurface_init_indirect(
524         SubsurfaceIndirectRays *ss_indirect)
525 {
526         ss_indirect->tracing = false;
527         ss_indirect->num_rays = 0;
528 }
529
530 ccl_device void kernel_path_subsurface_accum_indirect(
531         SubsurfaceIndirectRays *ss_indirect,
532         PathRadiance *L)
533 {
534         if(ss_indirect->tracing) {
535                 path_radiance_sum_indirect(L);
536                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
537                 if(ss_indirect->num_rays == 0) {
538                         *L = ss_indirect->direct_L;
539                 }
540         }
541 }
542
543 ccl_device void kernel_path_subsurface_setup_indirect(
544         KernelGlobals *kg,
545         SubsurfaceIndirectRays *ss_indirect,
546         PathState *state,
547         Ray *ray,
548         PathRadiance *L,
549         float3 *throughput)
550 {
551         if(!ss_indirect->tracing) {
552                 ss_indirect->direct_L = *L;
553         }
554         ss_indirect->tracing = true;
555
556         /* Setup state, ray and throughput for indirect SSS rays. */
557         ss_indirect->num_rays--;
558
559         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
560         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
561
562         *state = ss_indirect->state[ss_indirect->num_rays];
563         *ray = *indirect_ray;
564         *L = *indirect_L;
565         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
566
567         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
568 }
569
570 #endif  /* __SUBSURFACE__ */
571
572 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
573                                                RNG *rng,
574                                                int sample,
575                                                Ray ray,
576                                                ccl_global float *buffer)
577 {
578         /* initialize */
579         PathRadiance L;
580         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
581         float L_transparent = 0.0f;
582
583         path_radiance_init(&L, kernel_data.film.use_light_pass);
584
585         /* shader data memory used for both volumes and surfaces, saves stack space */
586         ShaderData sd;
587         /* shader data used by emission, shadows, volume stacks */
588         ShaderData emission_sd;
589
590         PathState state;
591         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
592
593 #ifdef __KERNEL_DEBUG__
594         DebugData debug_data;
595         debug_data_init(&debug_data);
596 #endif
597
598 #ifdef __SUBSURFACE__
599         SubsurfaceIndirectRays ss_indirect;
600         kernel_path_subsurface_init_indirect(&ss_indirect);
601
602         for(;;) {
603 #endif
604
605         /* path iteration */
606         for(;;) {
607                 /* intersect scene */
608                 Intersection isect;
609                 uint visibility = path_state_ray_visibility(kg, &state);
610
611 #ifdef __HAIR__
612                 float difl = 0.0f, extmax = 0.0f;
613                 uint lcg_state = 0;
614
615                 if(kernel_data.bvh.have_curves) {
616                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
617                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
618                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
619                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
620                         }
621
622                         extmax = kernel_data.curve.maximum_width;
623                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
624                 }
625
626                 bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
627 #else
628                 bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
629 #endif
630
631 #ifdef __KERNEL_DEBUG__
632                 if(state.flag & PATH_RAY_CAMERA) {
633                         debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
634                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
635                 }
636                 debug_data.num_ray_bounces++;
637 #endif
638
639 #ifdef __LAMP_MIS__
640                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
641                         /* ray starting from previous non-transparent bounce */
642                         Ray light_ray;
643
644                         light_ray.P = ray.P - state.ray_t*ray.D;
645                         state.ray_t += isect.t;
646                         light_ray.D = ray.D;
647                         light_ray.t = state.ray_t;
648                         light_ray.time = ray.time;
649                         light_ray.dD = ray.dD;
650                         light_ray.dP = ray.dP;
651
652                         /* intersect with lamp */
653                         float3 emission;
654
655                         if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
656                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
657                 }
658 #endif
659
660 #ifdef __VOLUME__
661                 /* volume attenuation, emission, scatter */
662                 if(state.volume_stack[0].shader != SHADER_NONE) {
663                         Ray volume_ray = ray;
664                         volume_ray.t = (hit)? isect.t: FLT_MAX;
665
666                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
667
668 #  ifdef __VOLUME_DECOUPLED__
669                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
670                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
671
672                         if(decoupled) {
673                                 /* cache steps along volume for repeated sampling */
674                                 VolumeSegment volume_segment;
675
676                                 shader_setup_from_volume(kg, &sd, &volume_ray);
677                                 kernel_volume_decoupled_record(kg, &state,
678                                         &volume_ray, &sd, &volume_segment, heterogeneous);
679
680                                 volume_segment.sampling_method = sampling_method;
681
682                                 /* emission */
683                                 if(volume_segment.closure_flag & SD_EMISSION)
684                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
685
686                                 /* scattering */
687                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
688
689                                 if(volume_segment.closure_flag & SD_SCATTER) {
690                                         int all = false;
691
692                                         /* direct light sampling */
693                                         kernel_branched_path_volume_connect_light(kg, rng, &sd,
694                                                 &emission_sd, throughput, &state, &L, all,
695                                                 &volume_ray, &volume_segment);
696
697                                         /* indirect sample. if we use distance sampling and take just
698                                          * one sample for direct and indirect light, we could share
699                                          * this computation, but makes code a bit complex */
700                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
701                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
702
703                                         result = kernel_volume_decoupled_scatter(kg,
704                                                 &state, &volume_ray, &sd, &throughput,
705                                                 rphase, rscatter, &volume_segment, NULL, true);
706                                 }
707
708                                 /* free cached steps */
709                                 kernel_volume_decoupled_free(kg, &volume_segment);
710
711                                 if(result == VOLUME_PATH_SCATTERED) {
712                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
713                                                 continue;
714                                         else
715                                                 break;
716                                 }
717                                 else {
718                                         throughput *= volume_segment.accum_transmittance;
719                                 }
720                         }
721                         else
722 #  endif
723                         {
724                                 /* integrate along volume segment with distance sampling */
725                                 VolumeIntegrateResult result = kernel_volume_integrate(
726                                         kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
727
728 #  ifdef __VOLUME_SCATTER__
729                                 if(result == VOLUME_PATH_SCATTERED) {
730                                         /* direct lighting */
731                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
732
733                                         /* indirect light bounce */
734                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
735                                                 continue;
736                                         else
737                                                 break;
738                                 }
739 #  endif
740                         }
741                 }
742 #endif
743
744                 if(!hit) {
745                         /* eval background shader if nothing hit */
746                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
747                                 L_transparent += average(throughput);
748
749 #ifdef __PASSES__
750                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
751 #endif
752                                         break;
753                         }
754
755 #ifdef __BACKGROUND__
756                         /* sample background shader */
757                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
758                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
759 #endif
760
761                         break;
762                 }
763
764                 /* setup shading */
765                 shader_setup_from_ray(kg, &sd, &isect, &ray);
766                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
767                 shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
768
769                 /* holdout */
770 #ifdef __HOLDOUT__
771                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
772                         if(kernel_data.background.transparent) {
773                                 float3 holdout_weight;
774                                 
775                                 if(sd.flag & SD_HOLDOUT_MASK)
776                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
777                                 else
778                                         holdout_weight = shader_holdout_eval(kg, &sd);
779
780                                 /* any throughput is ok, should all be identical here */
781                                 L_transparent += average(holdout_weight*throughput);
782                         }
783
784                         if(sd.flag & SD_HOLDOUT_MASK)
785                                 break;
786                 }
787 #endif
788
789                 /* holdout mask objects do not write data passes */
790                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
791
792                 /* blurring of bsdf after bounces, for rays that have a small likelihood
793                  * of following this particular path (diffuse, rough glossy) */
794                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
795                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
796
797                         if(blur_pdf < 1.0f) {
798                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
799                                 shader_bsdf_blur(kg, &sd, blur_roughness);
800                         }
801                 }
802
803 #ifdef __EMISSION__
804                 /* emission */
805                 if(sd.flag & SD_EMISSION) {
806                         /* todo: is isect.t wrong here for transparent surfaces? */
807                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
808                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
809                 }
810 #endif
811
812                 /* path termination. this is a strange place to put the termination, it's
813                  * mainly due to the mixed in MIS that we use. gives too many unneeded
814                  * shader evaluations, only need emission if we are going to terminate */
815                 float probability = path_state_terminate_probability(kg, &state, throughput);
816
817                 if(probability == 0.0f) {
818                         break;
819                 }
820                 else if(probability != 1.0f) {
821                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
822                         if(terminate >= probability)
823                                 break;
824
825                         throughput /= probability;
826                 }
827
828 #ifdef __AO__
829                 /* ambient occlusion */
830                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
831                         kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
832                 }
833 #endif
834
835 #ifdef __SUBSURFACE__
836                 /* bssrdf scatter to a different location on the same object, replacing
837                  * the closures with a diffuse BSDF */
838                 if(sd.flag & SD_BSSRDF) {
839                         if(kernel_path_subsurface_scatter(kg,
840                                                           &sd,
841                                                           &emission_sd,
842                                                           &L,
843                                                           &state,
844                                                           rng,
845                                                           &ray,
846                                                           &throughput,
847                                                           &ss_indirect))
848                         {
849                                 break;
850                         }
851                 }
852 #endif  /* __SUBSURFACE__ */
853
854                 /* direct lighting */
855                 kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
856
857                 /* compute direct lighting and next bounce */
858                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
859                         break;
860         }
861
862 #ifdef __SUBSURFACE__
863                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
864
865                 /* Trace indirect subsurface rays by restarting the loop. this uses less
866                  * stack memory than invoking kernel_path_indirect.
867                  */
868                 if(ss_indirect.num_rays) {
869                         kernel_path_subsurface_setup_indirect(kg,
870                                                               &ss_indirect,
871                                                               &state,
872                                                               &ray,
873                                                               &L,
874                                                               &throughput);
875                 }
876                 else {
877                         break;
878                 }
879         }
880 #endif  /* __SUBSURFACE__ */
881
882         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
883
884         kernel_write_light_passes(kg, buffer, &L, sample);
885
886 #ifdef __KERNEL_DEBUG__
887         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
888 #endif
889
890         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
891 }
892
893 ccl_device void kernel_path_trace(KernelGlobals *kg,
894         ccl_global float *buffer, ccl_global uint *rng_state,
895         int sample, int x, int y, int offset, int stride)
896 {
897         /* buffer offset */
898         int index = offset + x + y*stride;
899         int pass_stride = kernel_data.film.pass_stride;
900
901         rng_state += index;
902         buffer += index*pass_stride;
903
904         /* initialize random numbers and ray */
905         RNG rng;
906         Ray ray;
907
908         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
909
910         /* integrate */
911         float4 L;
912
913         if(ray.t != 0.0f)
914                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
915         else
916                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
917
918         /* accumulate result in output buffer */
919         kernel_write_pass_float4(buffer, sample, L);
920
921         path_rng_end(kg, rng_state, rng);
922 }
923
924 CCL_NAMESPACE_END
925