Cycles: Add option to replace GI with an AO approximation after a certain number of bounces
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #  include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28 #include "bvh/bvh.h"
29
30 #include "kernel_accumulate.h"
31 #include "kernel_shader.h"
32 #include "kernel_light.h"
33 #include "kernel_passes.h"
34
35 #ifdef __SUBSURFACE__
36 #  include "kernel_subsurface.h"
37 #endif
38
39 #ifdef __VOLUME__
40 #  include "kernel_volume.h"
41 #endif
42
43 #include "kernel_path_state.h"
44 #include "kernel_shadow.h"
45 #include "kernel_emission.h"
46 #include "kernel_path_common.h"
47 #include "kernel_path_surface.h"
48 #include "kernel_path_volume.h"
49
50 #ifdef __KERNEL_DEBUG__
51 #  include "kernel_debug.h"
52 #endif
53
54 CCL_NAMESPACE_BEGIN
55
56 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
57                                         ShaderData *sd,
58                                         ShaderData *emission_sd,
59                                         PathRadiance *L,
60                                         PathState *state,
61                                         RNG *rng,
62                                         float3 throughput,
63                                         float3 ao_alpha)
64 {
65         /* todo: solve correlation */
66         float bsdf_u, bsdf_v;
67
68         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
69
70         float ao_factor = kernel_data.background.ao_factor;
71         float3 ao_N;
72         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
73         float3 ao_D;
74         float ao_pdf;
75
76         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
77
78         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
79                 Ray light_ray;
80                 float3 ao_shadow;
81
82                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
83                 light_ray.D = ao_D;
84                 light_ray.t = kernel_data.background.ao_distance;
85 #ifdef __OBJECT_MOTION__
86                 light_ray.time = ccl_fetch(sd, time);
87 #endif  /* __OBJECT_MOTION__ */
88                 light_ray.dP = ccl_fetch(sd, dP);
89                 light_ray.dD = differential3_zero();
90
91                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
92                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
93                 }
94         }
95 }
96
/* Trace an indirect light path starting from `ray` and accumulate what it
 * gathers into `L`.
 *
 * Each loop iteration intersects the scene, optionally evaluates lamp MIS
 * and volume attenuation/scattering, then either adds the background (on a
 * miss) or shades the hit surface: emission, probabilistic termination,
 * ambient occlusion, subsurface scatter step, direct light and finally a
 * surface bounce that sets up the next ray.
 *
 * The loop ends when the ray misses, when a hit occurs beyond the
 * `integrator.ao_bounces` limit, when the termination test stops the path,
 * or when a bounce function reports that no new ray was generated.
 *
 * kg           kernel globals, source of `kernel_data`.
 * sd           shader data, set up both from volume segments and from
 *              surface hits inside the loop.
 * emission_sd  shader data passed to the lamp/background/shadow helpers.
 * rng          random number generator state.
 * ray          current ray; `ray->t` is overwritten once the AO bounce
 *              limit is exceeded and the pointer is handed to the bounce
 *              functions.
 * throughput   path throughput, passed by value and modified locally.
 * num_samples  only used to scale `throughput` for the termination
 *              probability.
 * state        path state (bounce count, flags, volume stack, ...).
 * L            radiance accumulator.
 */
97 ccl_device void kernel_path_indirect(KernelGlobals *kg,
98                                      ShaderData *sd,
99                                      ShaderData *emission_sd,
100                                      RNG *rng,
101                                      Ray *ray,
102                                      float3 throughput,
103                                      int num_samples,
104                                      PathState *state,
105                                      PathRadiance *L)
106 {
107         /* path iteration */
108         for(;;) {
109                 /* intersect scene */
110                 Intersection isect;
111                 uint visibility = path_state_ray_visibility(kg, state);
                    /* Beyond the AO bounce limit the ray is traced with shadow
                     * visibility and its length is limited to the AO distance. */
112                 if(state->bounce > kernel_data.integrator.ao_bounces) {
113                         visibility = PATH_RAY_SHADOW;
114                         ray->t = kernel_data.background.ao_distance;
115                 }
116                 bool hit = scene_intersect(kg,
117                                            *ray,
118                                            visibility,
119                                            &isect,
120                                            NULL,
121                                            0.0f, 0.0f);
122
123 #ifdef __LAMP_MIS__
124                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
125                         /* ray starting from previous non-transparent bounce */
126                         Ray light_ray;
127
128                         light_ray.P = ray->P - state->ray_t*ray->D;
129                         state->ray_t += isect.t;
130                         light_ray.D = ray->D;
131                         light_ray.t = state->ray_t;
132                         light_ray.time = ray->time;
133                         light_ray.dD = ray->dD;
134                         light_ray.dP = ray->dP;
135
136                         /* intersect with lamp */
137                         float3 emission;
138                         if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
139                                 path_radiance_accum_emission(L,
140                                                              throughput,
141                                                              emission,
142                                                              state->bounce);
143                         }
144                 }
145 #endif  /* __LAMP_MIS__ */
146
147 #ifdef __VOLUME__
148                 /* Sanitize volume stack. */
149                 if(!hit) {
150                         kernel_volume_clean_stack(kg, state->volume_stack);
151                 }
152                 /* volume attenuation, emission, scatter */
153                 if(state->volume_stack[0].shader != SHADER_NONE) {
                            /* The volume segment ends at the surface hit, or
                             * extends to FLT_MAX when nothing was hit. */
154                         Ray volume_ray = *ray;
155                         volume_ray.t = (hit)? isect.t: FLT_MAX;
156
157                         bool heterogeneous =
158                                 volume_stack_is_heterogeneous(kg,
159                                                               state->volume_stack);
160
161 #  ifdef __VOLUME_DECOUPLED__
162                         int sampling_method =
163                                 volume_stack_sampling_method(kg,
164                                                              state->volume_stack);
165                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
166
167                         if(decoupled) {
168                                 /* cache steps along volume for repeated sampling */
169                                 VolumeSegment volume_segment;
170
171                                 shader_setup_from_volume(kg,
172                                                          sd,
173                                                          &volume_ray);
174                                 kernel_volume_decoupled_record(kg,
175                                                                state,
176                                                                &volume_ray,
177                                                                sd,
178                                                                &volume_segment,
179                                                                heterogeneous);
180
181                                 volume_segment.sampling_method = sampling_method;
182
183                                 /* emission */
184                                 if(volume_segment.closure_flag & SD_EMISSION) {
185                                         path_radiance_accum_emission(L,
186                                                                      throughput,
187                                                                      volume_segment.accum_emission,
188                                                                      state->bounce);
189                                 }
190
191                                 /* scattering */
192                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
193
194                                 if(volume_segment.closure_flag & SD_SCATTER) {
195                                         int all = kernel_data.integrator.sample_all_lights_indirect;
196
197                                         /* direct light sampling */
198                                         kernel_branched_path_volume_connect_light(kg,
199                                                                                   rng,
200                                                                                   sd,
201                                                                                   emission_sd,
202                                                                                   throughput,
203                                                                                   state,
204                                                                                   L,
205                                                                                   all,
206                                                                                   &volume_ray,
207                                                                                   &volume_segment);
208
209                                         /* indirect sample. if we use distance sampling and take just
210                                          * one sample for direct and indirect light, we could share
211                                          * this computation, but makes code a bit complex */
212                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
213                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
214
215                                         result = kernel_volume_decoupled_scatter(kg,
216                                                                                  state,
217                                                                                  &volume_ray,
218                                                                                  sd,
219                                                                                  &throughput,
220                                                                                  rphase,
221                                                                                  rscatter,
222                                                                                  &volume_segment,
223                                                                                  NULL,
224                                                                                  true);
225                                 }
226
227                                 /* free cached steps */
228                                 kernel_volume_decoupled_free(kg, &volume_segment);
229
                                    /* A scatter event replaces the surface interaction:
                                     * either continue with the bounced ray or end the path. */
230                                 if(result == VOLUME_PATH_SCATTERED) {
231                                         if(kernel_path_volume_bounce(kg,
232                                                                      rng,
233                                                                      sd,
234                                                                      &throughput,
235                                                                      state,
236                                                                      L,
237                                                                      ray))
238                                         {
239                                                 continue;
240                                         }
241                                         else {
242                                                 break;
243                                         }
244                                 }
245                                 else {
246                                         throughput *= volume_segment.accum_transmittance;
247                                 }
248                         }
249                         else
250 #  endif  /* __VOLUME_DECOUPLED__ */
251                         {
252                                 /* integrate along volume segment with distance sampling */
253                                 VolumeIntegrateResult result = kernel_volume_integrate(
254                                         kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
255
256 #  ifdef __VOLUME_SCATTER__
257                                 if(result == VOLUME_PATH_SCATTERED) {
258                                         /* direct lighting */
259                                         kernel_path_volume_connect_light(kg,
260                                                                          rng,
261                                                                          sd,
262                                                                          emission_sd,
263                                                                          throughput,
264                                                                          state,
265                                                                          L);
266
267                                         /* indirect light bounce */
268                                         if(kernel_path_volume_bounce(kg,
269                                                                      rng,
270                                                                      sd,
271                                                                      &throughput,
272                                                                      state,
273                                                                      L,
274                                                                      ray))
275                                         {
276                                                 continue;
277                                         }
278                                         else {
279                                                 break;
280                                         }
281                                 }
282 #  endif  /* __VOLUME_SCATTER__ */
283                         }
284                 }
285 #endif  /* __VOLUME__ */
286
287                 if(!hit) {
288 #ifdef __BACKGROUND__
289                         /* sample background shader */
290                         float3 L_background = indirect_background(kg, emission_sd, state, ray);
291                         path_radiance_accum_background(L,
292                                                        throughput,
293                                                        L_background,
294                                                        state->bounce);
295 #endif  /* __BACKGROUND__ */
296
297                         break;
298                 }
299                 else if(state->bounce > kernel_data.integrator.ao_bounces) {
                            /* Hit beyond the AO bounce limit: end the path here
                             * without shading the surface. */
300                         break;
301                 }
302
303                 /* setup shading */
304                 shader_setup_from_ray(kg,
305                                       sd,
306                                       &isect,
307                                       ray);
308                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
309                 shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
310 #ifdef __BRANCHED_PATH__
311                 shader_merge_closures(sd);
312 #endif  /* __BRANCHED_PATH__ */
313
314                 /* blurring of bsdf after bounces, for rays that have a small likelihood
315                  * of following this particular path (diffuse, rough glossy) */
316                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
317                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
318
319                         if(blur_pdf < 1.0f) {
320                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
321                                 shader_bsdf_blur(kg, sd, blur_roughness);
322                         }
323                 }
324
325 #ifdef __EMISSION__
326                 /* emission */
327                 if(sd->flag & SD_EMISSION) {
328                         float3 emission = indirect_primitive_emission(kg,
329                                                                       sd,
330                                                                       isect.t,
331                                                                       state->flag,
332                                                                       state->ray_pdf);
333                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
334                 }
335 #endif  /* __EMISSION__ */
336
337                 /* path termination. this is a strange place to put the termination, it's
338                  * mainly due to the mixed in MIS that we use. gives too many unneeded
339                  * shader evaluations, only need emission if we are going to terminate */
340                 float probability =
341                         path_state_terminate_probability(kg,
342                                                          state,
343                                                          throughput*num_samples);
344
                    /* A probability of 0 always ends the path and 1 always continues
                     * it; otherwise a random number decides, and the throughput of a
                     * continuing path is divided by the probability. */
345                 if(probability == 0.0f) {
346                         break;
347                 }
348                 else if(probability != 1.0f) {
349                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
350
351                         if(terminate >= probability)
352                                 break;
353
354                         throughput /= probability;
355                 }
356
357 #ifdef __AO__
358                 /* ambient occlusion */
359                 if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
360                         kernel_path_ao(kg, sd, emission_sd, L, state, rng, throughput, make_float3(0.0f, 0.0f, 0.0f));
361                 }
362 #endif  /* __AO__ */
363
364 #ifdef __SUBSURFACE__
365                 /* bssrdf scatter to a different location on the same object, replacing
366                  * the closures with a diffuse BSDF */
367                 if(sd->flag & SD_BSSRDF) {
368                         float bssrdf_probability;
369                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
370
371                         /* modify throughput for picking bssrdf or bsdf */
372                         throughput *= bssrdf_probability;
373
374                         /* do bssrdf scatter step if we picked a bssrdf closure */
375                         if(sc) {
376                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
377
378                                 float bssrdf_u, bssrdf_v;
379                                 path_state_rng_2D(kg,
380                                                   rng,
381                                                   state,
382                                                   PRNG_BSDF_U,
383                                                   &bssrdf_u, &bssrdf_v);
384                                 subsurface_scatter_step(kg,
385                                                         sd,
386                                                         state,
387                                                         state->flag,
388                                                         sc,
389                                                         &lcg_state,
390                                                         bssrdf_u, bssrdf_v,
391                                                         false);
392                         }
393                 }
394 #endif  /* __SUBSURFACE__ */
395
396 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
397                 if(kernel_data.integrator.use_direct_light) {
398                         int all = kernel_data.integrator.sample_all_lights_indirect;
399                         kernel_branched_path_surface_connect_light(kg,
400                                                                    rng,
401                                                                    sd,
402                                                                    emission_sd,
403                                                                    state,
404                                                                    throughput,
405                                                                    1.0f,
406                                                                    L,
407                                                                    all);
408                 }
409 #endif  /* defined(__EMISSION__) && defined(__BRANCHED_PATH__) */
410
                    /* Set up the next ray; stop when no bounce was generated. */
411                 if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
412                         break;
413         }
414 }
415
416 #ifdef __SUBSURFACE__
417 #  ifndef __KERNEL_CUDA__
418 ccl_device
419 #  else
420 ccl_device_inline
421 #  endif
422 bool kernel_path_subsurface_scatter(
423         KernelGlobals *kg,
424         ShaderData *sd,
425         ShaderData *emission_sd,
426         PathRadiance *L,
427         PathState *state,
428         RNG *rng,
429         Ray *ray,
430         float3 *throughput,
431         SubsurfaceIndirectRays *ss_indirect)
432 {
433         float bssrdf_probability;
434         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
435
436         /* modify throughput for picking bssrdf or bsdf */
437         *throughput *= bssrdf_probability;
438
439         /* do bssrdf scatter step if we picked a bssrdf closure */
440         if(sc) {
441                 /* We should never have two consecutive BSSRDF bounces,
442                  * the second one should be converted to a diffuse BSDF to
443                  * avoid this.
444                  */
445                 kernel_assert(!ss_indirect->tracing);
446
447                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
448
449                 SubsurfaceIntersection ss_isect;
450                 float bssrdf_u, bssrdf_v;
451                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
452                 int num_hits = subsurface_scatter_multi_intersect(kg,
453                                                                   &ss_isect,
454                                                                   sd,
455                                                                   sc,
456                                                                   &lcg_state,
457                                                                   bssrdf_u, bssrdf_v,
458                                                                   false);
459 #  ifdef __VOLUME__
460                 ss_indirect->need_update_volume_stack =
461                         kernel_data.integrator.use_volumes &&
462                         ccl_fetch(sd, object_flag) & SD_OBJECT_INTERSECTS_VOLUME;
463 #  endif  /* __VOLUME__ */
464
465                 /* compute lighting with the BSDF closure */
466                 for(int hit = 0; hit < num_hits; hit++) {
467                         /* NOTE: We reuse the existing ShaderData, we assume the path
468                          * integration loop stops when this function returns true.
469                          */
470                         subsurface_scatter_multi_setup(kg,
471                                                        &ss_isect,
472                                                        hit,
473                                                        sd,
474                                                        state,
475                                                        state->flag,
476                                                        sc,
477                                                        false);
478
479                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
480                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
481                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
482                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
483
484                         *hit_state = *state;
485                         *hit_ray = *ray;
486                         *hit_tp = *throughput;
487
488                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
489
490                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
491                         hit_L->direct_throughput = L->direct_throughput;
492                         path_radiance_copy_indirect(hit_L, L);
493
494                         kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
495
496                         if(kernel_path_surface_bounce(kg,
497                                                       rng,
498                                                       sd,
499                                                       hit_tp,
500                                                       hit_state,
501                                                       hit_L,
502                                                       hit_ray))
503                         {
504 #  ifdef __LAMP_MIS__
505                                 hit_state->ray_t = 0.0f;
506 #  endif  /* __LAMP_MIS__ */
507
508 #  ifdef __VOLUME__
509                                 if(ss_indirect->need_update_volume_stack) {
510                                         Ray volume_ray = *ray;
511                                         /* Setup ray from previous surface point to the new one. */
512                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
513                                                                      &volume_ray.t);
514
515                                         kernel_volume_stack_update_for_subsurface(
516                                             kg,
517                                             emission_sd,
518                                             &volume_ray,
519                                             hit_state->volume_stack);
520                                 }
521 #  endif  /* __VOLUME__ */
522                                 path_radiance_reset_indirect(L);
523                                 ss_indirect->num_rays++;
524                         }
525                         else {
526                                 path_radiance_accum_sample(L, hit_L, 1);
527                         }
528                 }
529                 return true;
530         }
531         return false;
532 }
533
534 ccl_device_inline void kernel_path_subsurface_init_indirect(
535         SubsurfaceIndirectRays *ss_indirect)
536 {
537         ss_indirect->tracing = false;
538         ss_indirect->num_rays = 0;
539 }
540
541 ccl_device void kernel_path_subsurface_accum_indirect(
542         SubsurfaceIndirectRays *ss_indirect,
543         PathRadiance *L)
544 {
545         if(ss_indirect->tracing) {
546                 path_radiance_sum_indirect(L);
547                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
548                 if(ss_indirect->num_rays == 0) {
549                         *L = ss_indirect->direct_L;
550                 }
551         }
552 }
553
554 ccl_device void kernel_path_subsurface_setup_indirect(
555         KernelGlobals *kg,
556         SubsurfaceIndirectRays *ss_indirect,
557         PathState *state,
558         Ray *ray,
559         PathRadiance *L,
560         float3 *throughput)
561 {
562         if(!ss_indirect->tracing) {
563                 ss_indirect->direct_L = *L;
564         }
565         ss_indirect->tracing = true;
566
567         /* Setup state, ray and throughput for indirect SSS rays. */
568         ss_indirect->num_rays--;
569
570         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
571         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
572
573         *state = ss_indirect->state[ss_indirect->num_rays];
574         *ray = *indirect_ray;
575         *L = *indirect_L;
576         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
577
578         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
579 }
580
581 #endif  /* __SUBSURFACE__ */
582
583 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
584                                                RNG *rng,
585                                                int sample,
586                                                Ray ray,
587                                                ccl_global float *buffer)
588 {
589         /* initialize */
590         PathRadiance L;
591         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
592         float L_transparent = 0.0f;
593
594         path_radiance_init(&L, kernel_data.film.use_light_pass);
595
596         /* shader data memory used for both volumes and surfaces, saves stack space */
597         ShaderData sd;
598         /* shader data used by emission, shadows, volume stacks */
599         ShaderData emission_sd;
600
601         PathState state;
602         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
603
604 #ifdef __KERNEL_DEBUG__
605         DebugData debug_data;
606         debug_data_init(&debug_data);
607 #endif  /* __KERNEL_DEBUG__ */
608
609 #ifdef __SUBSURFACE__
610         SubsurfaceIndirectRays ss_indirect;
611         kernel_path_subsurface_init_indirect(&ss_indirect);
612
613         for(;;) {
614 #endif  /* __SUBSURFACE__ */
615
616         /* path iteration */
617         for(;;) {
618                 /* intersect scene */
619                 Intersection isect;
620                 uint visibility = path_state_ray_visibility(kg, &state);
621
622 #ifdef __HAIR__
623                 float difl = 0.0f, extmax = 0.0f;
624                 uint lcg_state = 0;
625
626                 if(kernel_data.bvh.have_curves) {
627                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
628                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
629                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
630                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
631                         }
632
633                         extmax = kernel_data.curve.maximum_width;
634                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
635                 }
636
637                 if(state.bounce > kernel_data.integrator.ao_bounces) {
638                         visibility = PATH_RAY_SHADOW;
639                         ray.t = kernel_data.background.ao_distance;
640                 }
641
642                 bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
643 #else
644                 bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
645 #endif  /* __HAIR__ */
646
647 #ifdef __KERNEL_DEBUG__
648                 if(state.flag & PATH_RAY_CAMERA) {
649                         debug_data.num_bvh_traversed_nodes += isect.num_traversed_nodes;
650                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
651                         debug_data.num_bvh_intersections += isect.num_intersections;
652                 }
653                 debug_data.num_ray_bounces++;
654 #endif  /* __KERNEL_DEBUG__ */
655
656 #ifdef __LAMP_MIS__
657                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
658                         /* ray starting from previous non-transparent bounce */
659                         Ray light_ray;
660
661                         light_ray.P = ray.P - state.ray_t*ray.D;
662                         state.ray_t += isect.t;
663                         light_ray.D = ray.D;
664                         light_ray.t = state.ray_t;
665                         light_ray.time = ray.time;
666                         light_ray.dD = ray.dD;
667                         light_ray.dP = ray.dP;
668
669                         /* intersect with lamp */
670                         float3 emission;
671
672                         if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
673                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
674                 }
675 #endif  /* __LAMP_MIS__ */
676
677 #ifdef __VOLUME__
678                 /* Sanitize volume stack. */
679                 if(!hit) {
680                         kernel_volume_clean_stack(kg, state.volume_stack);
681                 }
682                 /* volume attenuation, emission, scatter */
683                 if(state.volume_stack[0].shader != SHADER_NONE) {
684                         Ray volume_ray = ray;
685                         volume_ray.t = (hit)? isect.t: FLT_MAX;
686
687                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
688
689 #  ifdef __VOLUME_DECOUPLED__
690                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
691                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
692
693                         if(decoupled) {
694                                 /* cache steps along volume for repeated sampling */
695                                 VolumeSegment volume_segment;
696
697                                 shader_setup_from_volume(kg, &sd, &volume_ray);
698                                 kernel_volume_decoupled_record(kg, &state,
699                                         &volume_ray, &sd, &volume_segment, heterogeneous);
700
701                                 volume_segment.sampling_method = sampling_method;
702
703                                 /* emission */
704                                 if(volume_segment.closure_flag & SD_EMISSION)
705                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
706
707                                 /* scattering */
708                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
709
710                                 if(volume_segment.closure_flag & SD_SCATTER) {
711                                         int all = false;
712
713                                         /* direct light sampling */
714                                         kernel_branched_path_volume_connect_light(kg, rng, &sd,
715                                                 &emission_sd, throughput, &state, &L, all,
716                                                 &volume_ray, &volume_segment);
717
718                                         /* indirect sample. if we use distance sampling and take just
719                                          * one sample for direct and indirect light, we could share
720                                          * this computation, but makes code a bit complex */
721                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
722                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
723
724                                         result = kernel_volume_decoupled_scatter(kg,
725                                                 &state, &volume_ray, &sd, &throughput,
726                                                 rphase, rscatter, &volume_segment, NULL, true);
727                                 }
728
729                                 /* free cached steps */
730                                 kernel_volume_decoupled_free(kg, &volume_segment);
731
732                                 if(result == VOLUME_PATH_SCATTERED) {
733                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
734                                                 continue;
735                                         else
736                                                 break;
737                                 }
738                                 else {
739                                         throughput *= volume_segment.accum_transmittance;
740                                 }
741                         }
742                         else
743 #  endif  /* __VOLUME_DECOUPLED__ */
744                         {
745                                 /* integrate along volume segment with distance sampling */
746                                 VolumeIntegrateResult result = kernel_volume_integrate(
747                                         kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
748
749 #  ifdef __VOLUME_SCATTER__
750                                 if(result == VOLUME_PATH_SCATTERED) {
751                                         /* direct lighting */
752                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
753
754                                         /* indirect light bounce */
755                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
756                                                 continue;
757                                         else
758                                                 break;
759                                 }
760 #  endif  /* __VOLUME_SCATTER__ */
761                         }
762                 }
763 #endif  /* __VOLUME__ */
764
765                 if(!hit) {
766                         /* eval background shader if nothing hit */
767                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
768                                 L_transparent += average(throughput);
769
770 #ifdef __PASSES__
771                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
772 #endif  /* __PASSES__ */
773                                         break;
774                         }
775
776 #ifdef __BACKGROUND__
777                         /* sample background shader */
778                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
779                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
780 #endif  /* __BACKGROUND__ */
781
782                         break;
783                 }
784                 else if(state.bounce > kernel_data.integrator.ao_bounces) {
785                         break;
786                 }
787
788                 /* setup shading */
789                 shader_setup_from_ray(kg, &sd, &isect, &ray);
790                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
791                 shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
792
793                 /* holdout */
794 #ifdef __HOLDOUT__
795                 if(((sd.flag & SD_HOLDOUT) ||
796                     (sd.object_flag & SD_OBJECT_HOLDOUT_MASK)) &&
797                    (state.flag & PATH_RAY_CAMERA))
798                 {
799                         if(kernel_data.background.transparent) {
800                                 float3 holdout_weight;
801                                 if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
802                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
803                                 }
804                                 else {
805                                         holdout_weight = shader_holdout_eval(kg, &sd);
806                                 }
807                                 /* any throughput is ok, should all be identical here */
808                                 L_transparent += average(holdout_weight*throughput);
809                         }
810
811                         if(sd.object_flag & SD_OBJECT_HOLDOUT_MASK) {
812                                 break;
813                         }
814                 }
815 #endif  /* __HOLDOUT__ */
816
817                 /* holdout mask objects do not write data passes */
818                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
819
820                 /* blurring of bsdf after bounces, for rays that have a small likelihood
821                  * of following this particular path (diffuse, rough glossy) */
822                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
823                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
824
825                         if(blur_pdf < 1.0f) {
826                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
827                                 shader_bsdf_blur(kg, &sd, blur_roughness);
828                         }
829                 }
830
831 #ifdef __EMISSION__
832                 /* emission */
833                 if(sd.flag & SD_EMISSION) {
834                         /* todo: is isect.t wrong here for transparent surfaces? */
835                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
836                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
837                 }
838 #endif  /* __EMISSION__ */
839
840                 /* path termination. this is a strange place to put the termination, it's
841                  * mainly due to the mixed in MIS that we use. gives too many unneeded
842                  * shader evaluations, only need emission if we are going to terminate */
843                 float probability = path_state_terminate_probability(kg, &state, throughput);
844
845                 if(probability == 0.0f) {
846                         break;
847                 }
848                 else if(probability != 1.0f) {
849                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
850                         if(terminate >= probability)
851                                 break;
852
853                         throughput /= probability;
854                 }
855
856 #ifdef __AO__
857                 /* ambient occlusion */
858                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
859                         kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput, shader_bsdf_alpha(kg, &sd));
860                 }
861 #endif  /* __AO__ */
862
863 #ifdef __SUBSURFACE__
864                 /* bssrdf scatter to a different location on the same object, replacing
865                  * the closures with a diffuse BSDF */
866                 if(sd.flag & SD_BSSRDF) {
867                         if(kernel_path_subsurface_scatter(kg,
868                                                           &sd,
869                                                           &emission_sd,
870                                                           &L,
871                                                           &state,
872                                                           rng,
873                                                           &ray,
874                                                           &throughput,
875                                                           &ss_indirect))
876                         {
877                                 break;
878                         }
879                 }
880 #endif  /* __SUBSURFACE__ */
881
882                 /* direct lighting */
883                 kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
884
885                 /* compute direct lighting and next bounce */
886                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
887                         break;
888         }
889
890 #ifdef __SUBSURFACE__
891                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
892
893                 /* Trace indirect subsurface rays by restarting the loop. this uses less
894                  * stack memory than invoking kernel_path_indirect.
895                  */
896                 if(ss_indirect.num_rays) {
897                         kernel_path_subsurface_setup_indirect(kg,
898                                                               &ss_indirect,
899                                                               &state,
900                                                               &ray,
901                                                               &L,
902                                                               &throughput);
903                 }
904                 else {
905                         break;
906                 }
907         }
908 #endif  /* __SUBSURFACE__ */
909
910         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
911
912         kernel_write_light_passes(kg, buffer, &L, sample);
913
914 #ifdef __KERNEL_DEBUG__
915         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
916 #endif  /* __KERNEL_DEBUG__ */
917
918         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
919 }
920
921 ccl_device void kernel_path_trace(KernelGlobals *kg,
922         ccl_global float *buffer, ccl_global uint *rng_state,
923         int sample, int x, int y, int offset, int stride)
924 {
925         /* buffer offset */
926         int index = offset + x + y*stride;
927         int pass_stride = kernel_data.film.pass_stride;
928
929         rng_state += index;
930         buffer += index*pass_stride;
931
932         /* initialize random numbers and ray */
933         RNG rng;
934         Ray ray;
935
936         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
937
938         /* integrate */
939         float4 L;
940
941         if(ray.t != 0.0f)
942                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
943         else
944                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
945
946         /* accumulate result in output buffer */
947         kernel_write_pass_float4(buffer, sample, L);
948
949         path_rng_end(kg, rng_state, rng);
950 }
951
952 CCL_NAMESPACE_END
953