Fix Cycles CUDA performance on CUDA 8.0.
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #  include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28 #include "bvh/bvh.h"
29
30 #include "kernel_accumulate.h"
31 #include "kernel_shader.h"
32 #include "kernel_light.h"
33 #include "kernel_passes.h"
34
35 #ifdef __SUBSURFACE__
36 #  include "kernel_subsurface.h"
37 #endif
38
39 #ifdef __VOLUME__
40 #  include "kernel_volume.h"
41 #endif
42
43 #include "kernel_path_state.h"
44 #include "kernel_shadow.h"
45 #include "kernel_emission.h"
46 #include "kernel_path_common.h"
47 #include "kernel_path_surface.h"
48 #include "kernel_path_volume.h"
49
50 #ifdef __KERNEL_DEBUG__
51 #  include "kernel_debug.h"
52 #endif
53
54 CCL_NAMESPACE_BEGIN
55
56 ccl_device void kernel_path_indirect(KernelGlobals *kg,
57                                      ShaderData *sd,
58                                      ShaderData *emission_sd,
59                                      RNG *rng,
60                                      Ray *ray,
61                                      float3 throughput,
62                                      int num_samples,
63                                      PathState *state,
64                                      PathRadiance *L)
65 {
66         /* path iteration */
67         for(;;) {
68                 /* intersect scene */
69                 Intersection isect;
70                 uint visibility = path_state_ray_visibility(kg, state);
71                 bool hit = scene_intersect(kg,
72                                            *ray,
73                                            visibility,
74                                            &isect,
75                                            NULL,
76                                            0.0f, 0.0f);
77
78 #ifdef __LAMP_MIS__
79                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
80                         /* ray starting from previous non-transparent bounce */
81                         Ray light_ray;
82
83                         light_ray.P = ray->P - state->ray_t*ray->D;
84                         state->ray_t += isect.t;
85                         light_ray.D = ray->D;
86                         light_ray.t = state->ray_t;
87                         light_ray.time = ray->time;
88                         light_ray.dD = ray->dD;
89                         light_ray.dP = ray->dP;
90
91                         /* intersect with lamp */
92                         float3 emission;
93                         if(indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission)) {
94                                 path_radiance_accum_emission(L,
95                                                              throughput,
96                                                              emission,
97                                                              state->bounce);
98                         }
99                 }
100 #endif
101
102 #ifdef __VOLUME__
103                 /* volume attenuation, emission, scatter */
104                 if(state->volume_stack[0].shader != SHADER_NONE) {
105                         Ray volume_ray = *ray;
106                         volume_ray.t = (hit)? isect.t: FLT_MAX;
107
108                         bool heterogeneous =
109                                 volume_stack_is_heterogeneous(kg,
110                                                               state->volume_stack);
111
112 #  ifdef __VOLUME_DECOUPLED__
113                         int sampling_method =
114                                 volume_stack_sampling_method(kg,
115                                                              state->volume_stack);
116                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
117
118                         if(decoupled) {
119                                 /* cache steps along volume for repeated sampling */
120                                 VolumeSegment volume_segment;
121
122                                 shader_setup_from_volume(kg,
123                                                          sd,
124                                                          &volume_ray);
125                                 kernel_volume_decoupled_record(kg,
126                                                                state,
127                                                                &volume_ray,
128                                                                sd,
129                                                                &volume_segment,
130                                                                heterogeneous);
131
132                                 volume_segment.sampling_method = sampling_method;
133
134                                 /* emission */
135                                 if(volume_segment.closure_flag & SD_EMISSION) {
136                                         path_radiance_accum_emission(L,
137                                                                      throughput,
138                                                                      volume_segment.accum_emission,
139                                                                      state->bounce);
140                                 }
141
142                                 /* scattering */
143                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
144
145                                 if(volume_segment.closure_flag & SD_SCATTER) {
146                                         int all = kernel_data.integrator.sample_all_lights_indirect;
147
148                                         /* direct light sampling */
149                                         kernel_branched_path_volume_connect_light(kg,
150                                                                                   rng,
151                                                                                   sd,
152                                                                                   emission_sd,
153                                                                                   throughput,
154                                                                                   state,
155                                                                                   L,
156                                                                                   all,
157                                                                                   &volume_ray,
158                                                                                   &volume_segment);
159
160                                         /* indirect sample. if we use distance sampling and take just
161                                          * one sample for direct and indirect light, we could share
162                                          * this computation, but makes code a bit complex */
163                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
164                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
165
166                                         result = kernel_volume_decoupled_scatter(kg,
167                                                                                  state,
168                                                                                  &volume_ray,
169                                                                                  sd,
170                                                                                  &throughput,
171                                                                                  rphase,
172                                                                                  rscatter,
173                                                                                  &volume_segment,
174                                                                                  NULL,
175                                                                                  true);
176                                 }
177
178                                 /* free cached steps */
179                                 kernel_volume_decoupled_free(kg, &volume_segment);
180
181                                 if(result == VOLUME_PATH_SCATTERED) {
182                                         if(kernel_path_volume_bounce(kg,
183                                                                      rng,
184                                                                      sd,
185                                                                      &throughput,
186                                                                      state,
187                                                                      L,
188                                                                      ray))
189                                         {
190                                                 continue;
191                                         }
192                                         else {
193                                                 break;
194                                         }
195                                 }
196                                 else {
197                                         throughput *= volume_segment.accum_transmittance;
198                                 }
199                         }
200                         else
201 #  endif
202                         {
203                                 /* integrate along volume segment with distance sampling */
204                                 VolumeIntegrateResult result = kernel_volume_integrate(
205                                         kg, state, sd, &volume_ray, L, &throughput, rng, heterogeneous);
206
207 #  ifdef __VOLUME_SCATTER__
208                                 if(result == VOLUME_PATH_SCATTERED) {
209                                         /* direct lighting */
210                                         kernel_path_volume_connect_light(kg,
211                                                                          rng,
212                                                                          sd,
213                                                                          emission_sd,
214                                                                          throughput,
215                                                                          state,
216                                                                          L);
217
218                                         /* indirect light bounce */
219                                         if(kernel_path_volume_bounce(kg,
220                                                                      rng,
221                                                                      sd,
222                                                                      &throughput,
223                                                                      state,
224                                                                      L,
225                                                                      ray))
226                                         {
227                                                 continue;
228                                         }
229                                         else {
230                                                 break;
231                                         }
232                                 }
233 #  endif
234                         }
235                 }
236 #endif
237
238                 if(!hit) {
239 #ifdef __BACKGROUND__
240                         /* sample background shader */
241                         float3 L_background = indirect_background(kg, emission_sd, state, ray);
242                         path_radiance_accum_background(L,
243                                                        throughput,
244                                                        L_background,
245                                                        state->bounce);
246 #endif
247
248                         break;
249                 }
250
251                 /* setup shading */
252                 shader_setup_from_ray(kg,
253                                       sd,
254                                       &isect,
255                                       ray);
256                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
257                 shader_eval_surface(kg, sd, rng, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
258 #ifdef __BRANCHED_PATH__
259                 shader_merge_closures(sd);
260 #endif
261
262                 /* blurring of bsdf after bounces, for rays that have a small likelihood
263                  * of following this particular path (diffuse, rough glossy) */
264                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
265                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
266
267                         if(blur_pdf < 1.0f) {
268                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
269                                 shader_bsdf_blur(kg, sd, blur_roughness);
270                         }
271                 }
272
273 #ifdef __EMISSION__
274                 /* emission */
275                 if(sd->flag & SD_EMISSION) {
276                         float3 emission = indirect_primitive_emission(kg,
277                                                                       sd,
278                                                                       isect.t,
279                                                                       state->flag,
280                                                                       state->ray_pdf);
281                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
282                 }
283 #endif
284
285                 /* path termination. this is a strange place to put the termination, it's
286                  * mainly due to the mixed in MIS that we use. gives too many unneeded
287                  * shader evaluations, only need emission if we are going to terminate */
288                 float probability =
289                         path_state_terminate_probability(kg,
290                                                          state,
291                                                          throughput*num_samples);
292
293                 if(probability == 0.0f) {
294                         break;
295                 }
296                 else if(probability != 1.0f) {
297                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
298
299                         if(terminate >= probability)
300                                 break;
301
302                         throughput /= probability;
303                 }
304
305 #ifdef __AO__
306                 /* ambient occlusion */
307                 if(kernel_data.integrator.use_ambient_occlusion || (sd->flag & SD_AO)) {
308                         float bsdf_u, bsdf_v;
309                         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
310
311                         float ao_factor = kernel_data.background.ao_factor;
312                         float3 ao_N;
313                         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
314                         float3 ao_D;
315                         float ao_pdf;
316                         float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
317
318                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
319
320                         if(dot(sd->Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
321                                 Ray light_ray;
322                                 float3 ao_shadow;
323
324                                 light_ray.P = ray_offset(sd->P, sd->Ng);
325                                 light_ray.D = ao_D;
326                                 light_ray.t = kernel_data.background.ao_distance;
327 #  ifdef __OBJECT_MOTION__
328                                 light_ray.time = sd->time;
329 #  endif
330                                 light_ray.dP = sd->dP;
331                                 light_ray.dD = differential3_zero();
332
333                                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow)) {
334                                         path_radiance_accum_ao(L,
335                                                                throughput,
336                                                                ao_alpha,
337                                                                ao_bsdf,
338                                                                ao_shadow,
339                                                                state->bounce);
340                                 }
341                         }
342                 }
343 #endif
344
345 #ifdef __SUBSURFACE__
346                 /* bssrdf scatter to a different location on the same object, replacing
347                  * the closures with a diffuse BSDF */
348                 if(sd->flag & SD_BSSRDF) {
349                         float bssrdf_probability;
350                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
351
352                         /* modify throughput for picking bssrdf or bsdf */
353                         throughput *= bssrdf_probability;
354
355                         /* do bssrdf scatter step if we picked a bssrdf closure */
356                         if(sc) {
357                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
358
359                                 float bssrdf_u, bssrdf_v;
360                                 path_state_rng_2D(kg,
361                                                   rng,
362                                                   state,
363                                                   PRNG_BSDF_U,
364                                                   &bssrdf_u, &bssrdf_v);
365                                 subsurface_scatter_step(kg,
366                                                         sd,
367                                                         state,
368                                                         state->flag,
369                                                         sc,
370                                                         &lcg_state,
371                                                         bssrdf_u, bssrdf_v,
372                                                         false);
373                         }
374                 }
375 #endif
376
377 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
378                 if(kernel_data.integrator.use_direct_light) {
379                         int all = kernel_data.integrator.sample_all_lights_indirect;
380                         kernel_branched_path_surface_connect_light(kg,
381                                                                    rng,
382                                                                    sd,
383                                                                    emission_sd,
384                                                                    state,
385                                                                    throughput,
386                                                                    1.0f,
387                                                                    L,
388                                                                    all);
389                 }
390 #endif
391
392                 if(!kernel_path_surface_bounce(kg, rng, sd, &throughput, state, L, ray))
393                         break;
394         }
395 }
396
397 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
398                                         ShaderData *sd,
399                                         ShaderData *emission_sd,
400                                         PathRadiance *L,
401                                         PathState *state,
402                                         RNG *rng,
403                                         float3 throughput)
404 {
405         /* todo: solve correlation */
406         float bsdf_u, bsdf_v;
407
408         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
409
410         float ao_factor = kernel_data.background.ao_factor;
411         float3 ao_N;
412         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
413         float3 ao_D;
414         float ao_pdf;
415         float3 ao_alpha = shader_bsdf_alpha(kg, sd);
416
417         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
418
419         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
420                 Ray light_ray;
421                 float3 ao_shadow;
422
423                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
424                 light_ray.D = ao_D;
425                 light_ray.t = kernel_data.background.ao_distance;
426 #ifdef __OBJECT_MOTION__
427                 light_ray.time = ccl_fetch(sd, time);
428 #endif
429                 light_ray.dP = ccl_fetch(sd, dP);
430                 light_ray.dD = differential3_zero();
431
432                 if(!shadow_blocked(kg, emission_sd, state, &light_ray, &ao_shadow))
433                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
434         }
435 }
436
437 #ifdef __SUBSURFACE__
438 #  ifndef __KERNEL_CUDA__
439 ccl_device
440 #  else
441 ccl_device_inline
442 #  endif
443 bool kernel_path_subsurface_scatter(
444         KernelGlobals *kg,
445         ShaderData *sd,
446         ShaderData *emission_sd,
447         PathRadiance *L,
448         PathState *state,
449         RNG *rng,
450         Ray *ray,
451         float3 *throughput,
452         SubsurfaceIndirectRays *ss_indirect)
453 {
454         float bssrdf_probability;
455         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
456
457         /* modify throughput for picking bssrdf or bsdf */
458         *throughput *= bssrdf_probability;
459
460         /* do bssrdf scatter step if we picked a bssrdf closure */
461         if(sc) {
462                 /* We should never have two consecutive BSSRDF bounces,
463                  * the second one should be converted to a diffuse BSDF to
464                  * avoid this.
465                  */
466                 kernel_assert(!ss_indirect->tracing);
467
468                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
469
470                 SubsurfaceIntersection ss_isect;
471                 float bssrdf_u, bssrdf_v;
472                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
473                 int num_hits = subsurface_scatter_multi_intersect(kg,
474                                                                   &ss_isect,
475                                                                   sd,
476                                                                   sc,
477                                                                   &lcg_state,
478                                                                   bssrdf_u, bssrdf_v,
479                                                                   false);
480 #  ifdef __VOLUME__
481                 ss_indirect->need_update_volume_stack =
482                         kernel_data.integrator.use_volumes &&
483                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
484 #  endif
485
486                 /* compute lighting with the BSDF closure */
487                 for(int hit = 0; hit < num_hits; hit++) {
488                         /* NOTE: We reuse the existing ShaderData, we assume the path
489                          * integration loop stops when this function returns true.
490                          */
491                         subsurface_scatter_multi_setup(kg,
492                                                        &ss_isect,
493                                                        hit,
494                                                        sd,
495                                                        state,
496                                                        state->flag,
497                                                        sc,
498                                                        false);
499
500                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
501                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
502                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
503                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
504
505                         *hit_state = *state;
506                         *hit_ray = *ray;
507                         *hit_tp = *throughput;
508
509                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
510
511                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
512                         hit_L->direct_throughput = L->direct_throughput;
513                         path_radiance_copy_indirect(hit_L, L);
514
515                         kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
516
517                         if(kernel_path_surface_bounce(kg,
518                                                       rng,
519                                                       sd,
520                                                       hit_tp,
521                                                       hit_state,
522                                                       hit_L,
523                                                       hit_ray))
524                         {
525 #  ifdef __LAMP_MIS__
526                                 hit_state->ray_t = 0.0f;
527 #  endif
528
529 #  ifdef __VOLUME__
530                                 if(ss_indirect->need_update_volume_stack) {
531                                         Ray volume_ray = *ray;
532                                         /* Setup ray from previous surface point to the new one. */
533                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
534                                                                      &volume_ray.t);
535
536                                         kernel_volume_stack_update_for_subsurface(
537                                             kg,
538                                             emission_sd,
539                                             &volume_ray,
540                                             hit_state->volume_stack);
541                                 }
542 #  endif
543                                 path_radiance_reset_indirect(L);
544                                 ss_indirect->num_rays++;
545                         }
546                         else {
547                                 path_radiance_accum_sample(L, hit_L, 1);
548                         }
549                 }
550                 return true;
551         }
552         return false;
553 }
554
555 ccl_device_inline void kernel_path_subsurface_init_indirect(
556         SubsurfaceIndirectRays *ss_indirect)
557 {
558         ss_indirect->tracing = false;
559         ss_indirect->num_rays = 0;
560 }
561
562 ccl_device void kernel_path_subsurface_accum_indirect(
563         SubsurfaceIndirectRays *ss_indirect,
564         PathRadiance *L)
565 {
566         if(ss_indirect->tracing) {
567                 path_radiance_sum_indirect(L);
568                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
569                 if(ss_indirect->num_rays == 0) {
570                         *L = ss_indirect->direct_L;
571                 }
572         }
573 }
574
575 ccl_device void kernel_path_subsurface_setup_indirect(
576         KernelGlobals *kg,
577         SubsurfaceIndirectRays *ss_indirect,
578         PathState *state,
579         Ray *ray,
580         PathRadiance *L,
581         float3 *throughput)
582 {
583         if(!ss_indirect->tracing) {
584                 ss_indirect->direct_L = *L;
585         }
586         ss_indirect->tracing = true;
587
588         /* Setup state, ray and throughput for indirect SSS rays. */
589         ss_indirect->num_rays--;
590
591         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
592         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
593
594         *state = ss_indirect->state[ss_indirect->num_rays];
595         *ray = *indirect_ray;
596         *L = *indirect_L;
597         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
598
599         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
600 }
601
602 #endif  /* __SUBSURFACE__ */
603
/* Trace one path, starting from `ray`, and return its radiance for `sample`.
 *
 * The path loop intersects the scene and then, depending on which kernel
 * features are compiled in, handles lamp emission (__LAMP_MIS__), volumes
 * (__VOLUME__), the background on a miss, holdouts (__HOLDOUT__), surface
 * emission (__EMISSION__), probabilistic path termination, ambient occlusion
 * (__AO__), subsurface scattering (__SUBSURFACE__), direct lighting and
 * finally the next bounce.
 *
 * kg:     kernel globals, forwarded to every helper.
 * rng:    random number state, forwarded to the sampling helpers.
 * sample: sample index, forwarded to path_state_init() and the pass writers.
 * ray:    starting ray, taken by value; the bounce helpers receive a pointer
 *         to this local copy (presumably to store the next path segment in
 *         it -- confirm against kernel_path_surface_bounce()).
 * buffer: render buffer handed to kernel_write_data_passes(),
 *         kernel_write_light_passes() and, with __KERNEL_DEBUG__,
 *         kernel_write_debug_passes().
 *
 * Returns float4(L_sum.x, L_sum.y, L_sum.z, 1 - L_transparent). L_sum comes
 * from path_radiance_clamp_and_sum(). L_transparent accumulates
 * average(throughput) for camera rays that miss the scene while the
 * background is transparent, plus the holdout-weighted throughput of camera
 * rays that hit a holdout surface while the background is transparent.
 */
604 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
605                                                RNG *rng,
606                                                int sample,
607                                                Ray ray,
608                                                ccl_global float *buffer)
609 {
610         /* initialize */
611         PathRadiance L;
612         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
613         float L_transparent = 0.0f;
614
615         path_radiance_init(&L, kernel_data.film.use_light_pass);
616
617         /* shader data memory used for both volumes and surfaces, saves stack space */
618         ShaderData sd;
619         /* shader data used by emission, shadows, volume stacks */
620         ShaderData emission_sd;
621
622         PathState state;
623         path_state_init(kg, &emission_sd, &state, rng, sample, &ray);
624
625 #ifdef __KERNEL_DEBUG__
626         DebugData debug_data;
627         debug_data_init(&debug_data);
628 #endif
629
630 #ifdef __SUBSURFACE__
631         SubsurfaceIndirectRays ss_indirect;
632         kernel_path_subsurface_init_indirect(&ss_indirect);
633
            /* With __SUBSURFACE__ the path loop below is wrapped in this outer
             * loop. It is restarted while ss_indirect still holds rays and is
             * left once ss_indirect.num_rays is zero; see the matching #ifdef
             * block after the path loop. */
634         for(;;) {
635 #endif
636
637         /* path iteration */
638         for(;;) {
639                 /* intersect scene */
640                 Intersection isect;
641                 uint visibility = path_state_ray_visibility(kg, &state);
642
643 #ifdef __HAIR__
                    /* extra intersection parameters; they keep their zero
                     * defaults unless kernel_data.bvh.have_curves is set */
644                 float difl = 0.0f, extmax = 0.0f;
645                 uint lcg_state = 0;
646
647                 if(kernel_data.bvh.have_curves) {
                            /* difl is only computed for camera rays when
                             * kernel_data.cam.resolution is 1 */
648                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
649                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
650                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
651                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
652                         }
653
654                         extmax = kernel_data.curve.maximum_width;
655                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
656                 }
657
658                 bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
659 #else
660                 bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
661 #endif
662
663 #ifdef __KERNEL_DEBUG__
                    /* BVH traversal counters are accumulated for camera rays
                     * only; the bounce counter is incremented on every
                     * iteration of the path loop */
664                 if(state.flag & PATH_RAY_CAMERA) {
665                         debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
666                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
667                 }
668                 debug_data.num_ray_bounces++;
669 #endif
670
671 #ifdef __LAMP_MIS__
672                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
673                         /* ray starting from previous non-transparent bounce */
674                         Ray light_ray;
675
                            /* the origin is moved back along ray.D by the distance
                             * already accumulated in state.ray_t; the current
                             * intersection distance is then added so the lamp ray
                             * spans the whole accumulated length */
676                         light_ray.P = ray.P - state.ray_t*ray.D;
677                         state.ray_t += isect.t;
678                         light_ray.D = ray.D;
679                         light_ray.t = state.ray_t;
680                         light_ray.time = ray.time;
681                         light_ray.dD = ray.dD;
682                         light_ray.dP = ray.dP;
683
684                         /* intersect with lamp */
685                         float3 emission;
686
687                         if(indirect_lamp_emission(kg, &emission_sd, &state, &light_ray, &emission))
688                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
689                 }
690 #endif
691
692 #ifdef __VOLUME__
693                 /* volume attenuation, emission, scatter */
694                 if(state.volume_stack[0].shader != SHADER_NONE) {
                            /* the volume segment ends at the surface hit, or at
                             * FLT_MAX when nothing was hit */
695                         Ray volume_ray = ray;
696                         volume_ray.t = (hit)? isect.t: FLT_MAX;
697
698                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
699
700 #  ifdef __VOLUME_DECOUPLED__
701                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
702                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
703
704                         if(decoupled) {
705                                 /* cache steps along volume for repeated sampling */
706                                 VolumeSegment volume_segment;
707
708                                 shader_setup_from_volume(kg, &sd, &volume_ray);
709                                 kernel_volume_decoupled_record(kg, &state,
710                                         &volume_ray, &sd, &volume_segment, heterogeneous);
711
712                                 volume_segment.sampling_method = sampling_method;
713
714                                 /* emission */
715                                 if(volume_segment.closure_flag & SD_EMISSION)
716                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
717
718                                 /* scattering */
719                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
720
721                                 if(volume_segment.closure_flag & SD_SCATTER) {
                                            /* forwarded to the light-connection helper below.
                                             * NOTE(review): declared int but initialised from a
                                             * bool literal -- confirm the parameter type */
722                                         int all = false;
723
724                                         /* direct light sampling */
725                                         kernel_branched_path_volume_connect_light(kg, rng, &sd,
726                                                 &emission_sd, throughput, &state, &L, all,
727                                                 &volume_ray, &volume_segment);
728
729                                         /* indirect sample. if we use distance sampling and take just
730                                          * one sample for direct and indirect light, we could share
731                                          * this computation, but makes code a bit complex */
732                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
733                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
734
735                                         result = kernel_volume_decoupled_scatter(kg,
736                                                 &state, &volume_ray, &sd, &throughput,
737                                                 rphase, rscatter, &volume_segment, NULL, true);
738                                 }
739
740                                 /* free cached steps */
741                                 kernel_volume_decoupled_free(kg, &volume_segment);
742
                                    /* scattered: restart the path loop when the bounce
                                     * helper returns true, otherwise end the path.
                                     * not scattered: scale throughput by the segment's
                                     * accumulated transmittance and go on to the surface */
743                                 if(result == VOLUME_PATH_SCATTERED) {
744                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
745                                                 continue;
746                                         else
747                                                 break;
748                                 }
749                                 else {
750                                         throughput *= volume_segment.accum_transmittance;
751                                 }
752                         }
753                         else
754 #  endif
755                         {
756                                 /* integrate along volume segment with distance sampling */
757                                 VolumeIntegrateResult result = kernel_volume_integrate(
758                                         kg, &state, &sd, &volume_ray, &L, &throughput, rng, heterogeneous);
759
760 #  ifdef __VOLUME_SCATTER__
761                                 if(result == VOLUME_PATH_SCATTERED) {
762                                         /* direct lighting */
763                                         kernel_path_volume_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
764
765                                         /* indirect light bounce */
766                                         if(kernel_path_volume_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
767                                                 continue;
768                                         else
769                                                 break;
770                                 }
771 #  endif
772                         }
773                 }
774 #endif
775
776                 if(!hit) {
777                         /* eval background shader if nothing hit */
778                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
779                                 L_transparent += average(throughput);
780
                                    /* with __PASSES__ the path only stops here when
                                     * PASS_BACKGROUND is not set in film.pass_flag, so the
                                     * background is still evaluated below for that pass;
                                     * without __PASSES__ the break is unconditional */
781 #ifdef __PASSES__
782                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
783 #endif
784                                         break;
785                         }
786
787 #ifdef __BACKGROUND__
788                         /* sample background shader */
789                         float3 L_background = indirect_background(kg, &emission_sd, &state, &ray);
790                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
791 #endif
792
793                         break;
794                 }
795
796                 /* setup shading */
797                 shader_setup_from_ray(kg, &sd, &isect, &ray);
798                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
799                 shader_eval_surface(kg, &sd, rng, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
800
801                 /* holdout */
802 #ifdef __HOLDOUT__
803                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
804                         if(kernel_data.background.transparent) {
805                                 float3 holdout_weight;
806                                 
807                                 if(sd.flag & SD_HOLDOUT_MASK)
808                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
809                                 else
810                                         holdout_weight = shader_holdout_eval(kg, &sd);
811
812                                 /* any throughput is ok, should all be identical here */
813                                 L_transparent += average(holdout_weight*throughput);
814                         }
815
                            /* only SD_HOLDOUT_MASK ends the path; with just
                             * SD_HOLDOUT set, shading continues below */
816                         if(sd.flag & SD_HOLDOUT_MASK)
817                                 break;
818                 }
819 #endif
820
821                 /* holdout mask objects do not write data passes */
822                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
823
824                 /* blurring of bsdf after bounces, for rays that have a small likelihood
825                  * of following this particular path (diffuse, rough glossy) */
                    /* skipped entirely when filter_glossy equals FLT_MAX */
826                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
827                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
828
829                         if(blur_pdf < 1.0f) {
830                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
831                                 shader_bsdf_blur(kg, &sd, blur_roughness);
832                         }
833                 }
834
835 #ifdef __EMISSION__
836                 /* emission */
837                 if(sd.flag & SD_EMISSION) {
838                         /* todo: is isect.t wrong here for transparent surfaces? */
839                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
840                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
841                 }
842 #endif
843
844                 /* path termination. this is a strange place to put the termination, it's
845                  * mainly due to the mixed in MIS that we use. gives too many unneeded
846                  * shader evaluations, only need emission if we are going to terminate */
847                 float probability = path_state_terminate_probability(kg, &state, throughput);
848
                    /* probability 0 always ends the path and probability 1 never
                     * does. anything in between draws a random number: the path
                     * ends when it is >= probability, and a surviving path has
                     * its throughput divided by probability */
849                 if(probability == 0.0f) {
850                         break;
851                 }
852                 else if(probability != 1.0f) {
853                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
854
855                         if(terminate >= probability)
856                                 break;
857
858                         throughput /= probability;
859                 }
860
861 #ifdef __AO__
862                 /* ambient occlusion */
863                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
864                         kernel_path_ao(kg, &sd, &emission_sd, &L, &state, rng, throughput);
865                 }
866 #endif
867
868 #ifdef __SUBSURFACE__
869                 /* bssrdf scatter to a different location on the same object, replacing
870                  * the closures with a diffuse BSDF */
871                 if(sd.flag & SD_BSSRDF) {
                            /* a true return leaves the path loop; ss_indirect is
                             * inspected right after the loop */
872                         if(kernel_path_subsurface_scatter(kg,
873                                                           &sd,
874                                                           &emission_sd,
875                                                           &L,
876                                                           &state,
877                                                           rng,
878                                                           &ray,
879                                                           &throughput,
880                                                           &ss_indirect))
881                         {
882                                 break;
883                         }
884                 }
885 #endif  /* __SUBSURFACE__ */
886
887                 /* direct lighting */
888                 kernel_path_surface_connect_light(kg, rng, &sd, &emission_sd, throughput, &state, &L);
889
890                 /* compute direct lighting and next bounce */
891                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
892                         break;
893         }
894
895 #ifdef __SUBSURFACE__
                    /* this block is the tail of the outer for(;;) opened under
                     * __SUBSURFACE__ above, although it is indented one level
                     * deeper than that loop */
896                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
897
898                 /* Trace indirect subsurface rays by restarting the loop. this uses less
899                  * stack memory than invoking kernel_path_indirect.
900                  */
901                 if(ss_indirect.num_rays) {
902                         kernel_path_subsurface_setup_indirect(kg,
903                                                               &ss_indirect,
904                                                               &state,
905                                                               &ray,
906                                                               &L,
907                                                               &throughput);
908                 }
909                 else {
910                         break;
911                 }
912         }
913 #endif  /* __SUBSURFACE__ */
914
            /* L_sum is the clamped, summed radiance; light passes are written
             * from the same PathRadiance afterwards */
915         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
916
917         kernel_write_light_passes(kg, buffer, &L, sample);
918
919 #ifdef __KERNEL_DEBUG__
920         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
921 #endif
922
            /* the fourth component is one minus the accumulated transparency */
923         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
924 }
925
926 ccl_device void kernel_path_trace(KernelGlobals *kg,
927         ccl_global float *buffer, ccl_global uint *rng_state,
928         int sample, int x, int y, int offset, int stride)
929 {
930         /* buffer offset */
931         int index = offset + x + y*stride;
932         int pass_stride = kernel_data.film.pass_stride;
933
934         rng_state += index;
935         buffer += index*pass_stride;
936
937         /* initialize random numbers and ray */
938         RNG rng;
939         Ray ray;
940
941         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
942
943         /* integrate */
944         float4 L;
945
946         if(ray.t != 0.0f)
947                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
948         else
949                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
950
951         /* accumulate result in output buffer */
952         kernel_write_pass_float4(buffer, sample, L);
953
954         path_rng_end(kg, rng_state, rng);
955 }
956
957 CCL_NAMESPACE_END
958