Cycles: Remove few function arguments needed only for the split kernel
[blender.git] / intern / cycles / kernel / kernel_path.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifdef __OSL__
18 #include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28
29 #include "kernel_accumulate.h"
30 #include "kernel_shader.h"
31 #include "kernel_light.h"
32 #include "kernel_passes.h"
33
34 #ifdef __SUBSURFACE__
35 #include "kernel_subsurface.h"
36 #endif
37
38 #ifdef __VOLUME__
39 #include "kernel_volume.h"
40 #endif
41
42 #include "kernel_path_state.h"
43 #include "kernel_shadow.h"
44 #include "kernel_emission.h"
45 #include "kernel_path_common.h"
46 #include "kernel_path_surface.h"
47 #include "kernel_path_volume.h"
48
49 #ifdef __KERNEL_DEBUG__
50 #include "kernel_debug.h"
51 #endif
52
53 CCL_NAMESPACE_BEGIN
54
/* Trace the remainder of a path and accumulate its radiance into `L`.
 *
 * Starting from `ray` and `state`, each loop pass intersects the scene, adds
 * lamp, volume and surface emission, optional ambient occlusion and direct
 * light to `L`, and then asks a bounce helper for the next segment.  The loop
 * ends when the ray misses the scene (after the optional background lookup),
 * when the termination test rejects the path, or when a volume/surface
 * bounce helper returns false.
 *
 * kg          - kernel globals, forwarded to every helper.
 * rng         - random number generator state used for all sampling calls.
 * ray         - ray to trace; handed by pointer to the bounce helpers
 *               (presumably overwritten with the next segment - confirm).
 * throughput  - path throughput so far; taken by value, so the changes made
 *               here are not visible to the caller.
 * num_samples - only used to scale the throughput that is handed to
 *               path_state_terminate_probability().
 * state       - path state, read and updated in place (e.g. ray_t).
 * L           - radiance accumulator that receives every contribution.
 */
55 ccl_device void kernel_path_indirect(KernelGlobals *kg,
56                                      RNG *rng,
57                                      Ray *ray,
58                                      float3 throughput,
59                                      int num_samples,
60                                      PathState *state,
61                                      PathRadiance *L)
62 {
63         /* path iteration: one scene intersection per pass */
64         for(;;) {
65                 /* intersect scene */
66                 Intersection isect;
67                 uint visibility = path_state_ray_visibility(kg, state);
68                 bool hit = scene_intersect(kg,
69                                            ray,
70                                            visibility,
71                                            &isect,
72                                            NULL,
73                                            0.0f, 0.0f);
74
75 #ifdef __LAMP_MIS__
76                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
77                         /* ray starting from previous non-transparent bounce */
78                         Ray light_ray;
79
80                         light_ray.P = ray->P - state->ray_t*ray->D; /* origin moved back by the distance held in ray_t */
81                         state->ray_t += isect.t; /* NOTE(review): isect.t is read even when hit is false - presumably scene_intersect() always sets it; confirm */
82                         light_ray.D = ray->D;
83                         light_ray.t = state->ray_t;
84                         light_ray.time = ray->time;
85                         light_ray.dD = ray->dD;
86                         light_ray.dP = ray->dP;
87
88                         /* intersect with lamp */
89                         float3 emission;
90                         if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
91                                 path_radiance_accum_emission(L,
92                                                              throughput,
93                                                              emission,
94                                                              state->bounce);
95                         }
96                 }
97 #endif
98
99 #ifdef __VOLUME__
100                 /* volume attenuation, emission, scatter */
101                 if(state->volume_stack[0].shader != SHADER_NONE) {
102                         Ray volume_ray = *ray;
103                         volume_ray.t = (hit)? isect.t: FLT_MAX; /* end the segment at the surface hit; FLT_MAX on a miss */
104
105                         bool heterogeneous =
106                                 volume_stack_is_heterogeneous(kg,
107                                                               state->volume_stack);
108
109 #ifdef __VOLUME_DECOUPLED__
110                         int sampling_method =
111                                 volume_stack_sampling_method(kg,
112                                                              state->volume_stack);
113                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method); /* NOTE(review): third argument is false here and true in kernel_path_integrate; its meaning is not visible in this file - confirm */
114
115                         if(decoupled) {
116                                 /* cache steps along volume for repeated sampling */
117                                 VolumeSegment volume_segment;
118                                 ShaderData volume_sd;
119
120                                 shader_setup_from_volume(kg,
121                                                          &volume_sd,
122                                                          &volume_ray);
123                                 kernel_volume_decoupled_record(kg,
124                                                                state,
125                                                                &volume_ray,
126                                                                &volume_sd,
127                                                                &volume_segment,
128                                                                heterogeneous);
129
130                                 volume_segment.sampling_method = sampling_method;
131
132                                 /* emission */
133                                 if(volume_segment.closure_flag & SD_EMISSION) {
134                                         path_radiance_accum_emission(L,
135                                                                      throughput,
136                                                                      volume_segment.accum_emission,
137                                                                      state->bounce);
138                                 }
139
140                                 /* scattering */
141                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED; /* kept unless the scatter call below overwrites it */
142
143                                 if(volume_segment.closure_flag & SD_SCATTER) {
144                                         bool all = kernel_data.integrator.sample_all_lights_indirect;
145
146                                         /* direct light sampling */
147                                         kernel_branched_path_volume_connect_light(kg,
148                                                                                   rng,
149                                                                                   &volume_sd,
150                                                                                   throughput,
151                                                                                   state,
152                                                                                   L,
153                                                                                   all,
154                                                                                   &volume_ray,
155                                                                                   &volume_segment);
156
157                                         /* indirect sample. if we use distance sampling and take just
158                                          * one sample for direct and indirect light, we could share
159                                          * this computation, but makes code a bit complex */
160                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
161                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
162
163                                         result = kernel_volume_decoupled_scatter(kg,
164                                                                                  state,
165                                                                                  &volume_ray,
166                                                                                  &volume_sd,
167                                                                                  &throughput,
168                                                                                  rphase,
169                                                                                  rscatter,
170                                                                                  &volume_segment,
171                                                                                  NULL,
172                                                                                  true);
173                                 }
174
175                                 /* free cached steps */
176                                 kernel_volume_decoupled_free(kg, &volume_segment);
177
178                                 if(result == VOLUME_PATH_SCATTERED) {
179                                         if(kernel_path_volume_bounce(kg,
180                                                                      rng,
181                                                                      &volume_sd,
182                                                                      &throughput,
183                                                                      state,
184                                                                      L,
185                                                                      ray))
186                                         {
187                                                 continue;
188                                         }
189                                         else {
190                                                 break;
191                                         }
192                                 }
193                                 else {
194                                         throughput *= volume_segment.accum_transmittance; /* no scatter event: apply the transmittance accumulated in the segment */
195                                 }
196                         }
197                         else
198 #endif
199                         {
200                                 /* integrate along volume segment with distance sampling */
201                                 ShaderData volume_sd;
202                                 VolumeIntegrateResult result = kernel_volume_integrate(
203                                         kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
204
205 #ifdef __VOLUME_SCATTER__
206                                 if(result == VOLUME_PATH_SCATTERED) {
207                                         /* direct lighting */
208                                         kernel_path_volume_connect_light(kg,
209                                                                          rng,
210                                                                          &volume_sd,
211                                                                          throughput,
212                                                                          state,
213                                                                          L);
214
215                                         /* indirect light bounce */
216                                         if(kernel_path_volume_bounce(kg,
217                                                                      rng,
218                                                                      &volume_sd,
219                                                                      &throughput,
220                                                                      state,
221                                                                      L,
222                                                                      ray))
223                                         {
224                                                 continue;
225                                         }
226                                         else {
227                                                 break;
228                                         }
229                                 }
230 #endif
231                         }
232                 }
233 #endif
234
235                 if(!hit) { /* nothing was hit: optional background lookup, then stop */
236 #ifdef __BACKGROUND__
237                         /* sample background shader */
238                         float3 L_background = indirect_background(kg, state, ray);
239                         path_radiance_accum_background(L,
240                                                        throughput,
241                                                        L_background,
242                                                        state->bounce);
243 #endif
244
245                         break;
246                 }
247
248                 /* setup shading */
249                 ShaderData sd;
250                 shader_setup_from_ray(kg,
251                                       &sd,
252                                       &isect,
253                                       ray);
254                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
255                 shader_eval_surface(kg, &sd, state, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
256 #ifdef __BRANCHED_PATH__
257                 shader_merge_closures(&sd);
258 #endif
259
260                 /* blurring of bsdf after bounces, for rays that have a small likelihood
261                  * of following this particular path (diffuse, rough glossy) */
262                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
263                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
264
265                         if(blur_pdf < 1.0f) {
266                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
267                                 shader_bsdf_blur(kg, &sd, blur_roughness);
268                         }
269                 }
270
271 #ifdef __EMISSION__
272                 /* emission */
273                 if(sd.flag & SD_EMISSION) {
274                         float3 emission = indirect_primitive_emission(kg,
275                                                                       &sd,
276                                                                       isect.t,
277                                                                       state->flag,
278                                                                       state->ray_pdf);
279                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
280                 }
281 #endif
282
283                 /* path termination. this is a strange place to put the termination, it's
284                  * mainly due to the mixed in MIS that we use. gives too many unneeded
285                  * shader evaluations, only need emission if we are going to terminate */
286                 float probability =
287                         path_state_terminate_probability(kg,
288                                                          state,
289                                                          throughput*num_samples);
290
291                 if(probability == 0.0f) {
292                         break;
293                 }
294                 else if(probability != 1.0f) {
295                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
296
297                         if(terminate >= probability)
298                                 break;
299
300                         throughput /= probability; /* paths that survive the test are re-weighted by 1/probability */
301                 }
302
303 #ifdef __AO__
304                 /* ambient occlusion */
305                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
306                         float bsdf_u, bsdf_v;
307                         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
308
309                         float ao_factor = kernel_data.background.ao_factor;
310                         float3 ao_N;
311                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
312                         float3 ao_D;
313                         float ao_pdf;
314                         float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f); /* zero alpha here; kernel_path_ao() passes shader_bsdf_alpha() instead */
315
316                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
317
318                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
319                                 Ray light_ray;
320                                 float3 ao_shadow;
321
322                                 light_ray.P = ray_offset(sd.P, sd.Ng);
323                                 light_ray.D = ao_D;
324                                 light_ray.t = kernel_data.background.ao_distance;
325 #ifdef __OBJECT_MOTION__
326                                 light_ray.time = sd.time;
327 #endif
328                                 light_ray.dP = sd.dP;
329                                 light_ray.dD = differential3_zero();
330
331                                 if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
332                                         path_radiance_accum_ao(L,
333                                                                throughput,
334                                                                ao_alpha,
335                                                                ao_bsdf,
336                                                                ao_shadow,
337                                                                state->bounce);
338                                 }
339                         }
340                 }
341 #endif
342
343 #ifdef __SUBSURFACE__
344                 /* bssrdf scatter to a different location on the same object, replacing
345                  * the closures with a diffuse BSDF */
346                 if(sd.flag & SD_BSSRDF) {
347                         float bssrdf_probability;
348                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
349
350                         /* modify throughput for picking bssrdf or bsdf */
351                         throughput *= bssrdf_probability;
352
353                         /* do bssrdf scatter step if we picked a bssrdf closure */
354                         if(sc) {
355                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
356
357                                 float bssrdf_u, bssrdf_v;
358                                 path_state_rng_2D(kg,
359                                                   rng,
360                                                   state,
361                                                   PRNG_BSDF_U,
362                                                   &bssrdf_u, &bssrdf_v);
363                                 subsurface_scatter_step(kg,
364                                                         &sd,
365                                                         state,
366                                                         state->flag,
367                                                         sc,
368                                                         &lcg_state,
369                                                         bssrdf_u, bssrdf_v,
370                                                         false);
371                         }
372                 }
373 #endif
374
375 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
376                 if(kernel_data.integrator.use_direct_light) {
377                         bool all = kernel_data.integrator.sample_all_lights_indirect;
378                         kernel_branched_path_surface_connect_light(kg,
379                                                                    rng,
380                                                                    &sd,
381                                                                    state,
382                                                                    throughput,
383                                                                    1.0f,
384                                                                    L,
385                                                                    all);
386                 }
387 #endif
388
389                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, state, L, ray)) /* the path ends when the bounce helper returns false */
390                         break;
391         }
392 }
393
394 ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
395                                         ShaderData *sd,
396                                         PathRadiance *L,
397                                         PathState *state,
398                                         RNG *rng,
399                                         float3 throughput)
400 {
401         /* todo: solve correlation */
402         float bsdf_u, bsdf_v;
403
404         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
405
406         float ao_factor = kernel_data.background.ao_factor;
407         float3 ao_N;
408         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
409         float3 ao_D;
410         float ao_pdf;
411         float3 ao_alpha = shader_bsdf_alpha(kg, sd);
412
413         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
414
415         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
416                 Ray light_ray;
417                 float3 ao_shadow;
418
419                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
420                 light_ray.D = ao_D;
421                 light_ray.t = kernel_data.background.ao_distance;
422 #ifdef __OBJECT_MOTION__
423                 light_ray.time = ccl_fetch(sd, time);
424 #endif
425                 light_ray.dP = ccl_fetch(sd, dP);
426                 light_ray.dD = differential3_zero();
427
428                 if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
429                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
430         }
431 }
432
433 #ifdef __SUBSURFACE__
434
435 ccl_device bool kernel_path_subsurface_scatter(
436         KernelGlobals *kg,
437         ShaderData *sd,
438         PathRadiance *L,
439         PathState *state,
440         RNG *rng,
441         Ray *ray,
442         float3 *throughput,
443         SubsurfaceIndirectRays *ss_indirect)
444 {
445         float bssrdf_probability;
446         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
447
448         /* modify throughput for picking bssrdf or bsdf */
449         *throughput *= bssrdf_probability;
450
451         /* do bssrdf scatter step if we picked a bssrdf closure */
452         if(sc) {
453                 /* We should never have two consecutive BSSRDF bounces,
454                  * the second one should be converted to a diffuse BSDF to
455                  * avoid this.
456                  */
457                 kernel_assert(!ss_indirect->tracing);
458
459                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
460
461                 SubsurfaceIntersection ss_isect;
462                 float bssrdf_u, bssrdf_v;
463                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
464                 int num_hits = subsurface_scatter_multi_intersect(kg,
465                                                                   &ss_isect,
466                                                                   sd,
467                                                                   sc,
468                                                                   &lcg_state,
469                                                                   bssrdf_u, bssrdf_v,
470                                                                   false);
471 #ifdef __VOLUME__
472                 ss_indirect->need_update_volume_stack =
473                         kernel_data.integrator.use_volumes &&
474                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
475 #endif
476
477                 /* compute lighting with the BSDF closure */
478                 for(int hit = 0; hit < num_hits; hit++) {
479                         /* NOTE: We reuse the existing ShaderData, we assume the path
480                          * integration loop stops when this function returns true.
481                          */
482                         subsurface_scatter_multi_setup(kg,
483                                                        &ss_isect,
484                                                        hit,
485                                                        sd,
486                                                        state,
487                                                        state->flag,
488                                                        sc,
489                                                        false);
490
491                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
492                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
493                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
494                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
495
496                         *hit_state = *state;
497                         *hit_ray = *ray;
498                         *hit_tp = *throughput;
499
500                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
501
502                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
503                         hit_L->direct_throughput = L->direct_throughput;
504                         path_radiance_copy_indirect(hit_L, L);
505
506                         kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
507
508                         if(kernel_path_surface_bounce(kg,
509                                                       rng,
510                                                       sd,
511                                                       hit_tp,
512                                                       hit_state,
513                                                       hit_L,
514                                                       hit_ray))
515                         {
516 #ifdef __LAMP_MIS__
517                                 hit_state->ray_t = 0.0f;
518 #endif
519
520 #ifdef __VOLUME__
521                                 if(ss_indirect->need_update_volume_stack) {
522                                         Ray volume_ray = *ray;
523                                         /* Setup ray from previous surface point to the new one. */
524                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
525                                                                      &volume_ray.t);
526
527                                         kernel_volume_stack_update_for_subsurface(
528                                             kg,
529                                             &volume_ray,
530                                             hit_state->volume_stack);
531                                 }
532 #endif
533                                 path_radiance_reset_indirect(L);
534                                 ss_indirect->num_rays++;
535                         }
536                         else {
537                                 path_radiance_accum_sample(L, hit_L, 1);
538                         }
539                 }
540                 return true;
541         }
542         return false;
543 }
544
545 ccl_device_inline void kernel_path_subsurface_init_indirect(
546         SubsurfaceIndirectRays *ss_indirect)
547 {
548         ss_indirect->tracing = false;
549         ss_indirect->num_rays = 0;
550 }
551
552 ccl_device void kernel_path_subsurface_accum_indirect(
553         SubsurfaceIndirectRays *ss_indirect,
554         PathRadiance *L)
555 {
556         if(ss_indirect->tracing) {
557                 path_radiance_sum_indirect(L);
558                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
559                 if(ss_indirect->num_rays == 0) {
560                         *L = ss_indirect->direct_L;
561                 }
562         }
563 }
564
565 ccl_device void kernel_path_subsurface_setup_indirect(
566         KernelGlobals *kg,
567         SubsurfaceIndirectRays *ss_indirect,
568         PathState *state,
569         Ray *ray,
570         PathRadiance *L,
571         float3 *throughput)
572 {
573         if(!ss_indirect->tracing) {
574                 ss_indirect->direct_L = *L;
575         }
576         ss_indirect->tracing = true;
577
578         /* Setup state, ray and throughput for indirect SSS rays. */
579         ss_indirect->num_rays--;
580
581         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
582         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
583
584         *state = ss_indirect->state[ss_indirect->num_rays];
585         *ray = *indirect_ray;
586         *L = *indirect_L;
587         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
588
589         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
590 }
591
592 #endif  /* __SUBSURFACE__ */
593
594 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
595                                                RNG *rng,
596                                                int sample,
597                                                Ray ray,
598                                                ccl_global float *buffer)
599 {
600         /* initialize */
601         PathRadiance L;
602         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
603         float L_transparent = 0.0f;
604
605         path_radiance_init(&L, kernel_data.film.use_light_pass);
606
607         PathState state;
608         path_state_init(kg, &state, rng, sample, &ray);
609
610 #ifdef __KERNEL_DEBUG__
611         DebugData debug_data;
612         debug_data_init(&debug_data);
613 #endif
614
615 #ifdef __SUBSURFACE__
616         SubsurfaceIndirectRays ss_indirect;
617         kernel_path_subsurface_init_indirect(&ss_indirect);
618
619         for(;;) {
620 #endif
621
622         /* path iteration */
623         for(;;) {
624                 /* intersect scene */
625                 Intersection isect;
626                 uint visibility = path_state_ray_visibility(kg, &state);
627
628 #ifdef __HAIR__
629                 float difl = 0.0f, extmax = 0.0f;
630                 uint lcg_state = 0;
631
632                 if(kernel_data.bvh.have_curves) {
633                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
634                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
635                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
636                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
637                         }
638
639                         extmax = kernel_data.curve.maximum_width;
640                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
641                 }
642
643                 bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
644 #else
645                 bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
646 #endif
647
648 #ifdef __KERNEL_DEBUG__
649                 if(state.flag & PATH_RAY_CAMERA) {
650                         debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
651                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
652                 }
653                 debug_data.num_ray_bounces++;
654 #endif
655
656 #ifdef __LAMP_MIS__
657                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
658                         /* ray starting from previous non-transparent bounce */
659                         Ray light_ray;
660
661                         light_ray.P = ray.P - state.ray_t*ray.D;
662                         state.ray_t += isect.t;
663                         light_ray.D = ray.D;
664                         light_ray.t = state.ray_t;
665                         light_ray.time = ray.time;
666                         light_ray.dD = ray.dD;
667                         light_ray.dP = ray.dP;
668
669                         /* intersect with lamp */
670                         float3 emission;
671
672                         if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
673                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
674                 }
675 #endif
676
677 #ifdef __VOLUME__
678                 /* volume attenuation, emission, scatter */
679                 if(state.volume_stack[0].shader != SHADER_NONE) {
680                         Ray volume_ray = ray;
681                         volume_ray.t = (hit)? isect.t: FLT_MAX;
682
683                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
684
685 #ifdef __VOLUME_DECOUPLED__
686                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
687                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
688
689                         if(decoupled) {
690                                 /* cache steps along volume for repeated sampling */
691                                 VolumeSegment volume_segment;
692                                 ShaderData volume_sd;
693
694                                 shader_setup_from_volume(kg, &volume_sd, &volume_ray);
695                                 kernel_volume_decoupled_record(kg, &state,
696                                         &volume_ray, &volume_sd, &volume_segment, heterogeneous);
697
698                                 volume_segment.sampling_method = sampling_method;
699
700                                 /* emission */
701                                 if(volume_segment.closure_flag & SD_EMISSION)
702                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
703
704                                 /* scattering */
705                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
706
707                                 if(volume_segment.closure_flag & SD_SCATTER) {
708                                         bool all = false;
709
710                                         /* direct light sampling */
711                                         kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
712                                                 throughput, &state, &L, all, &volume_ray, &volume_segment);
713
714                                         /* indirect sample. if we use distance sampling and take just
715                                          * one sample for direct and indirect light, we could share
716                                          * this computation, but makes code a bit complex */
717                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
718                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
719
720                                         result = kernel_volume_decoupled_scatter(kg,
721                                                 &state, &volume_ray, &volume_sd, &throughput,
722                                                 rphase, rscatter, &volume_segment, NULL, true);
723                                 }
724
725                                 /* free cached steps */
726                                 kernel_volume_decoupled_free(kg, &volume_segment);
727
728                                 if(result == VOLUME_PATH_SCATTERED) {
729                                         if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
730                                                 continue;
731                                         else
732                                                 break;
733                                 }
734                                 else {
735                                         throughput *= volume_segment.accum_transmittance;
736                                 }
737                         }
738                         else 
739 #endif
740                         {
741                                 /* integrate along volume segment with distance sampling */
742                                 ShaderData volume_sd;
743                                 VolumeIntegrateResult result = kernel_volume_integrate(
744                                         kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
745
746 #ifdef __VOLUME_SCATTER__
747                                 if(result == VOLUME_PATH_SCATTERED) {
748                                         /* direct lighting */
749                                         kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
750
751                                         /* indirect light bounce */
752                                         if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
753                                                 continue;
754                                         else
755                                                 break;
756                                 }
757 #endif
758                         }
759                 }
760 #endif
761
762                 if(!hit) {
763                         /* eval background shader if nothing hit */
764                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
765                                 L_transparent += average(throughput);
766
767 #ifdef __PASSES__
768                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
769 #endif
770                                         break;
771                         }
772
773 #ifdef __BACKGROUND__
774                         /* sample background shader */
775                         float3 L_background = indirect_background(kg, &state, &ray);
776                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
777 #endif
778
779                         break;
780                 }
781
782                 /* setup shading */
783                 ShaderData sd;
784                 shader_setup_from_ray(kg, &sd, &isect, &ray);
785                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
786                 shader_eval_surface(kg, &sd, &state, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
787
788                 /* holdout */
789 #ifdef __HOLDOUT__
790                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
791                         if(kernel_data.background.transparent) {
792                                 float3 holdout_weight;
793                                 
794                                 if(sd.flag & SD_HOLDOUT_MASK)
795                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
796                                 else
797                                         holdout_weight = shader_holdout_eval(kg, &sd);
798
799                                 /* any throughput is ok, should all be identical here */
800                                 L_transparent += average(holdout_weight*throughput);
801                         }
802
803                         if(sd.flag & SD_HOLDOUT_MASK)
804                                 break;
805                 }
806 #endif
807
808                 /* holdout mask objects do not write data passes */
809                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
810
811                 /* blurring of bsdf after bounces, for rays that have a small likelihood
812                  * of following this particular path (diffuse, rough glossy) */
813                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
814                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
815
816                         if(blur_pdf < 1.0f) {
817                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
818                                 shader_bsdf_blur(kg, &sd, blur_roughness);
819                         }
820                 }
821
822 #ifdef __EMISSION__
823                 /* emission */
824                 if(sd.flag & SD_EMISSION) {
825                         /* todo: is isect.t wrong here for transparent surfaces? */
826                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
827                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
828                 }
829 #endif
830
831                 /* path termination. this is a strange place to put the termination, it's
832                  * mainly due to the mixed in MIS that we use. gives too many unneeded
833                  * shader evaluations, only need emission if we are going to terminate */
834                 float probability = path_state_terminate_probability(kg, &state, throughput);
835
836                 if(probability == 0.0f) {
837                         break;
838                 }
839                 else if(probability != 1.0f) {
840                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
841
842                         if(terminate >= probability)
843                                 break;
844
845                         throughput /= probability;
846                 }
847
848 #ifdef __AO__
849                 /* ambient occlusion */
850                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
851                         kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
852                 }
853 #endif
854
855 #ifdef __SUBSURFACE__
856                 /* bssrdf scatter to a different location on the same object, replacing
857                  * the closures with a diffuse BSDF */
858                 if(sd.flag & SD_BSSRDF) {
859                         if(kernel_path_subsurface_scatter(kg,
860                                                           &sd,
861                                                           &L,
862                                                           &state,
863                                                           rng,
864                                                           &ray,
865                                                           &throughput,
866                                                           &ss_indirect))
867                         {
868                                 break;
869                         }
870                 }
871 #endif  /* __SUBSURFACE__ */
872
873                 /* direct lighting */
874                 kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
875
876                 /* compute direct lighting and next bounce */
877                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
878                         break;
879         }
880
881 #ifdef __SUBSURFACE__
882                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
883
884                 /* Trace indirect subsurface rays by restarting the loop. this uses less
885                  * stack memory than invoking kernel_path_indirect.
886                  */
887                 if(ss_indirect.num_rays) {
888                         kernel_path_subsurface_setup_indirect(kg,
889                                                               &ss_indirect,
890                                                               &state,
891                                                               &ray,
892                                                               &L,
893                                                               &throughput);
894                 }
895                 else {
896                         break;
897                 }
898         }
899 #endif  /* __SUBSURFACE__ */
900
901         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
902
903         kernel_write_light_passes(kg, buffer, &L, sample);
904
905 #ifdef __KERNEL_DEBUG__
906         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
907 #endif
908
909         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
910 }
911
912 ccl_device void kernel_path_trace(KernelGlobals *kg,
913         ccl_global float *buffer, ccl_global uint *rng_state,
914         int sample, int x, int y, int offset, int stride)
915 {
916         /* buffer offset */
917         int index = offset + x + y*stride;
918         int pass_stride = kernel_data.film.pass_stride;
919
920         rng_state += index;
921         buffer += index*pass_stride;
922
923         /* initialize random numbers and ray */
924         RNG rng;
925         Ray ray;
926
927         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
928
929         /* integrate */
930         float4 L;
931
932         if(ray.t != 0.0f)
933                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
934         else
935                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
936
937         /* accumulate result in output buffer */
938         kernel_write_pass_float4(buffer, sample, L);
939
940         path_rng_end(kg, rng_state, rng);
941 }
942
943 CCL_NAMESPACE_END
944