Cycles: Fix wrong volume stack after SSS bounce
[blender-staging.git] / intern / cycles / kernel / kernel_path.h
/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #ifdef __OSL__
18 #include "osl_shader.h"
19 #endif
20
21 #include "kernel_random.h"
22 #include "kernel_projection.h"
23 #include "kernel_montecarlo.h"
24 #include "kernel_differential.h"
25 #include "kernel_camera.h"
26
27 #include "geom/geom.h"
28
29 #include "kernel_accumulate.h"
30 #include "kernel_shader.h"
31 #include "kernel_light.h"
32 #include "kernel_passes.h"
33
34 #ifdef __SUBSURFACE__
35 #include "kernel_subsurface.h"
36 #endif
37
38 #ifdef __VOLUME__
39 #include "kernel_volume.h"
40 #endif
41
42 #include "kernel_path_state.h"
43 #include "kernel_shadow.h"
44 #include "kernel_emission.h"
45 #include "kernel_path_common.h"
46 #include "kernel_path_surface.h"
47 #include "kernel_path_volume.h"
48
49 #ifdef __KERNEL_DEBUG__
50 #include "kernel_debug.h"
51 #endif
52
53 CCL_NAMESPACE_BEGIN
54
55 ccl_device void kernel_path_indirect(KernelGlobals *kg,
56                                      RNG *rng,
57                                      Ray *ray,
58                                      float3 throughput,
59                                      int num_samples,
60                                      PathState *state,
61                                      PathRadiance *L)
62 {
63         /* path iteration */
64         for(;;) {
65                 /* intersect scene */
66                 Intersection isect;
67                 uint visibility = path_state_ray_visibility(kg, state);
68                 bool hit = scene_intersect(kg,
69                                            ray,
70                                            visibility,
71                                            &isect,
72                                            NULL,
73                                            0.0f, 0.0f);
74
75 #ifdef __LAMP_MIS__
76                 if(kernel_data.integrator.use_lamp_mis && !(state->flag & PATH_RAY_CAMERA)) {
77                         /* ray starting from previous non-transparent bounce */
78                         Ray light_ray;
79
80                         light_ray.P = ray->P - state->ray_t*ray->D;
81                         state->ray_t += isect.t;
82                         light_ray.D = ray->D;
83                         light_ray.t = state->ray_t;
84                         light_ray.time = ray->time;
85                         light_ray.dD = ray->dD;
86                         light_ray.dP = ray->dP;
87
88                         /* intersect with lamp */
89                         float3 emission;
90                         if(indirect_lamp_emission(kg, state, &light_ray, &emission)) {
91                                 path_radiance_accum_emission(L,
92                                                              throughput,
93                                                              emission,
94                                                              state->bounce);
95                         }
96                 }
97 #endif
98
99 #ifdef __VOLUME__
100                 /* volume attenuation, emission, scatter */
101                 if(state->volume_stack[0].shader != SHADER_NONE) {
102                         Ray volume_ray = *ray;
103                         volume_ray.t = (hit)? isect.t: FLT_MAX;
104
105                         bool heterogeneous =
106                                 volume_stack_is_heterogeneous(kg,
107                                                               state->volume_stack);
108
109 #ifdef __VOLUME_DECOUPLED__
110                         int sampling_method =
111                                 volume_stack_sampling_method(kg,
112                                                              state->volume_stack);
113                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, false, sampling_method);
114
115                         if(decoupled) {
116                                 /* cache steps along volume for repeated sampling */
117                                 VolumeSegment volume_segment;
118                                 ShaderData volume_sd;
119
120                                 shader_setup_from_volume(kg,
121                                                          &volume_sd,
122                                                          &volume_ray,
123                                                          state->bounce,
124                                                          state->transparent_bounce);
125                                 kernel_volume_decoupled_record(kg,
126                                                                state,
127                                                                &volume_ray,
128                                                                &volume_sd,
129                                                                &volume_segment,
130                                                                heterogeneous);
131
132                                 volume_segment.sampling_method = sampling_method;
133
134                                 /* emission */
135                                 if(volume_segment.closure_flag & SD_EMISSION) {
136                                         path_radiance_accum_emission(L,
137                                                                      throughput,
138                                                                      volume_segment.accum_emission,
139                                                                      state->bounce);
140                                 }
141
142                                 /* scattering */
143                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
144
145                                 if(volume_segment.closure_flag & SD_SCATTER) {
146                                         bool all = kernel_data.integrator.sample_all_lights_indirect;
147
148                                         /* direct light sampling */
149                                         kernel_branched_path_volume_connect_light(kg,
150                                                                                   rng,
151                                                                                   &volume_sd,
152                                                                                   throughput,
153                                                                                   state,
154                                                                                   L,
155                                                                                   all,
156                                                                                   &volume_ray,
157                                                                                   &volume_segment);
158
159                                         /* indirect sample. if we use distance sampling and take just
160                                          * one sample for direct and indirect light, we could share
161                                          * this computation, but makes code a bit complex */
162                                         float rphase = path_state_rng_1D_for_decision(kg, rng, state, PRNG_PHASE);
163                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, state, PRNG_SCATTER_DISTANCE);
164
165                                         result = kernel_volume_decoupled_scatter(kg,
166                                                                                  state,
167                                                                                  &volume_ray,
168                                                                                  &volume_sd,
169                                                                                  &throughput,
170                                                                                  rphase,
171                                                                                  rscatter,
172                                                                                  &volume_segment,
173                                                                                  NULL,
174                                                                                  true);
175                                 }
176
177                                 /* free cached steps */
178                                 kernel_volume_decoupled_free(kg, &volume_segment);
179
180                                 if(result == VOLUME_PATH_SCATTERED) {
181                                         if(kernel_path_volume_bounce(kg,
182                                                                      rng,
183                                                                      &volume_sd,
184                                                                      &throughput,
185                                                                      state,
186                                                                      L,
187                                                                      ray))
188                                         {
189                                                 continue;
190                                         }
191                                         else {
192                                                 break;
193                                         }
194                                 }
195                                 else {
196                                         throughput *= volume_segment.accum_transmittance;
197                                 }
198                         }
199                         else
200 #endif
201                         {
202                                 /* integrate along volume segment with distance sampling */
203                                 ShaderData volume_sd;
204                                 VolumeIntegrateResult result = kernel_volume_integrate(
205                                         kg, state, &volume_sd, &volume_ray, L, &throughput, rng, heterogeneous);
206
207 #ifdef __VOLUME_SCATTER__
208                                 if(result == VOLUME_PATH_SCATTERED) {
209                                         /* direct lighting */
210                                         kernel_path_volume_connect_light(kg,
211                                                                          rng,
212                                                                          &volume_sd,
213                                                                          throughput,
214                                                                          state,
215                                                                          L);
216
217                                         /* indirect light bounce */
218                                         if(kernel_path_volume_bounce(kg,
219                                                                      rng,
220                                                                      &volume_sd,
221                                                                      &throughput,
222                                                                      state,
223                                                                      L,
224                                                                      ray))
225                                         {
226                                                 continue;
227                                         }
228                                         else {
229                                                 break;
230                                         }
231                                 }
232 #endif
233                         }
234                 }
235 #endif
236
237                 if(!hit) {
238 #ifdef __BACKGROUND__
239                         /* sample background shader */
240                         float3 L_background = indirect_background(kg, state, ray);
241                         path_radiance_accum_background(L,
242                                                        throughput,
243                                                        L_background,
244                                                        state->bounce);
245 #endif
246
247                         break;
248                 }
249
250                 /* setup shading */
251                 ShaderData sd;
252                 shader_setup_from_ray(kg,
253                                       &sd,
254                                       &isect,
255                                       ray,
256                                       state->bounce,
257                                       state->transparent_bounce);
258                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
259                 shader_eval_surface(kg, &sd, rbsdf, state->flag, SHADER_CONTEXT_INDIRECT);
260 #ifdef __BRANCHED_PATH__
261                 shader_merge_closures(&sd);
262 #endif
263
264                 /* blurring of bsdf after bounces, for rays that have a small likelihood
265                  * of following this particular path (diffuse, rough glossy) */
266                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
267                         float blur_pdf = kernel_data.integrator.filter_glossy*state->min_ray_pdf;
268
269                         if(blur_pdf < 1.0f) {
270                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
271                                 shader_bsdf_blur(kg, &sd, blur_roughness);
272                         }
273                 }
274
275 #ifdef __EMISSION__
276                 /* emission */
277                 if(sd.flag & SD_EMISSION) {
278                         float3 emission = indirect_primitive_emission(kg,
279                                                                       &sd,
280                                                                       isect.t,
281                                                                       state->flag,
282                                                                       state->ray_pdf);
283                         path_radiance_accum_emission(L, throughput, emission, state->bounce);
284                 }
285 #endif
286
287                 /* path termination. this is a strange place to put the termination, it's
288                  * mainly due to the mixed in MIS that we use. gives too many unneeded
289                  * shader evaluations, only need emission if we are going to terminate */
290                 float probability =
291                         path_state_terminate_probability(kg,
292                                                          state,
293                                                          throughput*num_samples);
294
295                 if(probability == 0.0f) {
296                         break;
297                 }
298                 else if(probability != 1.0f) {
299                         float terminate = path_state_rng_1D_for_decision(kg, rng, state, PRNG_TERMINATE);
300
301                         if(terminate >= probability)
302                                 break;
303
304                         throughput /= probability;
305                 }
306
307 #ifdef __AO__
308                 /* ambient occlusion */
309                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
310                         float bsdf_u, bsdf_v;
311                         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
312
313                         float ao_factor = kernel_data.background.ao_factor;
314                         float3 ao_N;
315                         float3 ao_bsdf = shader_bsdf_ao(kg, &sd, ao_factor, &ao_N);
316                         float3 ao_D;
317                         float ao_pdf;
318                         float3 ao_alpha = make_float3(0.0f, 0.0f, 0.0f);
319
320                         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
321
322                         if(dot(sd.Ng, ao_D) > 0.0f && ao_pdf != 0.0f) {
323                                 Ray light_ray;
324                                 float3 ao_shadow;
325
326                                 light_ray.P = ray_offset(sd.P, sd.Ng);
327                                 light_ray.D = ao_D;
328                                 light_ray.t = kernel_data.background.ao_distance;
329 #ifdef __OBJECT_MOTION__
330                                 light_ray.time = sd.time;
331 #endif
332                                 light_ray.dP = sd.dP;
333                                 light_ray.dD = differential3_zero();
334
335                                 if(!shadow_blocked(kg, state, &light_ray, &ao_shadow)) {
336                                         path_radiance_accum_ao(L,
337                                                                throughput,
338                                                                ao_alpha,
339                                                                ao_bsdf,
340                                                                ao_shadow,
341                                                                state->bounce);
342                                 }
343                         }
344                 }
345 #endif
346
347 #ifdef __SUBSURFACE__
348                 /* bssrdf scatter to a different location on the same object, replacing
349                  * the closures with a diffuse BSDF */
350                 if(sd.flag & SD_BSSRDF) {
351                         float bssrdf_probability;
352                         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, &sd, &bssrdf_probability);
353
354                         /* modify throughput for picking bssrdf or bsdf */
355                         throughput *= bssrdf_probability;
356
357                         /* do bssrdf scatter step if we picked a bssrdf closure */
358                         if(sc) {
359                                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
360
361                                 float bssrdf_u, bssrdf_v;
362                                 path_state_rng_2D(kg,
363                                                   rng,
364                                                   state,
365                                                   PRNG_BSDF_U,
366                                                   &bssrdf_u, &bssrdf_v);
367                                 subsurface_scatter_step(kg,
368                                                         &sd,
369                                                         state->flag,
370                                                         sc,
371                                                         &lcg_state,
372                                                         bssrdf_u, bssrdf_v,
373                                                         false);
374                         }
375                 }
376 #endif
377
378 #if defined(__EMISSION__) && defined(__BRANCHED_PATH__)
379                 if(kernel_data.integrator.use_direct_light) {
380                         bool all = kernel_data.integrator.sample_all_lights_indirect;
381                         kernel_branched_path_surface_connect_light(kg,
382                                                                    rng,
383                                                                    &sd,
384                                                                    state,
385                                                                    throughput,
386                                                                    1.0f,
387                                                                    L,
388                                                                    all);
389                 }
390 #endif
391
392                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, state, L, ray))
393                         break;
394         }
395 }
396
397 ccl_device void kernel_path_ao(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, PathState *state, RNG *rng, float3 throughput)
398 {
399         /* todo: solve correlation */
400         float bsdf_u, bsdf_v;
401
402         path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bsdf_u, &bsdf_v);
403
404         float ao_factor = kernel_data.background.ao_factor;
405         float3 ao_N;
406         float3 ao_bsdf = shader_bsdf_ao(kg, sd, ao_factor, &ao_N);
407         float3 ao_D;
408         float ao_pdf;
409         float3 ao_alpha = shader_bsdf_alpha(kg, sd);
410
411         sample_cos_hemisphere(ao_N, bsdf_u, bsdf_v, &ao_D, &ao_pdf);
412
413         if(dot(ccl_fetch(sd, Ng), ao_D) > 0.0f && ao_pdf != 0.0f) {
414                 Ray light_ray;
415                 float3 ao_shadow;
416
417                 light_ray.P = ray_offset(ccl_fetch(sd, P), ccl_fetch(sd, Ng));
418                 light_ray.D = ao_D;
419                 light_ray.t = kernel_data.background.ao_distance;
420 #ifdef __OBJECT_MOTION__
421                 light_ray.time = ccl_fetch(sd, time);
422 #endif
423                 light_ray.dP = ccl_fetch(sd, dP);
424                 light_ray.dD = differential3_zero();
425
426                 if(!shadow_blocked(kg, state, &light_ray, &ao_shadow))
427                         path_radiance_accum_ao(L, throughput, ao_alpha, ao_bsdf, ao_shadow, state->bounce);
428         }
429 }
430
431 #ifdef __SUBSURFACE__
432
433 ccl_device bool kernel_path_subsurface_scatter(
434         KernelGlobals *kg,
435         ShaderData *sd,
436         PathRadiance *L,
437         PathState *state,
438         RNG *rng,
439         Ray *ray,
440         float3 *throughput,
441         SubsurfaceIndirectRays *ss_indirect)
442 {
443         float bssrdf_probability;
444         ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
445
446         /* modify throughput for picking bssrdf or bsdf */
447         *throughput *= bssrdf_probability;
448
449         /* do bssrdf scatter step if we picked a bssrdf closure */
450         if(sc) {
451                 /* We should never have two consecutive BSSRDF bounces,
452                  * the second one should be converted to a diffuse BSDF to
453                  * avoid this.
454                  */
455                 kernel_assert(!ss_indirect->tracing);
456
457                 uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
458
459                 SubsurfaceIntersection ss_isect;
460                 float bssrdf_u, bssrdf_v;
461                 path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
462                 int num_hits = subsurface_scatter_multi_intersect(kg,
463                                                                   &ss_isect,
464                                                                   sd,
465                                                                   sc,
466                                                                   &lcg_state,
467                                                                   bssrdf_u, bssrdf_v,
468                                                                   false);
469 #ifdef __VOLUME__
470                 ss_indirect->need_update_volume_stack =
471                         kernel_data.integrator.use_volumes &&
472                         ccl_fetch(sd, flag) & SD_OBJECT_INTERSECTS_VOLUME;
473 #endif
474
475                 /* compute lighting with the BSDF closure */
476                 for(int hit = 0; hit < num_hits; hit++) {
477                         /* NOTE: We reuse the existing ShaderData, we assume the path
478                          * integration loop stops when this function returns true.
479                          */
480                         subsurface_scatter_multi_setup(kg,
481                                                        &ss_isect,
482                                                        hit,
483                                                        sd,
484                                                        state->flag,
485                                                        sc,
486                                                        false);
487
488                         PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
489                         Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
490                         float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
491                         PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
492
493                         *hit_state = *state;
494                         *hit_ray = *ray;
495                         *hit_tp = *throughput;
496
497                         hit_state->rng_offset += PRNG_BOUNCE_NUM;
498
499                         path_radiance_init(hit_L, kernel_data.film.use_light_pass);
500                         kernel_path_surface_connect_light(kg, rng, sd, *hit_tp, state, hit_L);
501
502                         if(kernel_path_surface_bounce(kg,
503                                                       rng,
504                                                       sd,
505                                                       hit_tp,
506                                                       hit_state,
507                                                       hit_L,
508                                                       hit_ray))
509                         {
510 #ifdef __LAMP_MIS__
511                                 hit_state->ray_t = 0.0f;
512 #endif
513
514 #ifdef __VOLUME__
515                                 if(ss_indirect->need_update_volume_stack) {
516                                         Ray volume_ray = *ray;
517                                         /* Setup ray from previous surface point to the new one. */
518                                         volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
519                                                                      &volume_ray.t);
520
521                                         kernel_volume_stack_update_for_subsurface(
522                                             kg,
523                                             &volume_ray,
524                                             hit_state->volume_stack);
525                                 }
526 #endif
527
528                                 ss_indirect->num_rays++;
529                         }
530                         else {
531                                 path_radiance_accum_sample(L, hit_L, 1);
532                         }
533                 }
534                 return true;
535         }
536         return false;
537 }
538
539 ccl_device_inline void kernel_path_subsurface_init_indirect(
540         SubsurfaceIndirectRays *ss_indirect)
541 {
542         ss_indirect->tracing = false;
543         ss_indirect->num_rays = 0;
544 }
545
546 ccl_device void kernel_path_subsurface_accum_indirect(
547         SubsurfaceIndirectRays *ss_indirect,
548         PathRadiance *L)
549 {
550         if(ss_indirect->tracing) {
551                 path_radiance_sum_indirect(L);
552                 path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
553                 if(ss_indirect->num_rays == 0) {
554                         *L = ss_indirect->direct_L;
555                 }
556         }
557 }
558
559 ccl_device void kernel_path_subsurface_setup_indirect(
560         KernelGlobals *kg,
561         SubsurfaceIndirectRays *ss_indirect,
562         PathState *state,
563         Ray *ray,
564         PathRadiance *L,
565         float3 *throughput)
566 {
567         if(!ss_indirect->tracing) {
568                 ss_indirect->direct_L = *L;
569         }
570         ss_indirect->tracing = true;
571
572         /* Setup state, ray and throughput for indirect SSS rays. */
573         ss_indirect->num_rays--;
574
575         Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
576         PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
577
578         *state = ss_indirect->state[ss_indirect->num_rays];
579         *ray = *indirect_ray;
580         *L = *indirect_L;
581         *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
582
583         state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
584 }
585
586 #endif  /* __SUBSURFACE__ */
587
588 ccl_device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, Ray ray, ccl_global float *buffer)
589 {
590         /* initialize */
591         PathRadiance L;
592         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
593         float L_transparent = 0.0f;
594
595         path_radiance_init(&L, kernel_data.film.use_light_pass);
596
597         PathState state;
598         path_state_init(kg, &state, rng, sample, &ray);
599
600 #ifdef __KERNEL_DEBUG__
601         DebugData debug_data;
602         debug_data_init(&debug_data);
603 #endif
604
605 #ifdef __SUBSURFACE__
606         SubsurfaceIndirectRays ss_indirect;
607         kernel_path_subsurface_init_indirect(&ss_indirect);
608
609         for(;;) {
610 #endif
611
612         /* path iteration */
613         for(;;) {
614                 /* intersect scene */
615                 Intersection isect;
616                 uint visibility = path_state_ray_visibility(kg, &state);
617
618 #ifdef __HAIR__
619                 float difl = 0.0f, extmax = 0.0f;
620                 uint lcg_state = 0;
621
622                 if(kernel_data.bvh.have_curves) {
623                         if((kernel_data.cam.resolution == 1) && (state.flag & PATH_RAY_CAMERA)) {       
624                                 float3 pixdiff = ray.dD.dx + ray.dD.dy;
625                                 /*pixdiff = pixdiff - dot(pixdiff, ray.D)*ray.D;*/
626                                 difl = kernel_data.curve.minimum_width * len(pixdiff) * 0.5f;
627                         }
628
629                         extmax = kernel_data.curve.maximum_width;
630                         lcg_state = lcg_state_init(rng, &state, 0x51633e2d);
631                 }
632
633                 bool hit = scene_intersect(kg, &ray, visibility, &isect, &lcg_state, difl, extmax);
634 #else
635                 bool hit = scene_intersect(kg, &ray, visibility, &isect, NULL, 0.0f, 0.0f);
636 #endif
637
638 #ifdef __KERNEL_DEBUG__
639                 if(state.flag & PATH_RAY_CAMERA) {
640                         debug_data.num_bvh_traversal_steps += isect.num_traversal_steps;
641                         debug_data.num_bvh_traversed_instances += isect.num_traversed_instances;
642                 }
643                 debug_data.num_ray_bounces++;
644 #endif
645
646 #ifdef __LAMP_MIS__
647                 if(kernel_data.integrator.use_lamp_mis && !(state.flag & PATH_RAY_CAMERA)) {
648                         /* ray starting from previous non-transparent bounce */
649                         Ray light_ray;
650
651                         light_ray.P = ray.P - state.ray_t*ray.D;
652                         state.ray_t += isect.t;
653                         light_ray.D = ray.D;
654                         light_ray.t = state.ray_t;
655                         light_ray.time = ray.time;
656                         light_ray.dD = ray.dD;
657                         light_ray.dP = ray.dP;
658
659                         /* intersect with lamp */
660                         float3 emission;
661
662                         if(indirect_lamp_emission(kg, &state, &light_ray, &emission))
663                                 path_radiance_accum_emission(&L, throughput, emission, state.bounce);
664                 }
665 #endif
666
667 #ifdef __VOLUME__
668                 /* volume attenuation, emission, scatter */
669                 if(state.volume_stack[0].shader != SHADER_NONE) {
670                         Ray volume_ray = ray;
671                         volume_ray.t = (hit)? isect.t: FLT_MAX;
672
673                         bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
674
675 #ifdef __VOLUME_DECOUPLED__
676                         int sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
677                         bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, true, sampling_method);
678
679                         if(decoupled) {
680                                 /* cache steps along volume for repeated sampling */
681                                 VolumeSegment volume_segment;
682                                 ShaderData volume_sd;
683
684                                 shader_setup_from_volume(kg, &volume_sd, &volume_ray, state.bounce, state.transparent_bounce);
685                                 kernel_volume_decoupled_record(kg, &state,
686                                         &volume_ray, &volume_sd, &volume_segment, heterogeneous);
687
688                                 volume_segment.sampling_method = sampling_method;
689
690                                 /* emission */
691                                 if(volume_segment.closure_flag & SD_EMISSION)
692                                         path_radiance_accum_emission(&L, throughput, volume_segment.accum_emission, state.bounce);
693
694                                 /* scattering */
695                                 VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
696
697                                 if(volume_segment.closure_flag & SD_SCATTER) {
698                                         bool all = false;
699
700                                         /* direct light sampling */
701                                         kernel_branched_path_volume_connect_light(kg, rng, &volume_sd,
702                                                 throughput, &state, &L, all, &volume_ray, &volume_segment);
703
704                                         /* indirect sample. if we use distance sampling and take just
705                                          * one sample for direct and indirect light, we could share
706                                          * this computation, but makes code a bit complex */
707                                         float rphase = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_PHASE);
708                                         float rscatter = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_SCATTER_DISTANCE);
709
710                                         result = kernel_volume_decoupled_scatter(kg,
711                                                 &state, &volume_ray, &volume_sd, &throughput,
712                                                 rphase, rscatter, &volume_segment, NULL, true);
713                                 }
714
715                                 /* free cached steps */
716                                 kernel_volume_decoupled_free(kg, &volume_segment);
717
718                                 if(result == VOLUME_PATH_SCATTERED) {
719                                         if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
720                                                 continue;
721                                         else
722                                                 break;
723                                 }
724                                 else {
725                                         throughput *= volume_segment.accum_transmittance;
726                                 }
727                         }
728                         else 
729 #endif
730                         {
731                                 /* integrate along volume segment with distance sampling */
732                                 ShaderData volume_sd;
733                                 VolumeIntegrateResult result = kernel_volume_integrate(
734                                         kg, &state, &volume_sd, &volume_ray, &L, &throughput, rng, heterogeneous);
735
736 #ifdef __VOLUME_SCATTER__
737                                 if(result == VOLUME_PATH_SCATTERED) {
738                                         /* direct lighting */
739                                         kernel_path_volume_connect_light(kg, rng, &volume_sd, throughput, &state, &L);
740
741                                         /* indirect light bounce */
742                                         if(kernel_path_volume_bounce(kg, rng, &volume_sd, &throughput, &state, &L, &ray))
743                                                 continue;
744                                         else
745                                                 break;
746                                 }
747 #endif
748                         }
749                 }
750 #endif
751
752                 if(!hit) {
753                         /* eval background shader if nothing hit */
754                         if(kernel_data.background.transparent && (state.flag & PATH_RAY_CAMERA)) {
755                                 L_transparent += average(throughput);
756
757 #ifdef __PASSES__
758                                 if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
759 #endif
760                                         break;
761                         }
762
763 #ifdef __BACKGROUND__
764                         /* sample background shader */
765                         float3 L_background = indirect_background(kg, &state, &ray);
766                         path_radiance_accum_background(&L, throughput, L_background, state.bounce);
767 #endif
768
769                         break;
770                 }
771
772                 /* setup shading */
773                 ShaderData sd;
774                 shader_setup_from_ray(kg, &sd, &isect, &ray, state.bounce, state.transparent_bounce);
775                 float rbsdf = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_BSDF);
776                 shader_eval_surface(kg, &sd, rbsdf, state.flag, SHADER_CONTEXT_MAIN);
777
778                 /* holdout */
779 #ifdef __HOLDOUT__
780                 if((sd.flag & (SD_HOLDOUT|SD_HOLDOUT_MASK)) && (state.flag & PATH_RAY_CAMERA)) {
781                         if(kernel_data.background.transparent) {
782                                 float3 holdout_weight;
783                                 
784                                 if(sd.flag & SD_HOLDOUT_MASK)
785                                         holdout_weight = make_float3(1.0f, 1.0f, 1.0f);
786                                 else
787                                         holdout_weight = shader_holdout_eval(kg, &sd);
788
789                                 /* any throughput is ok, should all be identical here */
790                                 L_transparent += average(holdout_weight*throughput);
791                         }
792
793                         if(sd.flag & SD_HOLDOUT_MASK)
794                                 break;
795                 }
796 #endif
797
798                 /* holdout mask objects do not write data passes */
799                 kernel_write_data_passes(kg, buffer, &L, &sd, sample, &state, throughput);
800
801                 /* blurring of bsdf after bounces, for rays that have a small likelihood
802                  * of following this particular path (diffuse, rough glossy) */
803                 if(kernel_data.integrator.filter_glossy != FLT_MAX) {
804                         float blur_pdf = kernel_data.integrator.filter_glossy*state.min_ray_pdf;
805
806                         if(blur_pdf < 1.0f) {
807                                 float blur_roughness = sqrtf(1.0f - blur_pdf)*0.5f;
808                                 shader_bsdf_blur(kg, &sd, blur_roughness);
809                         }
810                 }
811
812 #ifdef __EMISSION__
813                 /* emission */
814                 if(sd.flag & SD_EMISSION) {
815                         /* todo: is isect.t wrong here for transparent surfaces? */
816                         float3 emission = indirect_primitive_emission(kg, &sd, isect.t, state.flag, state.ray_pdf);
817                         path_radiance_accum_emission(&L, throughput, emission, state.bounce);
818                 }
819 #endif
820
821                 /* path termination. this is a strange place to put the termination, it's
822                  * mainly due to the mixed in MIS that we use. gives too many unneeded
823                  * shader evaluations, only need emission if we are going to terminate */
824                 float probability = path_state_terminate_probability(kg, &state, throughput);
825
826                 if(probability == 0.0f) {
827                         break;
828                 }
829                 else if(probability != 1.0f) {
830                         float terminate = path_state_rng_1D_for_decision(kg, rng, &state, PRNG_TERMINATE);
831
832                         if(terminate >= probability)
833                                 break;
834
835                         throughput /= probability;
836                 }
837
838 #ifdef __AO__
839                 /* ambient occlusion */
840                 if(kernel_data.integrator.use_ambient_occlusion || (sd.flag & SD_AO)) {
841                         kernel_path_ao(kg, &sd, &L, &state, rng, throughput);
842                 }
843 #endif
844
845 #ifdef __SUBSURFACE__
846                 /* bssrdf scatter to a different location on the same object, replacing
847                  * the closures with a diffuse BSDF */
848                 if(sd.flag & SD_BSSRDF) {
849                         if(kernel_path_subsurface_scatter(kg,
850                                                           &sd,
851                                                           &L,
852                                                           &state,
853                                                           rng,
854                                                           &ray,
855                                                           &throughput,
856                                                           &ss_indirect))
857                         {
858                                 break;
859                         }
860                 }
861 #endif  /* __SUBSURFACE__ */
862
863                 /* direct lighting */
864                 kernel_path_surface_connect_light(kg, rng, &sd, throughput, &state, &L);
865
866                 /* compute direct lighting and next bounce */
867                 if(!kernel_path_surface_bounce(kg, rng, &sd, &throughput, &state, &L, &ray))
868                         break;
869         }
870
871 #ifdef __SUBSURFACE__
872                 kernel_path_subsurface_accum_indirect(&ss_indirect, &L);
873
874                 /* Trace indirect subsurface rays by restarting the loop. this uses less
875                  * stack memory than invoking kernel_path_indirect.
876                  */
877                 if(ss_indirect.num_rays) {
878                         kernel_path_subsurface_setup_indirect(kg,
879                                                               &ss_indirect,
880                                                               &state,
881                                                               &ray,
882                                                               &L,
883                                                               &throughput);
884                 }
885                 else {
886                         break;
887                 }
888         }
889 #endif  /* __SUBSURFACE__ */
890
891         float3 L_sum = path_radiance_clamp_and_sum(kg, &L);
892
893         kernel_write_light_passes(kg, buffer, &L, sample);
894
895 #ifdef __KERNEL_DEBUG__
896         kernel_write_debug_passes(kg, buffer, &state, &debug_data, sample);
897 #endif
898
899         return make_float4(L_sum.x, L_sum.y, L_sum.z, 1.0f - L_transparent);
900 }
901
902 ccl_device void kernel_path_trace(KernelGlobals *kg,
903         ccl_global float *buffer, ccl_global uint *rng_state,
904         int sample, int x, int y, int offset, int stride)
905 {
906         /* buffer offset */
907         int index = offset + x + y*stride;
908         int pass_stride = kernel_data.film.pass_stride;
909
910         rng_state += index;
911         buffer += index*pass_stride;
912
913         /* initialize random numbers and ray */
914         RNG rng;
915         Ray ray;
916
917         kernel_path_trace_setup(kg, rng_state, sample, x, y, &rng, &ray);
918
919         /* integrate */
920         float4 L;
921
922         if(ray.t != 0.0f)
923                 L = kernel_path_integrate(kg, &rng, sample, ray, buffer);
924         else
925                 L = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
926
927         /* accumulate result in output buffer */
928         kernel_write_pass_float4(buffer, sample, L);
929
930         path_rng_end(kg, rng_state, rng);
931 }
932
933 CCL_NAMESPACE_END
934